C++使用SAPI实现语音合成和语音识别的方法和代码_C/C++_编程开发_程序员俱乐部

中国优秀的程序员网站程序员频道CXYCLUB技术地图
热搜:
更多>>
 
您所在的位置: 程序员俱乐部 > 编程开发 > C/C++ > C++使用SAPI实现语音合成和语音识别的方法和代码

C++使用SAPI实现语音合成和语音识别的方法和代码

 2012/1/19 9:11:52  kevin19900306  程序员俱乐部  我要评论(0)
  • 摘要:语音合成:#include<sapi.h>#pragmacomment(lib,"ole32.lib")//CoInitializeCoCreateInstance需要调用ole32.dll#pragmacomment(lib,"sapi.lib")//sapi.lib在SDK的lib目录,必需正确配置intmain(intargc,char*argv[]){ISpVoice*pVoice=NULL;//COM初始化:if(FAILED(::CoInitialize(NULL))
  • 标签:语音识别 方法 API 实现 使用 c++ 代码 SAP
语音合成:

#include <sapi.h> 

#pragma comment(lib,"ole32.lib") //CoInitialize CoCreateInstance需要调用ole32.dll 
#pragma comment(lib,"sapi.lib") //sapi.lib在SDK的lib目录,必需正确配置 
int main(int argc, char* argv[]) 
{ 
ISpVoice * pVoice = NULL; 

//COM初始化: 
if (FAILED(::CoInitialize(NULL))) 
return FALSE; 

//获取ISpVoice接口HRESULT hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void **)&pVoice); 
if( SUCCEEDED( hr ) ) 
{ 
hr = pVoice->Speak(L"Hello world", 0, NULL); 
pVoice->Release(); 
pVoice = NULL; 
} 

//千万不要忘记: 
::CoUninitialize(); 
return TRUE; 
} 



微软的语音识别,在这里我们简称它为SR(speech recognition)。SR分为两种监听模式:第一种模式是任意监听,即随意输入语音,监听对象将最为接近的字、词或句反馈出来;第二种模式是划定范围监听,即指定一组备选项作为监听范围,用户的语音输入被反馈成最为接近的一个选项。说得通俗一些:第一种是填空题,第二种是选择题。
下面是第一种模式的代码:

#include <windows.h>
#include <sapi.h>
#include <stdio.h>
#include <string.h>
#include <atlbase.h>
#include "sphelper.h"

/**
 * Blocks until an event can be fetched from the recognition context and
 * hands back the recognition result carried by that event.
 *
 * While CSpEvent::GetFrom reports S_FALSE the function waits (without
 * timeout) on the context's notify event and then tries again. Any other
 * HRESULT from GetFrom, or a failure from the wait itself, ends the loop.
 *
 * @param pRecoCtxt  Recognition context to pull events from.
 * @param ppResult   Receives event.RecoResult(); when that pointer is
 *                   non-null it has been AddRef'ed, so the caller owns one
 *                   reference.
 * @return The last HRESULT produced by GetFrom or WaitForNotifyEvent.
 */
inline HRESULT BlockForResult(ISpRecoContext * pRecoCtxt, ISpRecoResult ** ppResult)
{
    CSpEvent spEvent;
    HRESULT hrStatus = S_OK;

    for (;;)
    {
        // A failed wait from the previous pass stops the polling.
        if (FAILED(hrStatus))
        {
            break;
        }

        hrStatus = spEvent.GetFrom(pRecoCtxt);

        // Only S_FALSE keeps us waiting; failures and every other success
        // code leave the loop.
        if (hrStatus != S_FALSE)
        {
            break;
        }

        hrStatus = pRecoCtxt->WaitForNotifyEvent(INFINITE);
    }

    ISpRecoResult * pFetched = spEvent.RecoResult();
    *ppResult = pFetched;
    if (pFetched)
    {
        // The caller keeps the result beyond the lifetime of spEvent.
        pFetched->AddRef();
    }

    return hrStatus;
}

/**
 * Returns the word that ends the dictation loop, chosen by the user's
 * default UI language as reported by SpGetUserDefaultUILanguage().
 *
 * @return A pointer to a static wide string literal: a Japanese phrase for
 *         MAKELANGID(LANG_JAPANESE, SUBLANG_DEFAULT), otherwise L"Stop".
 */
const WCHAR * StopWord()
{
    const WCHAR * pchStop;

    LANGID LangId = ::SpGetUserDefaultUILanguage();

    switch (LangId)
    {
        case MAKELANGID(LANG_JAPANESE, SUBLANG_DEFAULT):
            // Code points U+7D42 U+4E86, a backslash, U+30B7 U+30E5 U+30FC
            // U+30EA U+30E7 U+30FC, a slash, U+3057 U+3085 U+3046 U+308A
            // U+3087 U+3046. The \x escapes must stay intact: with them
            // stripped the literal degenerates to ASCII residue containing
            // an embedded \0 that truncates the string.
            // NOTE(review): reconstructed from the mangled literal; confirm
            // against the SAPI SDK sample this code derives from.
            pchStop = L"\x7d42\x4e86\\\x30b7\x30e5\x30fc\x30ea\x30e7\x30fc/\x3057\x3085\x3046\x308a\x3087\x3046";
            break;

        default:
            pchStop = L"Stop";
            break;
    }

    return pchStop;
}
            
int main(int argc, char* argv[])
{
    HRESULT hr = E_FAIL;
    bool fUseTTS = true;            // turn TTS play back on or off
    bool fReplay = true;            // turn Audio replay on or off

    // Process optional arguments
    if (argc > 1)
    {
        int i;

        for (i = 1; i < argc; i++)
        {
            if (_stricmp(argv[i], "-noTTS") == 0)
            {
                fUseTTS = false;
                continue;
            }
            if (_stricmp(argv[i], "-noReplay") == 0)
            {
                fReplay = false;
                continue;
            }       
            printf ("Usage: %s [-noTTS] [-noReplay]  ", argv[0]);
            return hr;
        }
    }

    if (SUCCEEDED(hr = ::CoInitialize(NULL)))
    {
        {
            CComPtr<ISpRecoContext> cpRecoCtxt;
            CComPtr<ISpRecoGrammar> cpGrammar;
            CComPtr<ISpVoice> cpVoice;
            hr = cpRecoCtxt.CoCreateInstance(CLSID_SpSharedRecoContext);
            if(SUCCEEDED(hr))
            {
                hr = cpRecoCtxt->GetVoice(&cpVoice);
            }
           
            if (cpRecoCtxt && cpVoice &&
                SUCCEEDED(hr = cpRecoCtxt->SetNotifyWin32Event()) &&
                SUCCEEDED(hr = cpRecoCtxt->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION))) && 
                SUCCEEDED(hr = cpRecoCtxt->SetAudioOptions(SPAO_RETAIN_AUDIO, NULL, NULL)) &&
                SUCCEEDED(hr = cpRecoCtxt->CreateGrammar(0, &cpGrammar)) &&
                SUCCEEDED(hr = cpGrammar->LoadDictation(NULL, SPLO_STATIC)) &&
                SUCCEEDED(hr = cpGrammar->SetDictationState(SPRS_ACTIVE)))
            {
                USES_CONVERSION;
                            
                const WCHAR * const pchStop = StopWord();
                CComPtr<ISpRecoResult> cpResult;

                printf( "I will repeat everything you say. Say "%s" to exit. ", W2A(pchStop) );

                while (SUCCEEDED(hr = BlockForResult(cpRecoCtxt, &cpResult)))
                {
                    cpGrammar->SetDictationState( SPRS_INACTIVE );

                    CSpDynamicString dstrText;

                    if (SUCCEEDED(cpResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, 
                                                    TRUE, &dstrText, NULL)))
                    {
                        printf("I heard:  %s ", W2A(dstrText));

                        if (fUseTTS)
                        {
                            cpVoice->Speak( L"I heard", SPF_ASYNC, NULL);
                            cpVoice->Speak( dstrText, SPF_ASYNC, NULL );
                        }

                        if (fReplay)
                        {
                            if (fUseTTS)
                                cpVoice->Speak( L"when you said", SPF_ASYNC, NULL);
                            else
                                printf (" when you said ");
                            cpResult->SpeakAudio(NULL, 0, NULL, NULL);
                       }

                       cpResult.Release();
                    }
                    if (_wcsicmp(dstrText, pchStop) == 0)
                    {
                        break;
                    }
                    
                    cpGrammar->SetDictationState( SPRS_ACTIVE );
                } 
            }
        }
        ::CoUninitialize();
    }
    return hr;
}
发表评论
用户名: 匿名