所谓音频环回录制就是录制扬声器播放的声音。播放到扬声器的声音已经过混合,Windows 提供了 WASAPI 来获取这种混合的音频信号。

本文将录制到的原始音频样本数据(未经封装的裸数据)直接写入文件,之后可以使用 Audacity 以"导入原始数据"的方式打开并试听。

在 Audacity 中导入原始数据时需要选择正确的音频参数,否则会导致无法播放。

初始化设备

声明相关变量:

1
2
3
4
5
// Shared recording state. Every pointer starts out null; InitRecord() fills
// them in and UnInitRecord() releases them again.

// Enumerator used to look up the default audio endpoint.
IMMDeviceEnumerator* pDeviceEnum = nullptr;
// The default render (output) endpoint being captured in loopback mode.
IMMDevice* pDevice = nullptr;
// Audio client activated on pDevice.
IAudioClient* pAudioClient = nullptr;
// Stream format obtained from IAudioClient::GetMixFormat.
WAVEFORMATEX* pWaveFormat = nullptr;
// Capture service obtained from pAudioClient; used to read captured packets.
IAudioCaptureClient* pAudioCaptureClient = nullptr;

获取默认音频输出设备并初始化环回录制服务:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
// Opens the default render endpoint and prepares it for loopback capture.
//
// On success the globals pDeviceEnum, pDevice, pAudioClient, pWaveFormat and
// pAudioCaptureClient are all populated and S_OK is returned. On failure the
// HRESULT of the failing call is returned; objects acquired before the failure
// stay in the globals, so the caller should still call UnInitRecord().
HRESULT InitRecord() {
    HRESULT hr;

    hr = CoCreateInstance(CLSID_MMDeviceEnumerator, NULL, CLSCTX_ALL, IID_IMMDeviceEnumerator, (void**)&pDeviceEnum);
    if (FAILED(hr)) {
        printf("Create device enumerator failed, hr: 0x%x", static_cast<unsigned int>(hr));
        return hr;
    }

    // eRender: loopback capture records what is being played on an output device.
    hr = pDeviceEnum->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
    if (FAILED(hr)) {
        printf("Get default audio device failed, hr: 0x%x", static_cast<unsigned int>(hr));
        return hr;
    }

    hr = pDevice->Activate(IID_IAudioClient, CLSCTX_ALL, NULL, (void**)&pAudioClient);
    if (FAILED(hr)) {
        printf("Create audio client failed, hr: 0x%x", static_cast<unsigned int>(hr));
        return hr;
    }

    hr = pAudioClient->GetMixFormat(&pWaveFormat);
    if (FAILED(hr)) {
        printf("Get mix format failed, hr: 0x%x", static_cast<unsigned int>(hr));
        return hr;
    }

    hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, BUFFER_TIME_100NS, 0, pWaveFormat, NULL);
    if (FAILED(hr)) {
        // Compatibility workaround for the Nahimic audio driver:
        // https://github.com/rainmeter/rainmeter/commit/0a3dfa35357270512ec4a3c722674b67bff541d6
        // https://social.msdn.microsoft.com/Forums/windowsdesktop/en-US/bd8cd9f2-974f-4a9f-8e9c-e83001819942/iaudioclient-initialize-failure

        // Initialization failed; retry once with the format forced to stereo.
        // nBlockAlign and nAvgBytesPerSec are recomputed to match the new channel count.
        pWaveFormat->nChannels = 2;
        pWaveFormat->nBlockAlign = (2 * pWaveFormat->wBitsPerSample) / 8;
        pWaveFormat->nAvgBytesPerSec = pWaveFormat->nSamplesPerSec * pWaveFormat->nBlockAlign;

        hr = pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, BUFFER_TIME_100NS, 0, pWaveFormat, NULL);
        if (FAILED(hr)) {
            printf("Initialize audio client failed, hr: 0x%x", static_cast<unsigned int>(hr));
            return hr;
        }
    }

    // Report the format only after Initialize has succeeded, so that the printed
    // channel count reflects the stereo fallback above when it was taken. These
    // are the parameters needed to interpret the raw sample data later.
    printf("Channel: %d, SamplesPerSec: %d, BitsPerSample: %d\n",
           pWaveFormat->nChannels,
           static_cast<int>(pWaveFormat->nSamplesPerSec),
           pWaveFormat->wBitsPerSample);

    hr = pAudioClient->GetService(IID_IAudioCaptureClient, (void**)&pAudioCaptureClient);
    if (FAILED(hr)) {
        printf("Get audio capture client failed, hr: 0x%x", static_cast<unsigned int>(hr));
        return hr;
    }

    return S_OK;
}

采样

在初始化成功后,开启独立线程按固定间隔获取缓冲区中的音频样本。exitFlag用于控制线程是否退出。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
// 线程处理函数
void QueryAudioSampleThread() {
UINT32 bufferFrameCount = 0;
HRESULT hr = pAudioClient->GetBufferSize(&bufferFrameCount);
if (FAILED(hr)) {
printf("Get buffer frame count failed, hr: 0x%x", hr);
return;
}

// 根据实际缓冲区中的样本数计算实际填满缓冲区需要的时间
REFERENCE_TIME hnsActualDuration = (double)BUFFER_TIME_100NS *
bufferFrameCount / pWaveFormat->nSamplesPerSec;

UINT32 packetLength = 0;
BYTE* buffer = NULL;
UINT32 numFramesAvailable = 0;
DWORD flags = 0;
while (!exitFlag.load())
{
// 等待半个缓冲周期
Sleep(hnsActualDuration / 10000 / 2);

hr = pAudioCaptureClient->GetNextPacketSize(&packetLength);
if (FAILED(hr)) {
printf("Get next package size failed, hr: 0x%x", hr);
break;
}

while (packetLength > 0)
{
hr = pAudioCaptureClient->GetBuffer(&buffer, &numFramesAvailable, &flags, NULL, NULL);
if (FAILED(hr)) {
printf("Get capture buffer failed, hr: 0x%x", hr);
break;
}

// 将捕获到的样本写入文件
if (!WriteSample(buffer, numFramesAvailable * pWaveFormat->nChannels * pWaveFormat->wBitsPerSample / 8)) {
printf("Write sample to file failed");
}

hr = pAudioCaptureClient->ReleaseBuffer(numFramesAvailable);
if (FAILED(hr)) {
printf("Release capture buffer failed, hr: 0x%x", hr);
break;
}

hr = pAudioCaptureClient->GetNextPacketSize(&packetLength);
if (FAILED(hr)) {
printf("Get next package size failed, hr: 0x%x", hr);
break;
}
}
}
}

Sample和Frame的含义

pWaveFormat->nSamplesPerSec 表示每个声道每秒采样的次数。例如 48000 的采样率就是每个声道每秒采集 48000 个 Sample(也就是每秒 48000 个 Frame)。一个 Sample 是一个声道在一个时间点上的一次采样。

而 Frame 则是同一个时间点上所有声道的 Sample 的集合。举例来说,一个线性 PCM 双声道音频文件的每个 Frame 包含 2 个 Sample:一个左声道 Sample 和一个右声道 Sample。

释放设备和内存

在录制结束后释放设备和内存:

1
2
3
4
5
6
7
8
9
10
11
// Releases the recording state held in the file-level globals: frees the
// wave-format block and releases the four COM interface pointers. Each pointer
// that is non-null on entry is reset to NULL, so a repeated call does nothing.
void UnInitRecord() {
    // The format block is freed with CoTaskMemFree rather than Release().
    if (pWaveFormat != NULL) {
        CoTaskMemFree(pWaveFormat);
        pWaveFormat = NULL;
    }

    // COM interfaces, released through the SAFE_RELEASE helper macro.
    SAFE_RELEASE(pDeviceEnum);
    SAFE_RELEASE(pDevice);
    SAFE_RELEASE(pAudioClient);
    SAFE_RELEASE(pAudioCaptureClient);
}

完整示例代码见:AudioLoopbackRecord.cpp