recorder.py 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. #!/usr/bin/python
  2. # coding=utf-8
  3. """
  4. Author: zhaoyong 77912776@qq.com
  5. Date: 2025-07-02
  6. LastEditTime: 2025-08-24
  7. FilePath: robot_ai/handlers/aiui/Recorder.py
  8. Description: 录音类(优化版)
  9. """
  10. import platform
  11. import struct
  12. import threading
  13. import numpy as np
  14. import platform
  15. import sounddevice as sd
  16. system = platform.system().lower()
  17. if system == "linux":
  18. sd.default.device = 'pulse'
  19. elif system == "windows":
  20. sd.default.device = None
  21. elif system == "darwin":
  22. sd.default.device = None
  23. from contextlib import contextmanager
  24. from utils.echo_cancellation import get_self_voice_detector
  25. from utils.logger import logger
  26. def calc_rms(audio_bytes: bytes) -> float:
  27. """计算音频数据的 RMS 值"""
  28. if not audio_bytes:
  29. return 0
  30. samples = struct.unpack(f'{len(audio_bytes)//2}h', audio_bytes)
  31. arr = np.array(samples, dtype=np.int16)
  32. # 避免数值溢出和无效值
  33. if len(arr) == 0:
  34. return 0
  35. # 使用更安全的计算方式,避免大数值溢出
  36. arr_float = arr.astype(np.float64)
  37. squared = arr_float ** 2
  38. mean_squared = np.mean(squared)
  39. # 检查数值有效性
  40. if np.isnan(mean_squared) or np.isinf(mean_squared) or mean_squared < 0:
  41. return 0
  42. try:
  43. rms = np.sqrt(mean_squared)
  44. # 确保结果有效
  45. if np.isnan(rms) or np.isinf(rms):
  46. return 0
  47. return float(rms)
  48. except (ValueError, RuntimeWarning):
  49. return 0
  50. class Recorder:
  51. def __init__(self, chunk: int, channels: int = 1, rate: int = 16000, fmt: str = None):
  52. self.chunk = chunk
  53. self.channels = channels
  54. self.target_rate = rate # 目标采样率
  55. self.actual_rate = rate # 实际使用的采样率
  56. self.format = fmt or 'int16'
  57. self.platform = platform.system().lower()
  58. self.device_index = self._find_device()
  59. if self.device_index is None:
  60. raise RuntimeError("❌ 未找到可用的音频输入设备")
  61. # 语音活动检测
  62. self.is_voice_active = False
  63. self.voice_activity_threshold = 10.0
  64. self.voice_activity_count = 0
  65. self.voice_activity_lock = threading.RLock()
  66. # 回声消除
  67. self.self_voice_detector = get_self_voice_detector()
  68. self.enable_echo_cancellation = True
  69. # 音频流状态
  70. self._stream = None
  71. self._stream_lock = threading.RLock()
  72. self._is_running = False
  73. logger.info(
  74. f"🎙️ 录音器初始化: 声道={self.channels}, 目标采样率={self.target_rate}, 实际采样率={self.actual_rate}, 块大小={self.chunk}")
  75. logger.info(
  76. f" 使用设备[{self.device_index}]: {sd.query_devices(self.device_index)['name']}")
  77. # 如果实际采样率与目标不同,记录警告
  78. if self.actual_rate != self.target_rate:
  79. logger.warning(
  80. f"⚠️ 采样率不匹配: 目标={self.target_rate}Hz, 实际={self.actual_rate}Hz")
  81. # ----------------- 设备相关 -----------------
  82. def _find_device(self):
  83. """自动查找合适的音频输入设备"""
  84. devices = [
  85. (i, d) for i, d in enumerate(sd.query_devices())
  86. if d.get("max_input_channels", 0) > 0
  87. ]
  88. if not devices:
  89. return None
  90. # Ubuntu环境下的设备优先级
  91. if platform.system().lower() == 'linux':
  92. priority = ["pulse", "default", "sysdefault",
  93. "AIUI-USB-MC", "rockchip-es8388"]
  94. else:
  95. # Windows环境下的设备优先级
  96. priority = ["AIUI-USB-MC", "rockchip-es8388",
  97. "USB Audio", "sysdefault"]
  98. # 按优先级排序
  99. devices.sort(
  100. key=lambda x: next(
  101. (i for i, p in enumerate(priority)
  102. if p.lower() in x[1]["name"].lower()),
  103. 999
  104. )
  105. )
  106. # 在Ubuntu环境中,优先测试PulseAudio设备
  107. for idx, dev in devices:
  108. if self._check_sample_rate_support(idx, self.target_rate):
  109. logger.info(f"📍 选择输入设备: {dev['name']} ({idx})")
  110. self.actual_rate = self.target_rate
  111. return idx
  112. # 如果没有找到支持目标采样率的设备,尝试使用默认设备
  113. logger.warning(f"⚠️ 未找到支持 {self.target_rate}Hz 的设备,尝试使用默认设备")
  114. for idx, dev in devices:
  115. try:
  116. # 尝试使用默认采样率
  117. default_rate = int(dev.get('default_samplerate', 44100))
  118. if self._check_sample_rate_support(idx, default_rate):
  119. logger.info(
  120. f"📍 使用默认采样率设备: {dev['name']} ({idx}) - {default_rate}Hz")
  121. self.actual_rate = default_rate
  122. return idx
  123. except Exception as e:
  124. logger.debug(f"设备 {idx} 测试失败: {e}")
  125. continue
  126. # 最后选择第一个可用设备
  127. if devices:
  128. logger.warning(
  129. f"⚠️ 使用第一个可用设备: {devices[0][1]['name']} ({devices[0][0]})")
  130. # 尝试使用设备的默认采样率
  131. default_rate = int(devices[0][1].get('default_samplerate', 44100))
  132. self.actual_rate = default_rate
  133. return devices[0][0]
  134. return None
  135. def _check_sample_rate_support(self, device_index, target_rate):
  136. """检查设备是否支持目标采样率"""
  137. try:
  138. stream = sd.RawInputStream(
  139. samplerate=target_rate,
  140. channels=self.channels,
  141. dtype=self.format,
  142. blocksize=self.chunk,
  143. device=device_index
  144. )
  145. stream.close()
  146. return True
  147. except Exception as e:
  148. logger.warning(f"设备 {device_index} 不支持 {target_rate}Hz: {e}")
  149. return False
  150. # ----------------- 音频流管理 -----------------
  151. @contextmanager
  152. def audio_stream(self):
  153. """上下文管理器,自动开启/关闭音频流"""
  154. try:
  155. self._stream = sd.RawInputStream(
  156. samplerate=self.actual_rate, # 使用实际采样率
  157. channels=self.channels,
  158. dtype=self.format,
  159. blocksize=self.chunk,
  160. device=self.device_index
  161. )
  162. self._stream.start()
  163. self._is_running = True
  164. logger.info(f"✅ 音频流已启动 (采样率: {self.actual_rate}Hz)")
  165. yield self._stream
  166. finally:
  167. self._close_stream()
  168. def _close_stream(self):
  169. """安全关闭音频流"""
  170. try:
  171. if self._stream:
  172. if hasattr(self._stream, 'stop'):
  173. self._stream.stop()
  174. if hasattr(self._stream, 'close'):
  175. self._stream.close()
  176. logger.debug("🔄 音频流已关闭")
  177. except Exception as e:
  178. logger.warning(f"⚠️ 关闭音频流异常: {e}")
  179. finally:
  180. self._stream = None
  181. self._is_running = False
  182. # ----------------- 核心录音逻辑 -----------------
  183. def _voice_activity_update(self, rms: float):
  184. """更新语音活动检测状态"""
  185. with self.voice_activity_lock:
  186. if rms > self.voice_activity_threshold:
  187. self.voice_activity_count += 1
  188. if self.voice_activity_count >= 2:
  189. self.is_voice_active = True
  190. else:
  191. self.voice_activity_count = 0
  192. self.is_voice_active = False
  193. def read(self):
  194. """读取音频流,生成器返回音频块"""
  195. with self._stream_lock:
  196. with self.audio_stream() as stream:
  197. frame_count = 0
  198. buffer = b''
  199. while self._is_running and stream:
  200. try:
  201. data, _ = stream.read(self.chunk)
  202. buffer += data
  203. while len(buffer) >= self.chunk:
  204. chunk_data, buffer = buffer[:self.chunk], buffer[self.chunk:]
  205. # 回声消除
  206. if self.enable_echo_cancellation and self.self_voice_detector:
  207. if not self.self_voice_detector.process_recording_audio(chunk_data):
  208. continue
  209. # RMS 计算 & VAD
  210. if frame_count % 100 == 0:
  211. rms = calc_rms(chunk_data)
  212. self._voice_activity_update(rms)
  213. logger.debug(
  214. f"🎚️ RMS: {rms:.2f}, 语音活动={self.is_voice_active}")
  215. yield chunk_data
  216. frame_count += 1
  217. except Exception as e:
  218. logger.error(f"❌ 音频读取异常: {e}")
  219. break
  220. # ----------------- 控制接口 -----------------
  221. def close(self):
  222. """完全关闭录音器"""
  223. with self._stream_lock:
  224. logger.info("🔄 关闭录音器...")
  225. self._close_stream()
  226. logger.info("✅ 录音器已关闭")
  227. def stop_recording(self):
  228. """停止录音但保留设备"""
  229. with self._stream_lock:
  230. logger.info("🛑 停止录音...")
  231. self._close_stream()
  232. def __del__(self):
  233. try:
  234. self.close()
  235. except Exception:
  236. pass
  237. # ----------------- 调试入口 -----------------
  238. if __name__ == "__main__":
  239. import time
  240. recorder = Recorder(chunk=640)
  241. start = time.time()
  242. for data in recorder.read():
  243. if time.time() - start > 5:
  244. break
  245. recorder.close()