speech_handler.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. """
  2. 讯飞AIUI语音识别处理模块
  3. """
  4. import json5
  5. import requests
  6. from utils.logger import logger
  7. import json
  8. import random
  9. import threading
  10. from strategies.vision.qwenv import process_base64_image
  11. from utils.pc2_requests import _send_qa_task, _send_led_color_task
  12. from utils.tts_client import play_text_async, is_playing
  13. from utils.logger import logger
  14. from handlers.xunfei.intent_handler import IntentHandler
  15. class XunfeiSpeechHandler:
  16. """讯飞AIUI语音识别处理类"""
  17. def __init__(self):
  18. # 根据文字长度分类的语气助词列表
  19. self.short_thinking_phrases = [
  20. "嗯", "呃", "哦"
  21. ]
  22. self.medium_thinking_phrases = [
  23. "嗯…", "呃…", "哦…", "那个", "这个"
  24. ]
  25. self.long_thinking_phrases = [
  26. "嗯…", "呃…", "哦…", "那个", "这个"
  27. ]
  28. # 添加IntentHandler引用
  29. self.intent_handler = None
  30. self.iat_txt = ""
  31. def play_thinking_phrase(self, text_length: int = 0, type="thinking"):
  32. """根据文字长度播放合适的语气助词(使用缓存)"""
  33. if text_length < 3:
  34. return
  35. elif 3 <= text_length < 7:
  36. phrase = random.choice(self.short_thinking_phrases)
  37. elif 7 <= text_length < 15:
  38. phrase = random.choice(self.medium_thinking_phrases)
  39. elif 15 <= text_length:
  40. phrase = random.choice(self.long_thinking_phrases)
  41. else:
  42. return
  43. logger.info(f"播放语气助词: {phrase}")
  44. play_text_async(phrase, use_cache=True)
  45. logger.info(f"{type}文本({text_length}字),播放语气助词: {phrase}")
  46. def handle_iat_result(self, data: dict) -> None:
  47. """
  48. 处理讯飞AIUI语音识别结果
  49. Args:
  50. data: 语音识别数据
  51. """
  52. try:
  53. # 异步调用 _send_led_color_task,添加异常处理
  54. try:
  55. led_thread = threading.Thread(
  56. target=_send_led_color_task,
  57. args=(self, "DEFAULT", "GREEN"),
  58. daemon=True
  59. )
  60. led_thread.start()
  61. except Exception as e:
  62. logger.debug(f"[LED] LED控制失败,不影响语音识别: {e}")
  63. # logger.info('IAT数据', json.dumps(data, ensure_ascii=False, indent=2))
  64. # 使用get_recognition_text方法提取文本
  65. result_string = self.get_recognition_text(data)
  66. self.iat_txt = result_string
  67. # 获取状态信息
  68. sn_value = data.get('content', {}).get(
  69. 'result', {}).get('text', {}).get('sn')
  70. ls_value = data.get('content', {}).get(
  71. 'result', {}).get('text', {}).get('ls')
  72. # 确定状态值
  73. if sn_value == 1:
  74. status_value = 0
  75. elif ls_value is True:
  76. status_value = 2
  77. else:
  78. status_value = 1
  79. # 输出识别结果
  80. if result_string or status_value == 2:
  81. logger.info(f"讯飞识别到IAT结果是: {result_string}")
  82. # 异步调用 _send_qa_task,添加异常处理
  83. try:
  84. qa_thread = threading.Thread(
  85. target=_send_qa_task,
  86. args=(self, {"question": result_string}),
  87. daemon=True
  88. )
  89. qa_thread.start()
  90. except Exception as e:
  91. logger.debug(f"[QA] QA请求失败,不影响语音识别: {e}")
  92. # 使用改进的播放逻辑
  93. logger.info(f"开始播放语气词")
  94. self.play_thinking_phrase(len(result_string), type="thinking")
  95. except Exception as e:
  96. logger.error(f"讯飞语音识别处理异常: {e}")
  97. def get_recognition_text(self, data: dict) -> str:
  98. """
  99. 获取讯飞识别文本
  100. Args:
  101. data: 语音识别数据
  102. Returns:
  103. str: 识别文本
  104. """
  105. try:
  106. words = []
  107. ws_list = data.get('content', {}).get(
  108. 'result', {}).get('text', {}).get('ws', [])
  109. for item in ws_list:
  110. cw_list = item.get('cw', [])
  111. for cw in cw_list:
  112. words.append(cw.get('w', ''))
  113. return ''.join(words)
  114. except Exception as e:
  115. logger.error(f"获取讯飞识别文本异常: {e}")
  116. return ""