Jon Yao 1 week ago
commit
5aa4f9d93c
100 changed files with 11433 additions and 0 deletions
  1. 97 0
      .asoundrc
  2. 34 0
      .gitignore
  3. 362 0
      aiui.py
  4. 46 0
      build_nuitka.py
  5. 1 0
      config/__init__.py
  6. 47 0
      config/action.yaml
  7. 69 0
      config/aiot.yaml
  8. BIN
      config/aiui/arm/e05e559a9_v201010_aee.so
  9. BIN
      config/aiui/arm/lib4mic_mmsp.so
  10. BIN
      config/aiui/arm/libEdgeEsr.so
  11. BIN
      config/aiui/arm/libaikit.so
  12. BIN
      config/aiui/arm/libaiui.so
  13. BIN
      config/aiui/arm/libavvtn_mic4.so
  14. BIN
      config/aiui/arm/libevad.so
  15. BIN
      config/aiui/arm/libmaxengine_esr.so
  16. BIN
      config/aiui/arm/libmaxengine_esr.so.1.2.0
  17. BIN
      config/aiui/arm/libse30_circle_mmsp.so
  18. BIN
      config/aiui/arm/libvtn_mic4.so
  19. BIN
      config/aiui/arm/libvtn_mic6_circle.so
  20. BIN
      config/aiui/arm/libxlite.so
  21. BIN
      config/aiui/arm/libxlite.so.3.0.1
  22. BIN
      config/aiui/arm/libxlite_asr.so
  23. 120 0
      config/aiui/cfg/aiui.cfg
  24. 118 0
      config/aiui/cfg/aiui_v3.cfg
  25. 125 0
      config/aiui/cfg/aiui示例.cfg
  26. 125 0
      config/aiui/cfg/aiui说明示例.cfg
  27. 38 0
      config/aiui/cfg/avvtn.cfg
  28. BIN
      config/aiui/esr/esr-ch_en.jet
  29. BIN
      config/aiui/esr/mlp_yn.bin
  30. BIN
      config/aiui/esr/pproc/BiLstm_shengxue_offline_20210324_slni2_fixpunc.bin
  31. BIN
      config/aiui/esr/pproc/BiLstm_shengxue_offline_20210324_slni2_punc.bin
  32. BIN
      config/aiui/esr/pproc/newpunc_ch_en_with_vad.bin
  33. 224 0
      config/aiui/esr/pproc/pproc.cfg
  34. 977 0
      config/aiui/esr/pproc/resource_2_0_12/num_not_change_list
  35. BIN
      config/aiui/esr/pproc/resource_2_0_12/number.gram
  36. BIN
      config/aiui/esr/pproc/resource_2_0_12/number_math_normal.dic
  37. BIN
      config/aiui/esr/pproc/resource_2_0_12/number_math_normal.gram
  38. 352 0
      config/aiui/esr/pproc/resource_2_0_12/replace_list
  39. BIN
      config/aiui/esr/word.bin
  40. BIN
      config/aiui/x64/aiui.dll
  41. BIN
      config/aiui/x64/aiui.lib
  42. BIN
      config/aiui/x64/vtn_mic1.dll
  43. BIN
      config/aiui/x86/aiui.dll
  44. BIN
      config/aiui/x86/aiui.lib
  45. BIN
      config/aiui/x86/vtn_mic1.dll
  46. 128 0
      config/config.yaml
  47. 102 0
      config/config/action_config.py
  48. 147 0
      config/config/aiot_config.py
  49. 206 0
      config/config/dify_config.py
  50. 149 0
      config/config/echo_cancellation_conf.py
  51. 184 0
      config/config/settings.py
  52. 26 0
      config/dify.yaml
  53. 77 0
      config/xunfei_config.py
  54. 1 0
      core/__init__.py
  55. 8 0
      core/aiui/__init__.py
  56. 282 0
      core/aiui/recorder.py
  57. 54 0
      core/baidu/message_processor.py
  58. 165 0
      core/message_processor.py
  59. 110 0
      core/socket_client.py
  60. 176 0
      core/xunfei/message_processor.py
  61. 109 0
      docs/aiui.cfg
  62. 144 0
      docs/hot_words.txt
  63. 417 0
      fix_alsa_config.py
  64. 1 0
      handlers/__init__.py
  65. 233 0
      handlers/aiui/AIui_node.py
  66. 427 0
      handlers/aiui/EventListener.py
  67. 0 0
      handlers/aiui/__init__.py
  68. 175 0
      handlers/aiui/pyAIUIConstant.py
  69. 324 0
      handlers/aiui/pyaiui.py
  70. 486 0
      handlers/baidu/speech_handler.py
  71. 0 0
      handlers/dify/__init__.py
  72. 35 0
      handlers/dify/recognize_intention.py
  73. 39 0
      handlers/dify/recognize_models.py
  74. 59 0
      handlers/speech_handler.py
  75. 321 0
      handlers/xunfei/intent_handler.py
  76. 177 0
      handlers/xunfei/knowledge_handler.py
  77. 339 0
      handlers/xunfei/nlp_handler.py
  78. 141 0
      handlers/xunfei/speech_handler.py
  79. 159 0
      main_robot.py
  80. 109 0
      requirements.txt
  81. 1 0
      strategies/__init__.py
  82. 6 0
      strategies/action/__init__.py
  83. 151 0
      strategies/action/action_strategies.py
  84. 17 0
      strategies/action/base_strategy.py
  85. 96 0
      strategies/action/execute_actions.py
  86. 33 0
      strategies/aiui_process.py
  87. 67 0
      strategies/base_strategy.py
  88. 34 0
      strategies/confirm_process.py
  89. BIN
      strategies/vision/demo_pic.png
  90. 64 0
      strategies/vision/dify_vision_demo.py
  91. 98 0
      strategies/vision/qwenv.py
  92. 496 0
      utils/control_aiot.py
  93. 1297 0
      utils/echo_cancellation.py
  94. 173 0
      utils/init_system.py
  95. 36 0
      utils/load_config.py
  96. 87 0
      utils/logger.py
  97. 45 0
      utils/network.py
  98. 149 0
      utils/pc2_requests.py
  99. 162 0
      utils/time_sync.py
  100. 176 0
      utils/tts_cache.py

+ 97 - 0
.asoundrc

@@ -0,0 +1,97 @@
+# ALSA配置文件 - 自动生成
+# 用于修复音频设备配置问题
+
+# 默认设备配置
+pcm.!default {
+    type hw
+    card 0
+    device 0
+}
+
+ctl.!default {
+    type hw
+    card 0
+}
+
+# 简单PCM设备
+pcm.simple {
+    type plug
+    slave.pcm "hw:0,0"
+}
+
+# 默认PCM设备
+pcm.!default {
+    type plug
+    slave.pcm "simple"
+}
+
+# 禁用不存在的设备以避免ALSA错误
+pcm.front {
+    type null
+}
+
+pcm.rear {
+    type null
+}
+
+pcm.center_lfe {
+    type null
+}
+
+pcm.side {
+    type null
+}
+
+pcm.surround21 {
+    type null
+}
+
+pcm.surround40 {
+    type null
+}
+
+pcm.surround41 {
+    type null
+}
+
+pcm.surround50 {
+    type null
+}
+
+pcm.surround51 {
+    type null
+}
+
+pcm.surround71 {
+    type null
+}
+
+pcm.iec958 {
+    type null
+}
+
+pcm.spdif {
+    type null
+}
+
+pcm.hdmi {
+    type null
+}
+
+pcm.modem {
+    type null
+}
+
+pcm.phoneline {
+    type null
+}
+
+# 禁用OSS设备
+pcm.dsp {
+    type null
+}
+
+# 禁用USB音频设备错误
+pcm.usb_stream {
+    type null
+}

+ 34 - 0
.gitignore

@@ -0,0 +1,34 @@
+# IDE和编辑器配置
+.vscode/
+**/.idea/
+
+# Python缓存文件
+**/__pycache__/
+*.pyc
+*.pyo
+*.pyd
+
+# 日志文件
+logs/
+*.log
+**/logs/
+
+# 环境变量
+/.env/
+
+# 临时文件
+tmp/
+keda_tts_files/
+.history/
+.cursor/
+.kiro/
+
+# 大模型和向量数据库
+**/chroma_db/
+**/chroma/
+**/bigModels/
+tts_cache/
+cache/
+build/
+docker/build
+AIUI/

+ 362 - 0
aiui.py

@@ -0,0 +1,362 @@
+import _thread as thread
+import base64
+import datetime
+import hashlib
+import hmac
+import json
+import traceback
+from urllib.parse import urlparse
+import time
+from datetime import datetime
+from time import mktime
+from urllib.parse import urlencode
+from wsgiref.handlers import format_date_time
+
+import websocket
+
+# 修改应用配置和文件地址后直接执行即可
+
+# 请求地址
+url = "wss://aiui.xf-yun.com/v3/aiint/sos"
+
+# 应用配置
+appid = "f016fce3"
+api_key = "fcb58dc79de9b0568d2287bd8184e291"
+api_secret = "YTFiN2NkOGVjNTVjY2QyMTlmMTViOTBh"
+
+sn = "test-sn"
+
+# 场景
+scene = "test_box"
+
+vcn = "x5_lingxiaoyue_flow"
+
+# 请求类型用来设置文本请求还是音频请求,text/audio
+data_type = 'text'
+
+# 音频请求需要先设置audio_path
+# 当前音频格式默认pcm 16k 16bit,修改音频格式需要修改audioReq中的payload中音频相关参数
+# data_type = 'audio'
+
+# 音频请求上传的音频文件路径
+text_msg = ""
+audio_path = "weather.pcm"
+
+# 文本请求输入的文本
+question = "介绍下苏超?"
+question = "你好,今天天气怎么样,介绍下苏超"
+
+# 下面两个参数配合音频采样率设置,16k 16bit的音频: 每 40毫秒 发送 1280字节
+# 每帧音频数据大小,单位字节
+frame_size = 1280
+# 每帧音频发送间隔
+sleep_inetrval = 0.04
+
+
+class AIUIV3WsClient(object):
+    # 初始化
+    def __init__(self):
+        self.handshake = self.assemble_auth_url(url)
+
+    # 生成握手url
+    def assemble_auth_url(self, base_url):
+        host = urlparse(base_url).netloc
+        path = urlparse(base_url).path
+        # 生成RFC1123格式的时间戳
+        now = datetime.now()
+        date = format_date_time(mktime(now.timetuple()))
+
+        # 拼接字符串
+        signature_origin = "host: " + host + "\n"
+        signature_origin += "date: " + date + "\n"
+        signature_origin += "GET " + path + " HTTP/1.1"
+
+        # 进行hmac-sha256进行加密
+        print(signature_origin)
+        signature_sha = hmac.new(api_secret.encode('utf-8'), signature_origin.encode('utf-8'),
+                                 digestmod=hashlib.sha256).digest()
+
+        signature_sha_base64 = base64.b64encode(
+            signature_sha).decode(encoding='utf-8')
+
+        authorization_origin = f'api_key="{api_key}", algorithm="hmac-sha256", headers="host date request-line", signature="{signature_sha_base64}"'
+
+        print('get authorization_origin:', authorization_origin)
+        authorization = base64.b64encode(
+            authorization_origin.encode('utf-8')).decode(encoding='utf-8')
+
+        # 将请求的鉴权参数组合为字典
+        v = {
+            "host": host,
+            "date": date,
+            "authorization": authorization,
+        }
+        # 拼接鉴权参数,生成url
+        url = base_url + '?' + urlencode(v)
+        # 此处打印出建立连接时候的url,参考本demo的时候可取消上方打印的注释,比对相同参数时生成的url与自己代码生成的url是否一致
+        return url
+
+    def on_open(self, ws):
+        # 连接建立成功后开始发送数据
+        print("### ws connect open")
+        thread.start_new_thread(self.run, ())
+
+    def run(self):
+        if data_type == "text":
+            self.text_req()
+        if data_type == "audio":
+            self.audio_req()
+
+    def text_req(self):
+        # 文本请求status固定为3,interact_mode固定为oneshot
+        aiui_data = {
+            "header": {
+                "appid": appid,
+                "sn": sn,
+                "stmid": "text-1",
+                "status": 3,
+                "scene": scene,
+                "msc.lat": 19.65309164062,
+                "msc.lng": 109.259056086,
+                "os_sys": "android",
+                "interact_mode":"oneshot"
+            },
+            "parameter": {
+                "nlp": {
+                    "nlp": {
+                        "compress": "raw",
+                        "format": "json",
+                        "encoding": "utf8"
+                    },
+                    "new_session": True,
+                },
+                # 合成参数
+                "tts": {
+                    # 发音人
+                    "vcn": vcn,
+                    "tts": {
+                        "channels": 1,
+                        "bit_depth": 16,
+                        "sample_rate": 16000,
+                        "encoding": "raw"
+                    }
+                }
+            },
+            "payload": {
+                "text": {
+                    "compress": "raw",
+                    "format": "plain",
+                    "text": base64.b64encode(question.encode('utf-8')).decode('utf-8'),
+                    "encoding": "utf8",
+                    "status": 3
+                }
+            }
+        }
+        data = json.dumps(aiui_data)
+        print('text request data:', data)
+        self.ws.send(data)
+
+    def audio_req(self):
+        f = open(audio_path, 'rb')
+        try:
+            f.seek(0, 2)
+            eof = f.tell()
+            f.seek(0, 0)
+
+            first = True
+            status = 0
+            while True:
+                d = f.read(frame_size)
+                if not d:
+                    break
+
+                if f.tell() >= eof:
+                    # 尾帧
+                    status = 2
+                elif not first:
+                    # 中间帧
+                    status = 1
+
+                req = self.genAudioReq(d, status)
+                first = False
+                self.ws.send(req)
+                # 发送间隔
+                time.sleep(sleep_inetrval)
+        finally:
+            f.close()
+
+    def genAudioReq(self, data, status):
+        # 构造pcm音频请求参数
+        aiui_data = {
+            "header": {
+                "appid": appid,
+                "sn": sn,
+                "stmid": "audio-1",
+                "status": status,
+                "scene": scene,
+                "interact_mode": "continuous"
+            },
+            "parameter": {
+                "nlp": {
+                    "nlp": {
+                        "compress": "raw",
+                        "format": "json",
+                        "encoding": "utf8"
+                    },
+                    "new_session": True
+                },
+                # 合成参数
+                "tts": {
+                    # 发音人
+                    "vcn": vcn,
+                    "tts": {
+                        "channels": 1,
+                        "bit_depth": 16,
+                        "sample_rate": 16000,
+                        "encoding": "raw"
+                    }
+                }
+            },
+            "payload": {
+                "audio": {
+                    "encoding": "raw",
+                    "sample_rate": 16000,
+                    "channels": 1,
+                    "bit_depth": 16,
+                    "status": status,
+                    "audio": base64.b64encode(data).decode(),
+                }
+            }
+        }
+        return json.dumps(aiui_data)
+
+    # 收到websocket消息的处理
+    def on_message(self, ws, message):
+        try:
+            data = json.loads(message)
+
+            # print('原始结果:', message)
+            header = data['header']
+            code = header['code']
+            # 结果解析
+            if code != 0:
+                print('请求错误:', code, json.dumps(data, ensure_ascii=False))
+                ws.close()
+            sid = header.get('sid', "sid")
+            payload = data.get('payload', {})
+            parameter = data.get('parameter', {})
+            if 'event' in payload:
+                # 事件结果
+                event_json = payload['event']
+                event_text_bs64 = event_json['text']
+                event_text = base64.b64decode(event_text_bs64).decode('utf-8')
+                print("事件,", event_text)
+            if 'iat' in payload:
+                # 识别结果
+                iat_json = payload['iat']
+                iat_text_bs64 = iat_json['text']
+                iat_text = base64.b64decode(iat_text_bs64).decode('utf-8')
+                print("识别结果,seq:", iat_json['seq'], ",status:",
+                      iat_json['status'], ",", self.parse_iat_result(iat_text))
+            if 'cbm_tidy' in payload:
+                # 语义规整结果(历史改写),意图拆分
+                cbm_tidy_json = payload['cbm_tidy']
+                cbm_tidy_text_bs64 = cbm_tidy_json['text']
+                cbm_tidy_text = base64.b64decode(
+                    cbm_tidy_text_bs64).decode('utf-8')
+                cbm_tidy_json = json.loads(cbm_tidy_text)
+                print("语义规整结果:")
+                intents = cbm_tidy_json['intent']
+                for intent in intents:
+                    print("  intent index:",
+                          intent['index'], ",意图语料:", intent['value'])
+            if 'cbm_intent_domain' in payload:
+                # 意图拆分后的落域结果
+                cbm_intent_domain_json = payload['cbm_intent_domain']
+                cbm_intent_domain_text_bs64 = cbm_intent_domain_json['text']
+                cbm_intent_domain_text = base64.b64decode(
+                    cbm_intent_domain_text_bs64).decode('utf-8')
+                index = self.get_intent_index(parameter, "cbm_intent_domain")
+                print("intent index:", index, ",落域结果:", cbm_intent_domain_text)
+            if 'cbm_semantic' in payload:
+                # 技能结果
+                cbm_semantic_json = payload['cbm_semantic']
+                cbm_semantic_text_bs64 = cbm_semantic_json['text']
+                cbm_semantic_text = base64.b64decode(
+                    cbm_semantic_text_bs64).decode('utf-8')
+                cbm_semantic_json = json.loads(cbm_semantic_text)
+                index = self.get_intent_index(parameter, "cbm_semantic")
+                if cbm_semantic_json['rc'] != 0:
+                    print("intent index:", index, ",技能结果:说法:",
+                          cbm_semantic_json['text'], ",", cbm_semantic_text)
+                else:
+                    print("intent index:", index, ",技能结果:说法:",
+                          cbm_semantic_json['text'], ",命中技能:", cbm_semantic_json['category'], ",回复:", cbm_semantic_json['answer']['text'])
+            if 'nlp' in payload:
+                # 语义结果,经过大模型润色的最终结果
+                nlp_json = payload['nlp']
+                nlp_text_bs64 = nlp_json['text']
+                nlp_text = base64.b64decode(nlp_text_bs64).decode('utf-8')
+                print("语义结果 seq:", nlp_json['seq'], ",status:",
+                      nlp_json['status'], ",nlp.text: ", nlp_text)
+            if 'tts' in payload:
+                # 将结果保存到文件,文件后缀名需要根据tts参数中的encoding来决定
+                audioData = payload['tts']['audio']
+                if audioData != None:
+                    audioBytes = base64.b64decode(audioData)
+                    print("tts结果: ", len(audioBytes), " 字节")
+                    with open(sid + "." + self.get_suffix(payload['tts']['encoding']), 'ab') as file:
+                        file.write(audioBytes)
+
+            if 'status' in header and header['status'] == 2:
+                # 接收最后一帧结果,关闭连接
+                ws.close()
+        except Exception as e:
+            traceback.print_exc()
+            pass
+
+    def parse_iat_result(self, iat_res):
+        iat_text = ""
+        iat_res_json = json.loads(iat_res)
+        for cw in iat_res_json['text']['ws']:
+            for cw_item in cw["cw"]:
+                iat_text += cw_item['w']
+
+        return iat_text
+
+    def get_intent_index(self, parameter, key):
+        if key in parameter:
+            return parameter[key]['loc']['intent']
+
+        return "-"
+
+    def get_suffix(self, encoding):
+        if encoding == 'raw':
+            return 'pcm'
+        if encoding == 'lame':
+            return 'mp3'
+
+        return 'unknow'
+
+    def on_error(self, ws, error):
+        print("### connection error: ", str(error))
+        ws.close()
+
+    def on_close(self, ws, close_status_code, close_msg):
+        print("### connection is closed ###, cloce code:", close_status_code)
+
+    def start(self):
+        self.ws = websocket.WebSocketApp(
+            self.handshake,
+            on_open=self.on_open,
+            on_message=self.on_message,
+            on_error=self.on_error,
+            on_close=self.on_close,
+        )
+        self.ws.run_forever()
+
+
+if __name__ == "__main__":
+
+    client = AIUIV3WsClient()
+    client.start()

+ 46 - 0
build_nuitka.py

@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+import os
+import subprocess
+import sys
+import multiprocessing
+
+
+def main():
+    main_file = "main_robot.py"
+    output_name = "robot_ai"
+    build_dir = "build"
+
+    if not os.path.exists(main_file):
+        print("错误: 找不到 main_robot.py")
+        return False
+
+    jobs = multiprocessing.cpu_count()
+
+    # 使用参数列表以避免在 Windows PowerShell 下多行续行被误解析
+    cmd = [
+        sys.executable,
+        "-m",
+        "nuitka",
+        "--standalone",
+        f"--jobs={jobs}",
+        f"--output-dir={build_dir}",
+        f"--output-filename={output_name}",
+        "--nofollow-import-to=pytest,unittest,setuptools",
+        "--include-module=socket",
+        "--include-module=ssl",
+        "--include-data-dir=config=config",
+        # 显式包含 AIUI 运行所需的本地库
+        # Windows x64 AIUI 动态库
+        "--include-data-files=config/aiui/x64/*.dll=config/aiui/x64/",
+        # Linux aarch64 AIUI 动态库(生产环境)
+        "--include-data-files=config/aiui/arm/*.so=config/aiui/arm/",
+        main_file,
+    ]
+
+    print("执行:", " ".join(cmd))
+    # 不使用 shell,避免转义与续行差异导致参数丢失
+    return subprocess.run(cmd).returncode == 0
+
+
+if __name__ == "__main__":
+    sys.exit(0 if main() else 1)

+ 1 - 0
config/__init__.py

@@ -0,0 +1 @@
+# 配置模块

+ 47 - 0
config/action.yaml

@@ -0,0 +1,47 @@
+# 机器人动作指令常量
+actions:
+  # 基础动作常量
+  SALUTE: "SALUTE"  # 敬礼
+  LOVE_PRINT: "LOVE_PRINT"  # 比心
+  SPREAD_ARMS_HORIZONTALLY: "SPREAD_ARMS_HORIZONTALLY"  # 水平展开双臂
+  RAISE_HANDS: "RAISE_HANDS"  # 抬起双臂
+  RIGHT_HAND_GUIDING: "RIGHT_HAND_GUIDING"  # 右手引导
+  LEFT_HAND_GUIDING: "LEFT_HAND_GUIDING"  # 左手引导
+  END_HANDSHAKE: "END_HANDSHAKE"  # 结束握手
+  TURN_AROUND_WAVE_HAND: "TURN_AROUND_WAVE_HAND"  # 转身挥手
+  WAVE_HAND: "WAVE_HAND"  # 挥手
+  KEEP_HANDSHAKE: "KEEP_HANDSHAKE"  # 保持握手
+  AUTO_DROP_HANDSHAKE: "AUTO_DROP_HANDSHAKE"  # 握手结束自动放下
+
+# 动作指令字典
+action_dict:
+  BEFORE: 1001  # 前
+  BACK: 1002    # 后
+  LEFT: 1003    # 左
+  RIGHT: 1004   # 右
+  RESET: 2000   # 重置
+  KEEP_HANDSHAKE: 2001  # 保持握手
+  END_HANDSHAKE: 2002   # 结束握手
+  WAVE_HAND: 2003       # 挥手
+  TURN_AROUND_WAVE_HAND: 2004  # 转身挥手
+  AUTO_DROP_HANDSHAKE: 2005    # 握手结束自动放下
+  RAISE_HAND_TO_CHEST: 2006    # 抬起双臂到胸部
+  SPREAD_ARMS_HORIZONTALLY: 2007  # 水平展开双臂
+  LEFT_HAND_GUIDING: 2008      # 左手引导
+  RIGHT_HAND_GUIDING: 2009     # 右手引导
+  LEFT_TWIST_HANDS_UP: 2010    # 左手向上 twisting
+  RIGHT_TWIST_HANDS_UP: 2011   # 右手向上 twisting
+  RAISE_PLATE_TO_CHEST: 2012   # 抬起 Plate 到胸部
+  RAISE_GLASS: 2013            # 抬起 Glass
+  LOVE_PRINT: 2014             # 比心
+  HUGGING: 2015                # 拥抱
+  SALUTE: 2016                 # 敬礼
+  BATTLE_POSE: 3056            # 战斗姿态
+  HAND_CAMERA: 3057            # 手部相机
+
+# 点位字典
+point_dict:
+  POINT_ONE: "1"    # 点位1
+  POINT_TWO: "2"    # 点位2
+  POINT_THREE: "3"  # 点位3
+  POINT_FOUR: "4"   # 点位4

+ 69 - 0
config/aiot.yaml

@@ -0,0 +1,69 @@
+# AIOT 物联中控平台配置
+
+# 机器ID配置
+machine:
+  machine-id: "d21e4997e4f78314ca991ffa8734c366"
+  machine-name: "robot_ai"
+
+# 物联AIOT平台配置
+aiot_platform:
+  # 江苏移动业务账号
+  union_id: "18505105276"
+  # union_id: "13951742810"
+  
+  # 测试环境配置
+  # host: "http://iot.test.tstar-tech.com:6001"
+  # app_id: "DXJSul8kAD4o0vLm"
+  # app_secret: "NExpWCKDl7vXb4ElFfqktsFr+Go5tiem"
+  
+  # 正式环境配置
+  host: "http://iot.tstar-tech.com:6001/"
+  app_id: "NVOPaEwo8S61ytrZ"
+  app_secret: "/uUMzm79BK0RPzI8VBgomSRjngXb5/sH"
+
+# 场景配置
+scenes:
+  scene_001: "500"  # 明灯id
+  scene_002: "501"  # 灭灯id
+
+# 设备配置
+devices:
+  livingroom_downlight:
+    dev_name: "客厅筒灯"
+    product_id: "RSD00003"
+    device_id: "ATARZ4A3000124587C00D2DC"
+    resource_id: "power1"
+    open_value: "1"
+    close_value: "2"
+  
+  rail_light:
+    dev_name: "客厅轨道灯"
+    product_id: "U7GE5C7X"
+    device_id: "0001200d1cc089fffecaf196"
+    resource_id: "power1"
+    open_value: true
+    close_value: false
+  
+  light_strip:
+    dev_name: "灯带"
+    product_id: "KXYP79V2"
+    device_id: "0001200d90395efffe80c4ee"
+    resource_id: "power1"
+    open_value: true
+    close_value: false
+  
+  ceiling_light:
+    dev_name: "吸顶灯"
+    product_id: "KXYP79V2"
+    device_id: "0001200d90395efffe80c4ee"
+    resource_id: "power2"
+    open_value: true
+    close_value: false
+  
+  the_curtains:
+    dev_name: "窗帘"
+    product_id: "RSD00005"
+    device_id: "ATARWSA40001B8D61AA720B0"
+    resource_id: "work1.work1"
+    open_value: "1"
+    close_value: "2"

BIN
config/aiui/arm/e05e559a9_v201010_aee.so


BIN
config/aiui/arm/lib4mic_mmsp.so


BIN
config/aiui/arm/libEdgeEsr.so


BIN
config/aiui/arm/libaikit.so


BIN
config/aiui/arm/libaiui.so


BIN
config/aiui/arm/libavvtn_mic4.so


BIN
config/aiui/arm/libevad.so


BIN
config/aiui/arm/libmaxengine_esr.so


BIN
config/aiui/arm/libmaxengine_esr.so.1.2.0


BIN
config/aiui/arm/libse30_circle_mmsp.so


BIN
config/aiui/arm/libvtn_mic4.so


BIN
config/aiui/arm/libvtn_mic6_circle.so


BIN
config/aiui/arm/libxlite.so


BIN
config/aiui/arm/libxlite.so.3.0.1


BIN
config/aiui/arm/libxlite_asr.so


+ 120 - 0
config/aiui/cfg/aiui.cfg

@@ -0,0 +1,120 @@
+/* AIUI 版本参数配置 */
+{
+     /* 登录参数 */
+    "login":{
+        "appid": "f016fce3",
+        "sn": "yd-00:00:00:00:00:01",
+        "key": "fcb58dc79de9b0568d2287bd8184e291",
+        "api_secret": "YTFiN2NkOGVjNTVjY2QyMTlmMTViOTBh"
+    },
+
+    /* 全局设置 */
+    // 若要将新版本SDK(6.6.xxxx.xxxx以上)当作老版本(5.6.xxxx.xxxx)使用,请把aiui_ver的值改为1
+    // "scene" 场景,示例:"main","main_box"(带box的为测试环境)。在平台的应用配置页面添加和配置
+    // "aiui_ver" AIUI版本,取值:"1"(通用语义),"2"(交互大模型),"3"(极速交互,默认)
+    // "clean_dialog_history" 清除对话历史方式,取值:"auto"(SDK自动控制,默认,按speech中的interact_mode处理:oneshot模式下首次唤醒清历史,continuous模式下每次唤醒都会清历史),"user"(外部控制)
+    "global": {
+        "scene": "test",
+        "aiui_ver": "3",
+        "clean_dialog_history": "auto"
+    },
+
+    /* 交互参数 */
+    // "interact_timeout" 交互超时时间,单位:毫秒。取值:[10000, 180000],10000(默认)。-1表示不超时
+    // "result_timeout" 结果超时时间,单位:毫秒。取值:5000(默认)
+    "interact":{
+        // 交互超时关闭
+        "interact_timeout": "-1",
+        "result_timeout": "5000"
+    },
+
+    /* 业务相关参数 */
+    // 本地vad参数
+    "vad":{
+        "vad_enable": "1",
+        "engine_type": "meta"
+    },
+
+    // 识别(音频输入)参数
+    // "sample_rate" 音频采集率,取值:"16000"(默认值)
+    // "data_encoding" 音频编码,取值:"raw"(不压缩),"speex-wb"(默认),"opus-wb"
+    "iat":{
+        "sample_rate": "16000",
+        "data_encoding": "speex-wb"
+    },
+
+    // 音频参数,在上传云端时带上
+    // "pers_param" 个性化参数
+    "audioparams":{
+        "msc.lng":"118.78",   //经度 示例:117.16334474(不超过8位精度)
+        "msc.lat":"32.07",   //纬度 示例:31.82102191(不超过8位精度)
+        "pers_param": "{\"appid\":\"\",\"uid\":\"\"}"
+    },
+
+    // 录音参数
+    // "sample_size" 采集值大小,单位:字节,一般是2即16bit编码
+    // "channel_count" 通道数量,不接麦克风阵列时一般为1
+    // "channel_filter" 通道过滤参数,即从原始数据中取相应的通道组成新阵列数据,-1代表填充一个全0通道
+    "recorder":{
+        "channel_count": 1,
+        "channel_filter": "0,-1"
+    },
+
+    // 唤醒参数
+    "ivw":{
+        "mic_type": "mic1",
+        "res_type": "path",
+        "res_path": "AIUI/assets/vtn/vtn.ini"
+    },
+
+    /* 业务流程相关参数 */
+    // 语音业务流程控制
+    // "data_source" 音频数据来源,取值:"sdk"(SDK内部录音,默认),"user"(外部录音,写入SDK)
+    // "wakeup_mode" 唤醒模式,取值:"off"(关闭,无语音唤醒),"vtn"(阵列唤醒)
+    // "interact_mode" 交互模式,取值:"oneshot"(一次唤醒一次交互),"continuous"(一次唤醒多次交互,默认)
+    "speech":{
+        "data_source": "sdk",
+        "wakeup_mode": "off",
+        "interact_mode": "continuous",
+        "audio_captor": "system"
+    },
+
+    // 合成参数
+    // "voice_name" 发音人
+    "tts":{
+        "voice_name": "x5_lingxiaoyue_flow"
+    },
+
+    // 传给大模型的参数
+    "cbmparams": {
+        "nlp": {
+            "nlp": {
+                "encoding": "utf8",
+                "compress": "raw",
+                "format": "json"
+            },
+        "sub_scene": "cbm_v45"
+        }
+    },
+
+    // v3版本头部参数
+    // "prot_interact_mode" 协议交互模式,跟speech中的interact_mode没有关联,v3才支持。取值:"oneshot"(单工,允许使用本地vad,默认),"continuous_vad"(全双工支持本地vad),"continuous"(全双工,必须关闭本地vad)
+    // 一般情况下不需要关注该参数,SDK内部会根据是否配置虚拟人来自动设置,设置策略:无虚拟人选oneshot,有虚拟人选continuous_vad
+    "header": {
+        "prot_interact_mode": "oneshot"
+    },
+
+    /* 日志设置 */
+    // "debug_log" 调试日志开关,取值:"1"(打开),"0"(关闭,默认)
+    // "save_datalog" 保存数据日志开关,打开则会保存输入的录音数据和输出的结果。取值:"1"(打开),"0"(关闭,默认)
+    // "datalog_path" 数据日志保存路径,默认路径为:"/sdcard/AIUI/data/"(Android),"AIUI/data"(其他平台)
+    // "datalog_size" 数据日志大小限制,单位:MB。当保存的日志数据大于该值后,会自动删除最老的日志以保存大小不超出限制
+    // "raw_audio_path" 原始音频保存路径
+    "log":{
+        "debug_log": "1",
+        "save_datalog": "0",
+        "datalog_path": "",
+        "datalog_size": 1024,
+        "raw_audio_path": ""
+    },
+}

+ 118 - 0
config/aiui/cfg/aiui_v3.cfg

@@ -0,0 +1,118 @@
+/* AIUI 版本参数配置 */
+{
+     /* 登录参数 */
+    "login":{
+        "appid": "f016fce3",
+        "sn": "yd-00:00:00:00:00:01",
+        "key": "fcb58dc79de9b0568d2287bd8184e291",
+        "api_secret": "YTFiN2NkOGVjNTVjY2QyMTlmMTViOTBh"
+    },
+
+    /* 全局设置 */
+    // 若要将新版本SDK(6.6.xxxx.xxxx以上)当作老版本(5.6.xxxx.xxxx)使用,请把aiui_ver的值改为1
+    // "scene" 场景,示例:"main","main_box"(带box的为测试环境)。在平台的应用配置页面添加和配置
+    // "aiui_ver" AIUI版本,取值:"1"(通用语义),"2"(交互大模型),"3"(极速交互,默认)
+    // "clean_dialog_history" 清除对话历史方式,取值:"auto"(SDK自动控制,默认,按speech中的interact_mode处理:oneshot模式下首次唤醒清历史,continuous模式下每次唤醒都会清历史),"user"(外部控制)
+    "global": {
+        "scene": "test",
+        "aiui_ver": "3",
+        "clean_dialog_history": "auto"
+    },
+
+    /* 交互参数 */
+    // "interact_timeout" 交互超时时间,单位:毫秒。取值:[10000, 180000],10000(默认)。-1表示不超时
+    // "result_timeout" 结果超时时间,单位:毫秒。取值:5000(默认)
+    "interact":{
+        // 交互超时关闭
+        "interact_timeout": "-1",
+        "result_timeout": "5000"
+    },
+
+    /* 业务相关参数 */
+    // 本地vad参数
+    "vad":{
+        "vad_enable": "1",
+        "engine_type": "meta"
+    },
+
+    // 识别(音频输入)参数
+    // "sample_rate" 音频采集率,取值:"16000"(默认值)
+    // "data_encoding" 音频编码,取值:"raw"(不压缩),"speex-wb"(默认),"opus-wb"
+    "iat":{
+        "sample_rate": "16000",
+        "data_encoding": "speex-wb"
+    },
+
+    // 音频参数,在上传云端时带上
+    // "pers_param" 个性化参数
+    "audioparams":{
+        "pers_param": "{\"appid\":\"\",\"uid\":\"\"}"
+    },
+
+    // 录音参数
+    // "sample_size" 采集值大小,单位:字节,一般是2即16bit编码
+    // "channel_count" 通道数量,不接麦克风阵列时一般为1
+    // "channel_filter" 通道过滤参数,即从原始数据中取相应的通道组成新阵列数据,-1代表填充一个全0通道
+    "recorder":{
+        "channel_count": 1,
+        "channel_filter": "0,-1"
+    },
+
+    // 唤醒参数
+    "ivw":{
+        "mic_type": "mic1",
+        "res_type": "path",
+        "res_path": "AIUI/assets/vtn/vtn.ini"
+    },
+
+    /* 业务流程相关参数 */
+    // 语音业务流程控制
+    // "data_source" 音频数据来源,取值:"sdk"(SDK内部录音,默认),"user"(外部录音,写入SDK)
+    // "wakeup_mode" 唤醒模式,取值:"off"(关闭,无语音唤醒),"vtn"(阵列唤醒)
+    // "interact_mode" 交互模式,取值:"oneshot"(一次唤醒一次交互),"continuous"(一次唤醒多次交互,默认)
+    "speech":{
+        "data_source": "sdk",
+        "wakeup_mode": "off",
+        "interact_mode": "continuous",
+        "audio_captor": "system"
+    },
+
+    // 合成参数
+    // "voice_name" 发音人
+    "tts":{
+        "voice_name": "x5_lingxiaoyue_flow"
+    },
+
+    // 传给大模型的参数
+    "cbmparams": {
+        "nlp": {
+            "nlp": {
+                "encoding": "utf8",
+                "compress": "raw",
+                "format": "json"
+            },
+        "sub_scene": "cbm_v45"
+        }
+    },
+
+    // v3版本头部参数
+    // "prot_interact_mode" 协议交互模式,跟speech中的interact_mode没有关联,v3才支持。取值:"oneshot"(单工,允许使用本地vad,默认),"continuous_vad"(全双工支持本地vad),"continuous"(全双工,必须关闭本地vad)
+    // 一般情况下不需要关注该参数,SDK内部会根据是否配置虚拟人来自动设置,设置策略:无虚拟人选oneshot,有虚拟人选continuous_vad
+    "header": {
+        "prot_interact_mode": "oneshot"
+    },
+
+    /* 日志设置 */
+    // "debug_log" 调试日志开关,取值:"1"(打开),"0"(关闭,默认)
+    // "save_datalog" 保存数据日志开关,打开则会保存输入的录音数据和输出的结果。取值:"1"(打开),"0"(关闭,默认)
+    // "datalog_path" 数据日志保存路径,默认路径为:"/sdcard/AIUI/data/"(Android),"AIUI/data"(其他平台)
+    // "datalog_size" 数据日志大小限制,单位:MB。当保存的日志数据大于该值后,会自动删除最老的日志以保存大小不超出限制
+    // "raw_audio_path" 原始音频保存路径
+    "log":{
+        "debug_log": "1",
+        "save_datalog": "0",
+        "datalog_path": "",
+        "datalog_size": 1024,
+        "raw_audio_path": ""
+    }
+}

+ 125 - 0
config/aiui/cfg/aiui示例.cfg

@@ -0,0 +1,125 @@
+{
+; 登录参数
+"login": {
+        "appid": "352cdc21",
+        "key": "45b5f6a1a4374409e6d72107e365ec1e",
+        "api_secret": "NTkzYTE0NTI4NGUxZGRhMmQwZDFlMDk4"
+    }, 
+;交互控制参数
+"interact": {
+        "interact_timeout": "-1",
+        "result_timeout": "5000"
+    },
+    //全局参数
+    "global": {
+        "scene": "main",     //配置情景 
+        "clean_dialog_history": "auto"
+    },
+    //音频端点检测参数
+    "vad": {
+        "vad_enable": "0",
+        "engine_type": "meta",
+        "res_type": "assets",
+        "res_path": "vad\/meta_vad_16k.jet"
+    },
+    // 识别(音频输入)参数
+    "iat": {
+        "sample_rate": "16000"
+    },
+; 
+"asr": {
+        "threshold": "50",
+        "res_type": "assets",
+        "res_path": "asr\/common.jet"
+    },
+;合成播放控制参数
+"tts": {
+        "engine_type": "cloud",
+        "res_type": "assets",
+        "res_path": "tts\/common.jet;tts\/mengmeng.jet",
+        "voice_name": "x4_lingfeizhe_oral",
+        "ent": "xtts",
+        "play_mode": "sdk"
+    },
+;业务相关参数
+"speech": {
+        "data_source": "sdk",
+        "audio_captor": "none",
+        "interact_mode": "continuous",
+        "intent_engine_type": "cloud"
+    },
+    "ivw": {
+        "mic_type": "mic4",
+        "zeroshot_enable": "0",
+        "res_type": "path",
+        "res_path": "\/sdcard\/AIUI\/assets\/vtn\/config\/vtn.ini"
+    },
+    "mmsp": {
+        "play_mmsp": false,
+        "upload_status_interval": 60,
+        "wakeup_engine": "mic4",
+        "video_delay": 120,
+        "min_face_w": 100,
+        "min_face_h": 100,
+        "head_angle_yaw": 30,
+        "vad_eos": 60,
+        "cae_mode": "ivw",
+        "wakeup_mode": "mmsp",
+        "ivw_interact_timeout": 60000,
+        "auto_lower_rank": false,
+        "face_out_ms": 800,
+        "instance_count": 3,
+        "switch_face_ratio": 1.5,
+        "face_area_ms": 800
+    },
+    "recorder": {
+        "channel_count": 8,
+        "channel_filter": "2,3,4,5,6,7",
+        "channel_filter_cae": "2,3,4,5,6,7",
+        "sound_card_name": "AIUIUSBMC",
+        "sound_device": 0,
+        "min_face_w": 80,
+        "min_face_h": 100,
+        "format": 0,
+        "channel": 8,
+        "input_color": "gray",
+        "cam_aspect_ratio": "4:3",
+        "cam_max_px": 3273600,
+        "cam_zoom": 0,
+        "cam_clip_left": "0.15",
+        "cam_clip_right": "0.15",
+        "cam_clip_top": "0.1",
+        "cam_clip_bottom": "0.1",
+        "cam_rotate": "180",   //摄像头角度
+        "cam_mirror": "false",
+        "cam_id": "",
+        "cam_api": "2"
+    },
+; 大模型参数
+"cbmparams": {
+        "nlp": {
+            "richness": "concise",
+            "env": "{\"human\":{\"expand_persona\":true,\"persona\":\"{\\\"父亲|爸爸\\\":\\\"江苏移动\\\",\\\"姓名|名字|称呼\\\":\\\"机器人小勇\\\"}\"}}"; //env 预制键值对
+            "nlp": {
+                "encoding": "utf8",
+                "compress": "raw",
+                "format": "json"
+            },
+            "sub_scene": "cbm_v45"
+        }
+    },
+    // 日志设置
+    "log": {
+        "debug_log": "1",
+        "save_datalog": "0",
+        "datalog_path": "",
+        "datalog_size": 1024,
+        "raw_audio_path": ""
+    },
+    //音频透传参数
+    "audioparams":{
+        "msc.lng":""   //经度 示例:117.16334474(不超过8位精度)
+        "msc.lat":""   //纬度 示例:31.82102191(不超过8位精度)
+
+   }
+}

+ 125 - 0
config/aiui/cfg/aiui说明示例.cfg

@@ -0,0 +1,125 @@
+{
+    // 登录参数
+    "login": {
+        "appid": "352cdc21",
+        "key": "45b5f6a1a4374409e6d72107e365ec1e",
+        "api_secret": "NTkzYTE0NTI4NGUxZGRhMmQwZDFlMDk4"
+    }, 
+    //交互控制参数
+    "interact": {
+        "interact_timeout": "-1",
+        "result_timeout": "5000"
+    },
+    //全局参数
+    "global": {
+        "scene": "main",     //配置情景 
+        "clean_dialog_history": "auto"
+    },
+    //音频端点检测参数
+    "vad": {
+        "vad_enable": "0",
+        "engine_type": "meta",
+        "res_type": "assets",
+        "res_path": "vad\/meta_vad_16k.jet"
+    },
+    // 识别(音频输入)参数
+    "iat": {
+        "sample_rate": "16000"
+    },
+   // 
+    "asr": {
+        "threshold": "50",
+        "res_type": "assets",
+        "res_path": "asr\/common.jet"
+    },
+    //合成播放控制参数
+    "tts": {
+        "engine_type": "cloud",
+        "res_type": "assets",
+        "res_path": "tts\/common.jet;tts\/mengmeng.jet",
+        "voice_name": "x4_lingfeizhe_oral",
+        "ent": "xtts",
+        "play_mode": "sdk"
+    },
+    //	业务相关参数
+    "speech": {
+        "data_source": "sdk",
+        "audio_captor": "none",
+        "interact_mode": "continuous",
+        "intent_engine_type": "cloud"
+    },
+    "ivw": {
+        "mic_type": "mic4",
+        "zeroshot_enable": "0",
+        "res_type": "path",
+        "res_path": "\/sdcard\/AIUI\/assets\/vtn\/config\/vtn.ini"
+    },
+    "mmsp": {
+        "play_mmsp": false,
+        "upload_status_interval": 60,
+        "wakeup_engine": "mic4",
+        "video_delay": 120,
+        "min_face_w": 100,
+        "min_face_h": 100,
+        "head_angle_yaw": 30,
+        "vad_eos": 60,
+        "cae_mode": "ivw",
+        "wakeup_mode": "mmsp",
+        "ivw_interact_timeout": 60000,
+        "auto_lower_rank": false,
+        "face_out_ms": 800,
+        "instance_count": 3,
+        "switch_face_ratio": 1.5,
+        "face_area_ms": 800
+    },
+    "recorder": {
+        "channel_count": 8,
+        "channel_filter": "2,3,4,5,6,7",
+        "channel_filter_cae": "2,3,4,5,6,7",
+        "sound_card_name": "AIUIUSBMC",
+        "sound_device": 0,
+        "min_face_w": 80,
+        "min_face_h": 100,
+        "format": 0,
+        "channel": 8,
+        "input_color": "gray",
+        "cam_aspect_ratio": "4:3",
+        "cam_max_px": 3273600,
+        "cam_zoom": 0,
+        "cam_clip_left": "0.15",
+        "cam_clip_right": "0.15",
+        "cam_clip_top": "0.1",
+        "cam_clip_bottom": "0.1",
+        "cam_rotate": "180",   //摄像头角度
+        "cam_mirror": "false",
+        "cam_id": "",
+        "cam_api": "2"
+    },
+    /* 大模型参数 */
+    "cbmparams": {
+        "nlp": {
+            "richness": "concise",
+            "env": "{\"human\":{\"expand_persona\":true,\"persona\":\"{\\\"父亲|爸爸\\\":\\\"江苏移动\\\",\\\"姓名|名字|称呼\\\":\\\"机器人小勇\\\"}\"}}", //env 预制键值对
+            "nlp": {
+                "encoding": "utf8",
+                "compress": "raw",
+                "format": "json"
+            },
+            "sub_scene": "cbm_v45"
+        }
+    },
+    // 日志设置
+    "log": {
+        "debug_log": "1",
+        "save_datalog": "0",
+        "datalog_path": "",
+        "datalog_size": 1024,
+        "raw_audio_path": ""
+    }
+    //音频透传参数
+   "audioparams":{
+        "msc.lng":""   //经度 示例:117.16334474(不超过8位精度)
+        "msc.lat":""   //纬度 示例:31.82102191(不超过8位精度)
+
+   }
+}

+ 38 - 0
config/aiui/cfg/avvtn.cfg

@@ -0,0 +1,38 @@
+{
+    "login": {
+        "appid": "352cdc21",
+        "sn": "0123456789-ABCDE1G1",
+        "work_dir": "/home/admin/avvtn_multi_demo/resource"
+    },
+    "mmsp": {
+        "video_delay": -120,
+        "min_face_w": 90,
+        "min_face_h": 90,
+        "head_angle_yaw": 45,
+        "face_out_ms": 800,
+        "face_area_ms": 800,
+        "switch_face_ratio": 1.5,
+        "instance_count": 1,
+        "cae_mode": "mmsp",
+        "auto_lower_rank": false
+    },
+    "vad": {
+        "vad_eos": 500,
+        "threshold": 0.1665
+    },
+    "recorder": {
+        "image_width": 1920,
+        "image_height": 1080,
+        "cam_clip_left": 0,
+        "cam_clip_right": 0,
+        "cam_clip_top": 0,
+        "cam_clip_bottom": 0
+    },
+    "log": {
+        "log_level": 3,
+        "log_max_file": 1,
+        "log_size": 10,
+        "log_save": "0",
+        "log_path": "/home/admin/avvtn_multi_demo/log"
+    }
+}

BIN
config/aiui/esr/esr-ch_en.jet


BIN
config/aiui/esr/mlp_yn.bin


BIN
config/aiui/esr/pproc/BiLstm_shengxue_offline_20210324_slni2_fixpunc.bin


BIN
config/aiui/esr/pproc/BiLstm_shengxue_offline_20210324_slni2_punc.bin


BIN
config/aiui/esr/pproc/newpunc_ch_en_with_vad.bin


+ 224 - 0
config/aiui/esr/pproc/pproc.cfg

@@ -0,0 +1,224 @@
+;#################################################################
+;#	�������ļ����ִ�Сд����������/������һ�ɲ���Сд�ַ�		#
+;#				��������ģ�����á�postproc��					#
+;#################################################################
+[postproc]
+;�Ƿ�ر����ֹ������滻��������㴦��,˳���ͼ�ת����true/false��
+pproc_param_closeallproc=false
+
+;�Ƿ����˳��������true/false��
+smoothproc=false
+
+;�Ƿ���п������������true/false��
+colloqproc=false
+
+;�Ƿ���б�㴦����true/false��
+puncproc=true
+
+;�Ƿ�������ֹ�����true/false��
+numproc=true
+
+;�Ƿ�����滻������true/false��
+replaceproc=false
+
+;�Ƿ���зֶδ�����true/false��
+paragraproc = false
+
+;�ӿ����͡�0 - תд��1 - ��д��
+interfacetype = 1
+
+;�������Ƿ�Ϊ����ģʽ������ģʽʱ�������һ�������true/false������д�������ã�תдʱ������ʧЧ��
+puncCacheMoudle = true
+
+;�������ԡ�0 - ���ģ�1 - ���2 - Ӣ�3 - ������[��Ӣ]��
+languagetype=0
+
+;Quark�������Ĭ��ΪCnNet
+;nextG������͡�0 - JSON��ʽ, 1 - ����(�ı���ʽ)��
+outputtype=0
+
+;0:�����ԣ�  1����㣬 2��˳���� 3���ֶ�
+pproc_test=0
+
+;�Ƿ���ض��㻯��Դ
+bilstm_fix = 1
+
+;�Ƿ񽫼���ת��Ϊ����(ֻ����ͨ������������ʱ��Ч)��true/false��,Ĭ��Ϊfalse
+needJianToFan=false
+
+;�Ƿ�رպ��������Ŷ������true/false��,Ĭ��Ϊfalse
+outCmInfo=true
+
+;#�Ƿ���ݴ������Ż�˳���ͱ�㡾true/false��������Ĵʴ�������ʱ��������Ч��
+useAttribute=false
+
+;#�Ƿ�����n��s��p��g������������ԡ�true/false����Ĭ��Ϊfalse
+reserveAllAttribute= false
+
+
+;#################################################################
+;#						���ִ������á�numproc��					#
+;#################################################################
+[postproc\numproc]
+;nlpģ��dll·��
+nlp_module_path= nlp.dll
+
+;���ֹ���ʹ��c++:0 ���� python:1
+num_proc_type = 0
+;���ֹ���������0:��ͨ������1:����������2:ҽ�Ƴ���
+num_usage_scene = 0
+;nlp�����Դ
+semantic_word_dict=pproc/resource_2_0_12/SemanticWord.dic
+basic_phrase_gram=pproc/resource_2_0_12/BasicPhrase.gram
+number_dict=pproc/resource_2_0_12/number_math_normal.dic
+number_gram=pproc/resource_2_0_12/number_math_normal.gram
+not_replace_list=pproc/resource_2_0_12/num_not_change_list
+
+
+;#��Ҫ��������������
+[postproc\numproc\proc_num_semantic]
+;����=on/off
+NumStr=on
+NumSeq=on
+BigNum=on
+PersonalInformation=on
+Date=on
+Time=on
+Fraction=on
+LicensePlate=on
+NumberOverWan=on
+Delete=on
+MathSymbol=on
+SpecialWord=on
+WebSite=on
+NumstrUighur=on
+NumseqUighur=on
+MonthUighur=on
+NumsymbolUighur=on
+NumzeroUighur=on
+SpecialwordUighur=on
+SpecialnullUighur=on
+SpecialpointUighur=on
+CourtMeasure=on
+
+;#��Ҫ�������������
+[postproc\numproc\output_num_semantic]
+;����=on/off
+
+
+
+
+
+;#################################################################
+;#					�滻�������á�replaceproc��					#
+;#################################################################
+[postproc\replaceproc]
+;�滻�б�
+replace_list=pproc/resource_2_0_12/replace_list
+
+;�������滻����
+proc_tatata = false
+
+tatata_szBinResURL = pproc/resource_2_0_12/bin_model/BiLstm_tatata.bin
+
+
+
+;#################################################################
+;#						˳���������á�smoothproc��				#
+;#################################################################
+[postproc\smoothproc]
+;smooth blstm��������0-float��1-8bit��2-16bit��Ĭ��0
+smooth_calc_type = 1
+;smooth ��Դ����0-��26ά������1-����26ά������Ĭ��0
+smooth_res_type = 0
+;bilstm˳����Դ
+blstm_BinURL_smooth = pproc/resource_2_0_12/bin_model/BiLstm_smooth_model_20170919_smooth.bin
+blstm_BinURL_smooth_English = pproc/resource_2_0_12/bin_model/BiLstm_smooth_2layer_English_20171212_smooth.bin
+
+
+
+;#################################################################
+;#						��㴦�����á�puncproc��				#
+;#################################################################
+[postproc\puncproc]
+;punc��Դ����0-blstm��1-blstm_PD, 2-selfAttentionĬ��0
+punc_res_type = 1
+;punc mlp��������0-float��1-8bit��2-16bit��Ĭ��0��blstmֻ֧��0��1��selfAttenֻ֧��0��2
+punc_calc_type = 1
+
+blstm_BinURL_punc = pproc/BiLstm_shengxue_offline_20210324_slni2_fixpunc.bin
+###pproc/out_punc.bin
+
+;8άwordembeding����BiLstm_punc_2018_1_8����ʹ�ã���ʹ��ʱ�轫��ע��
+
+
+blstm_BinURL_punc_PD = pproc/BiLstm_shengxue_offline_20210324_slni2_fixpunc.bin
+##pproc/resource_2_0_12/bin_model/BiLstm_punc_tingjian_vad_tag_dunhao_20181115_punc.bin
+;new punc model
+selfAtten_BinURL_punc = pproc/resource_2_0_12/bin_model/SelfAttention_20181106_punc.bin
+
+;Ӣ��punc��Դ����0-blstm_eng��1-blstm_eng_PD, Ĭ��0
+punc_res_type_eng = 0
+blstm_BinURL_punc_English = pproc/resource_2_0_12/bin_model/BiLstm_punc_English.bin
+blstm_BinURL_punc_English_PD = 
+
+
+;�������ϵ����Ĭ��Ϊ0.0
+prior_rate = 0.2
+###0
+;Ӣ�ı������ϵ����Ĭ��Ϊ0.0
+prior_rate_en = 
+;��Ӣ������������ֵ
+prior_value = 0.85924767,0.062097898,0.068435067,0.000801522,0.009417843
+####0.86126,0.06213,0.0668,0.00066,0.0091
+prior_value_en = 0.87,0.069,0.055,0.0002,0.0055
+
+;Vad�������ޣ���Ԥ����Ϊ�������ʱ(maxΪ0)����������Ч��
+;�����������趨max��second_max֮��IJ�ֵ����ֵ���ڴ����޲Ų��ӱ�㣬�����second_max���
+;Ĭ��Ϊ0������������ʧЧ(Quark��վ�ͻ���ϵͳ����Ϊ0�����뷨ʱ��Ϊ0.9)
+vad_punc_threshold = 0
+###0.8
+
+;���о�����ޣ��������ƾ��г���ţ�
+;�����о�ŵ÷�-���ŵ÷֣����ڴ�����ʱ�żӾ�ţ�����Ӷ���
+;Ĭ��Ϊ0������������ʧЧ(Quark��վ�ͻ���ϵͳ����Ϊ0�����뷨ʱ��Ϊ7)
+period_punc_threshold = 0
+
+
+;��ӡ���Ԥ������Ĭ��Ϊ0
+# print_punc_result = 0
+
+
+;#################################################################
+;#						�ֶδ������á�paragraproc��					#
+;#################################################################
+[postproc\paragraproc]
+;onlinePara lstm��������0-float��1-8bit��Ĭ��0��(offlineʱ����Ч��ֻʹ��float����)
+para_calc_type = 1
+;paragraph blstm��Դ
+blstm_BinURL_para = pproc/resource_2_0_12/bin_model/BiLstm_para_online_segment_20180611_para.bin
+;��Ƶ���Ự���������Ƿ�ǿ�ƷֶΣ�Ĭ��Ϊfalse
+force_do_paragra = false
+
+;�ֶ���ֵ������÷ִ�����ֵ��ֶ�(Ĭ��0.4)
+threshold_do_paragra = 0.3   
+;ÿ�仰������ֵĬ��Ϊ30bytes(��������Ϊ28�����߲�ʹ�ô����ã�)
+threshold_senten_byteNum = 28
+;������ٴʺ�����ֶ�Ԥ�⣬��ҪС��400�ʣ�����1000���ţ����ߣ�
+;����ʱ����ʾ�ۼƶ��ٴ�ǿ�У�����60�ʽ���
+threshold_article_wordNum = 60
+
+;#################################################################
+;#						��������������á�colloqproc��					#
+;#################################################################
+[postproc\colloqproc]
+Colloq_BinURL=pproc/resource_2_0_12/bin_model/Colloq_v2.bin
+
+
+[logger]
+file                  =    ../log/postproc.log
+level                 =   
+output                =   
+flush                 = 
+maxsize               = 
+overwrite             = 

+ 977 - 0
config/aiui/esr/pproc/resource_2_0_12/num_not_change_list

@@ -0,0 +1,977 @@
+五月天
+七里香
+负一楼
+一块
+三心二意
+三下五除二
+横七竖八
+一心一意
+三三得九
+十一点零五
+的十年
+瀼东瀼西一万家
+阿里巴巴与四十大盗
+爱不代表一切
+爱到一千年
+爱的三次方
+爱你到一百岁
+爱你的真心一万年
+爱你无悔三千年
+爱你一万倍
+爱情高八度
+爱情十二元素
+爱情宣言九个太阳
+爱情一百年
+爱上杨二车娜姆
+爱一万次够不够
+八百里分麾下灸
+八百里分麾下炙
+八骏日行三万里
+八路军拉大栓
+八年的爱
+八年十二月
+八十仍逾四
+八十四吟
+八十西来钓渭滨
+八月长江万里晴
+八月桂花香
+八月寒苇花
+八月蝴蝶黄
+八月湖水平
+八月枚乘笔
+八月秋高风怒号
+八月十五日夜瑶台寺对月绝句
+八月十五日夜湓亭望月
+八月十五夜玩月
+八月十五夜月二首
+八月十五夜赠张功曹
+八月涛声吼地来
+八月西风起
+八月萧关道
+八月中秋月圆人不圆
+白割了五个脚趾头
+白空垂三千丈
+白银十万两
+百里九十半
+百年三万六千朝
+北京房租五年首降
+被偷走的那五年
+编织一千个梦
+便至四十西营田
+变身三分钟
+冰冷七月天
+冰融二月春
+才可容颜十五余
+才知容颜十五余
+参差十万人家
+蚕老麦黄三月天
+策勋十二转
+柴刀十八路
+场景十八主题歌器乐版
+场景十二诀别
+场景十六悲情
+场景十四片尾字幕
+场景十五忧伤
+唱给十年后的自己
+掣铃交通二千石
+陈奕迅的十年
+陈奕迅十年
+城中十万户
+酬王二十舍人雪中见寄
+愁入春风十四弦
+初为霓裳后六幺
+初一到十五
+吹来爱情九家之书插曲
+春风十日放灯时
+刺青三十一号
+从军十年余
+翠屏十二晚峰齐
+大道青楼十二重
+待到秋来九月八
+倒数三秒
+德云二队
+得到一千尺好印象
+登百丈峰二首
+等你一万年
+地狱迷宫三层
+第十八秦鸣曲
+第十一号匈牙利舞曲
+第五十卷
+第一百七十五卷
+第一千滴泪
+第一千个昼夜
+店小二传奇
+调笑二千石
+爹爹称回来二斤面
+爹爹育儿十六岁
+东四十条
+东屯稻畦一百顷
+东下齐城七十二
+斗酒十千
+斗酒十千恣欢谑
+豆蔻梢头二月初
+都门十二
+都门十二清晓
+尔来四万八千岁
+二年三度负东君
+二年随骠骑
+二年终日苦相思
+二人迷失世界
+二人转
+二十年前旧板桥
+二十年重过南楼
+二十三年弃置身
+二十三丝动紫皇
+二十四桥
+二十四桥明月
+二十四桥明月夜
+二十四桥南北
+二十四桥仍在
+二十岁的某一天
+二十五弦弹未尽
+二十五弦弹夜月
+二月初惊见草芽
+二月垂杨未挂丝
+二月风筝线儿断
+二月湖水清
+二月黄鹂飞上林
+二月江南花满枝
+二月青犹短
+二月树色好
+二重唱二十枚钱币
+放一百二十个心
+飞上九天歌一曲
+风疾舟中伏枕书怀三十六韵奉呈湖南亲友
+疯人院二楼
+奉使虚随八月查
+奉赠韦左丞丈二十二韵
+凤楼十二春寒浅
+凤楼十二神仙宅
+凤舞九天
+腹中贮书一万卷
+负尽狂名十五年
+感遇诗三十八首
+港町十三番地
+高蹄战马三千匹
+哥妹二人都快活
+隔离七日情
+给十年后的我
+给十五岁的自己
+更隔蓬山一万重
+更过十年也似他
+功德八尺水
+公鸡大战三只熊
+公元两千年
+够了三次方
+孤臣霜发三千丈
+孤独在十二月末
+孤高一千事变
+姑娘十八似花娇
+古风其十五
+古风其五十九
+古诗十九首青青河畔草
+故国三千里
+故乡七十五长亭
+故乡三千里
+怪人二十面相
+怪人十二面相
+关东人与二人转
+官家二月收新丝
+观世音菩萨十二大愿赞
+观世音菩萨十九字真言
+观音菩萨十二大愿
+广张三千六百钓
+癸卯岁十二月中作与从弟敬远
+癸未八月十四日至十六夜月色皆佳
+国步连营一千里
+海底两万里
+海底三万里
+海角七号
+寒暑运四时
+翰林风月三千首
+汉家此去三千里
+汉家离宫三十六
+汉家组练十万
+汉甲三十万
+好风二十四花期
+好学的三脚猫
+和戎诏下十五年
+何似东都正二月
+何知七十战
+河上花图八大山人
+鹤啸九天
+贺绿汀五首
+黑豹三个人
+黑街二人组
+恨你一万年
+恨天夺我一万年
+恨天借我一万年
+横七竖八
+呼吸八千人
+胡姬年十五
+胡天八月即飞雪
+胡笳十八拍
+蝴蝶第十万个心愿
+花底离愁三月雨
+花底离情三月雨
+花发江边二月晴
+花千骨泪三尺
+花拾三楼主人
+画出清明二月天
+画楼十二
+怀胎十月一朝生
+淮西有贼五十载
+皇家九号独家串烧
+会须一饮三百杯
+活了一百万次的猫
+火山六月应更热
+火山五月行人少
+或从十五北防河
+紞如三鼓
+吉祥天母七支供养
+即兴上海三人娘
+己酉岁九月九日
+寄李十二白二十韵
+寂寞三千粉黛
+继续给十五岁的自己
+家书十五行
+假如拥有三天光明
+捡到五分钟
+见春光三月里百花开遍
+剑网三天策
+将军得名三十载
+江布拉克七月的雨
+江城五月落梅花
+江湖夜雨十年灯
+江水三千里
+降龙十八掌
+捷克五场疯狂定义
+结婚十年
+借三年雨伞
+金发姑娘和三只熊
+金闺二月还
+金坷垃三人组
+金陵十二钗
+金秋十月
+金樽清酒斗十千
+金钗十二行
+今年八月十五夜
+今日俸钱过十万
+今夜二十岁
+锦瑟无端五十弦
+近作十日欢
+镜湖三百里
+九百九十九朵玫瑰
+九个太阳
+九日身心百梦杳
+九十光阴能有几
+九十其仪
+九岁国储新上计
+九万里风鹏正举
+九月寒砧催木叶
+九月降霜秋早寒
+九月九的酒
+九月九日登玄武山
+九月九日眺山川
+九月九日望乡台
+九月九日忆山东兄弟
+九月十日即事
+九月匈奴杀边将
+军容带甲三十万
+军书十二卷
+君不闻汉家山东二百州
+君行逾十年
+看我七十二变
+科多巴十四行诗
+可悲的十八岁
+客行无六月
+阑干十二独凭春
+老人七十仍沽酒
+离家三十五端阳
+离家已二年
+吏禄三百石
+练十次喜爱的舞步
+凉州七里十万家
+两句三年得
+两片三片四五片
+两千年等一次
+两组四人舞
+辽东小妇年十五
+凌晨三点三分
+榴花五月眼边明
+留待行人二月归
+流水十年间
+流言有一千分贝
+柳州二月榕叶落尽偶题
+六片七片八九片
+六十年来兵蔟蔟
+六十余年慕古人
+六月的雨
+六月二十七日望湖楼醉书
+六月二十七日望湖楼醉书五首其一
+六月伏天
+六月禾未秀
+六月栖栖
+六幺令
+六幺水调家家唱
+龙池十日飞霹雳
+龙钟还忝二千石
+旅饭二年无此味
+轮台九月风夜吼
+论交却忆十年时
+洛城一别四千里
+洛阳三月花如锦
+马上离愁三万里
+马诗二十三首
+麦陇青青三月时
+满眼不堪三月暮
+每颗星代表一份爱
+每秒想你一百遍
+每岁烟花一万重
+每一个星期有六天
+美酒斗十千
+美酒三百杯
+美丽的七台河
+美丽的五台山
+美夕八千夜
+妹妹就像三月花正红
+梦断香销四十年
+梦三年又三年
+明堂观礼杂咏十三首
+命三人远飞高离
+魔鬼的三根金发
+魔境十日
+母子三人逃性命
+目断四天垂
+那六年的歌
+南朝四百八十寺
+南园十三首
+能开二月花
+拟把疏狂图一醉
+拟古六首上鲜于大夫子骏其六生年不满百
+年年十月来宣州
+宁知四十九年非
+农场的三只小猪
+农贸市场十四行
+女大十八变
+女大十九变
+陪君醉笑三千场
+陪你醉一千年
+朋友二号
+朋友十年
+鹏抟九万
+飘摇一千年
+平安的七月夜
+平原十日饮
+瀑布银河落九天
+七百万个小宇宙
+七次机会
+七国三边未到忧
+七号公园
+七秒钟的记忆
+七秒钟的记忆缺男
+七千州县尽关门
+七十古来希
+七十人稀到
+七月七日长生殿
+七月七日河边渡
+七月七日晴
+七月坐凉宵
+其十二水如一匹练
+其十六秋浦田舍翁
+其十七祧波一步地
+其十三渌水净素月
+其十四炉火照天地
+其十五白发三千丈
+骑驴三十载
+千年田换八百主
+欠我十块
+且论三万六千是
+秦塞重关一百二
+清平乐六盘山
+清平三百载
+擎天柱变三轮
+晴天的十三棵枫树
+情爱一万年
+情定九份山
+情歌五月天
+请客一万次马上接通
+秋符十年
+秋浦歌十七首
+驱马一万匹
+去国十年老尽少年心
+去国十年同赴召
+泉水中的三枚硬币
+让你幸福多十倍
+绕梁三日
+人到四十
+人活二十多
+人间三度见河清
+人间四月天
+人生二胜一败
+人生七十稀
+日落前七分钟
+日语五十音图
+如果十八岁遇见你
+如果我能活到八十岁
+如今才是十三夜
+如三月兮
+闰八月初吉
+塞门三月犹萧索
+三百尺
+三百内人连袖舞
+三百年来庾楼上
+三杯两盏淡酒
+三城记
+三春三月忆三巴
+三寸天堂
+三度和弦
+三分钟恋爱热度
+三分钟情歌
+三个和尚
+三个人的错
+三个人的时光
+三个人的天地
+三个人一个约会
+三个傻瓜
+三年奔走空皮骨
+三年遇寒食
+三年枕上吴中路
+三年枕上吴中路
+三年谪宦此栖迟
+三年羁旅客
+三千宠爱在一身
+三千里地山河
+三千年的留言
+三千年后知谁在
+三千双蛾献歌笑
+三枪拍案惊奇
+三三得九
+三三五五棘篱门
+三三五五映垂杨
+三十从军今白发
+三十二蹄无歇时
+三十九年非
+三十六帝欲相迎
+三十六峰犹不见
+三十六宫愁重
+三十六离宫
+三十六曲水回萦
+三十六陂人未到
+三十未有二十余
+三十犹未遇
+三台令
+三天三夜
+三万里河东入海
+三万六千顷
+三万六千顷
+三下五除二
+三心二意
+三月残花落更开
+三月的摇滚
+三月俄三十
+三月二十七日夜醉中作
+三月晦日偶题
+三月晦日赠刘评事
+三月七日沙湖道中遇雨
+三月如风
+三月三日天气新
+三月桃花随水转
+三月无雨旱风起
+三月咸阳城
+三月香巢初垒成
+三月香巢已垒成
+三月正当三十日
+三只小熊
+三字经二十转
+三暝三日写三字
+杀气三时作阵云
+山歌唱到一百岁
+山林二十年
+山路十八弯
+伤了三个心
+伤心阔别三千里
+伤心一百天
+伤心一万倍
+伤心一万次
+上帝一百分
+上下五千年
+深宫二十年
+深宵三点
+深夜前的五分钟
+神坑二人组
+省略一万句
+胜读十年书
+失恋七天日记
+失恋三百天
+失恋三十三天
+失恋十四行
+十八岁的天空
+十八岁那年
+十八相送
+十二道锋味
+十二公民
+十二街如种菜畦
+十二楼中尽晓妆
+十二楼中月自明
+十二门前融冷光
+十二生肖
+十二星座
+十二绣帘空卷
+十二学弹筝
+十二月十九日夜中发鄂渚晓泊汉阳亲旧携酒追
+十二月十五夜
+十户中人赋
+十家租税九家毕
+十六君远行
+十六诵诗书
+十轮霜影转庭梧
+十秒的冲动
+十年不调几邅回
+十年不断
+十年断魂潮尾
+十年旧梦无寻处
+十年浪走宁非痴
+十年离乱后
+十年履霜露
+十年梦
+十年歧路
+十年驱驰海色寒
+十年戎马心孤单
+十年为客负黄花
+十年为客在他州
+十年无梦得还家
+十年一觉扬州梦
+十年一刻
+十年遗恨泣英雄
+十年曾一别
+十年征戍忆辽阳
+十年之约
+十年憔悴到秦京
+十年骐驎地上行
+十年蹴踘将雏远
+十千沽酒
+十千美酒渭城歌
+十千一斗
+十日春寒不出门
+十日九风雨
+十日雨丝风片里
+十三能织素
+十三身袭富平候
+十三学得琵琶成
+十生九死到官所
+十室八九贫
+十四藏六亲
+十四万人齐解甲
+十四万众犹虎貔
+十四为君妇
+十四学裁衣
+十岁去踏青
+十万个冷笑话
+十万工农下吉安
+十万嬉皮
+十五弹箜篌
+十五的月亮
+十五即乘潮
+十五年前一转头
+十五泣春风
+十五始展眉
+十五夜观灯
+十五夜望月
+十一月四日风雨大作
+十一月中旬至扶风界见梅花
+十月二十八日夜风雨大作
+十月江南天气好
+十月围城
+十月阴气盛
+十月之交
+时光飘过三十年
+时光十年
+时间一分一秒度过
+收取关山五十州
+手机短信铃音十岁
+手植青松三万栽
+孰知四时间
+霜皮溜雨四十围
+霜叶红于二月花
+双皮溜雨四十围
+谁倚东风十二阑
+睡前十分钟故事
+瞬间白眼一千遍
+四百万人同一哭
+四次元爱情
+四海十年兵不解
+四海十年兵不解
+四面楚歌
+四年思念
+四十年来家国
+四十三年如电抹
+四时佳兴与人同
+四时令
+四时田园杂兴
+四时田园杂兴春日
+四万义军同日死
+四月带花移芍药
+四月的葬礼
+四月南风大麦黄
+四月清和雨乍晴
+四月熟黄梅
+四月望雨
+四月物语
+四月枇杷未黄
+松下茅亭五月凉
+送兵五千人
+送给十八岁的自己
+送你九百九十九朵花
+送你月饼三百斤
+苏三起解
+苏秀道中自七月二十五日夜大雨三日秋苗以苏
+速秒五厘米
+宿灵鹫禅寺二首
+隋堤三月水溶溶
+孙小宝二人转
+他永远十八岁
+叹我虚度二十载
+唐朝国公十八家
+唐诗三百首
+桃花三十六陂
+逃不出的八楼
+腾骧磊落三万匹
+题西湖十八拍
+天门四十里
+天上七颗星
+天台四万八千丈
+天下黄河九十九道弯
+天下黄河十八弯
+天之痕三个人的时光
+天字十三杀
+天竺寺八月十五日夜桂子
+甜蜜十六岁
+铁人三项
+听十年前的歌
+听一千遍后
+停止七秒钟
+同光十三绝
+同心高举十架
+同行十二年
+同学十年
+同桌的你之此处略去一万字
+童话二分之一
+痛一万次都不够
+偷偷说一万遍我爱你
+突然五秒
+外滩十八号
+晚安十九岁
+万古人间四月天
+万死投荒十二年
+万重云水四边齐
+王充道送水仙花五十支
+王妃出宫一月整
+网恋七伤拳
+网恋一百天
+望断翠峰十二
+望星空十五的月亮
+望驿台三月三十日
+忘川河畔的五百年思恋
+为六度音程而作
+为你出生入死九十九次
+为三度音程而作
+为什么一万个理由确没有爱我的理由
+为四度音程而作
+为五只手指而作
+为着十万块
+维此六月
+未够爱别人十秒好
+魏淑芬之十个男人九个色
+问刘十九
+我爱这十月午后的阳光
+我当二十不得意
+我的宝贝四千金
+我的父在三尺剑下把命断
+我的故乡四天大桥
+我的青春高八度
+我独七十而孤栖
+我就想每天只工作四个小时
+我们的十年
+我们今年二十二岁
+我们离三十岁还有几天
+我们是八路军
+我们相距十万光年
+我上任也不过三月整
+我设计一万种方式遇见你
+我说爱你一万遍
+我套上飞快的三套车
+我要一天七次向主赞美
+我有过十二种颜色
+我曾有三次想到过死
+我正十八岁
+我只能爱你到一百岁
+我祝祖国三杯酒
+握长弓十五年
+无敌三脚猫
+无奈十五的月亮
+吾过十四年
+吾日三省吾身
+走去走来三百里
+五百次哭泣
+五楼阳台的男人
+五千仞岳上摩天
+五千貂锦丧胡尘
+五十六个民族五十六朵花
+五十年间似反掌
+五十弦翻塞外声
+五台山困住了杨老将
+五月不可触
+五月的鲜花
+五月的雪
+五月槐花香
+五月激情时光
+五月江吴麦秀寒
+五月榴花妖艳烘
+五月南风兴
+五月人倍忙
+五月十九日大雨
+五月石榴如火
+五月水边柳
+五月天山雪
+五月五日天晴明
+五月五日午
+五月西施采
+五月相呼度太行
+五月渔郎相忆否
+午夜零点零三分
+午夜零点零一分
+午夜零点一刻
+午夜前的十分钟
+午夜三点半
+昔年八月十五夜
+牺牲四毛钱
+稀罕一万年
+夕贬朝阳路八千
+夕贬潮州路八千
+习惯的十一年
+习惯二个人的生活
+戏十八相送
+下沉一万米
+下个五百次回眸
+下雨的四月天
+先帝侍女八千人
+先割十斤肥猪肉
+献赋十年犹未遇
+乡村四月
+想你一百回
+想你一千遍
+想你一千年
+想你一千趟
+想你一千夜
+想你一万遍
+想你一万次
+想你一万分钟
+想像十个你
+像十年挚友
+像我十五岁的生活
+像一百万个秋千
+向天再借五百年
+小八路
+小齐与三个好朋友
+小堂绮帐三千户
+小丸子贺岁之十个祝福
+小王子与死一千万次的猫
+小小扁担三寸宽
+小小墩布二尺长
+笑我一万次
+写在四月的歌
+新丰美酒斗十千
+新年送你五颗星
+新选的僮儿十八春
+新一千年一万年
+新一万个理由
+信给十五岁的自己
+行年将七十
+许愿一千遍
+宣传二人套餐
+旋风九日
+玄关十六夜
+学甲十三鹰
+寻梦八百里瀚海
+寻找一千遍
+压寨宝宝之三个俯卧撑
+牙尖十八怪
+焉知二十载
+烟霞五百名仙
+严霜五月凋桂枝
+艳阳三月天
+燕云十六州
+雁柱十三弦
+杨幺传
+阳春三月
+阳春三月下扬州
+阳关三月
+腰缠十万
+夜半十二时
+夜的十三章
+夜晚九点的阳光
+一百个放心
+一百万种亲吻
+一春略无十日晴
+一个师傅三个徒弟
+一加一等于我爱你
+一加一我和你
+一九七三年的弹子球
+一梦觉来三十载
+一年三百六十日
+一片一片又一片
+一千遍我爱你
+一千个分手的理由
+一千个轮回
+一千零一个愿望
+一千年后记得我
+一千年以后
+一千万个拥抱
+一千余里地无山
+一去三十年
+一身去国六千里
+一身转战三千里
+一生一世一年一月一夜
+一万次悲伤
+一万个对不起
+一万个理由
+一万个女人
+一万个舍不得
+一万光年
+一位十五风的少女
+一卧东山三十春
+一心一意
+一亿个伤心
+一又二分之一的夏天
+一鬟五百万
+遗忘只需七年
+倚遍危楼十二阑
+已忍伶俜十年事
+因着十架爱
+银河落九天
+银河宛转三千曲
+银河宛转三千曲
+英台二十四送歌
+英雄小八路
+拥抱十秒钟
+拥抱一万年
+永王东巡歌十一首
+永远的十八岁
+永远十八岁
+用一千双手臂拥抱你
+用一千只手臂拥抱你
+用一亿种方法逃避
+悠悠三千里
+由十七岁开始
+游龙门分题十五首自菩提步月归广化寺
+有劳二位夫人言
+友情一万年
+又是一年七月七
+又于乐工故书中得商调霓裳曲十八阕
+又作东风十日寒
+于长安归还扬州九月九日行薇
+于今三年
+于三十里
+五月榴花妖艳烘
+雨横风狂三月暮
+与她的三次邂逅
+与夏十二登岳阳楼
+郁郁苍苍三十里
+遇见三颗痣
+欲望六人行
+欲望像山背负一路感伤
+缘分五月
+远戍十年临的博
+愿意将一千场恋爱
+怨气冲天三千丈
+云图六重奏
+云物共倾三月酒
+再等一百年
+再等一万年
+再会吧十七岁
+再活五百年
+再听十一月的雨
+在乎你一分一秒
+在另个四月他日
+在我八十七岁后
+在我们三个人中
+在一个十月非常棒的日子里
+早春呈水部张十八员外
+早春呈水部张十八员外(初春小雨)
+早春二月
+早秋赠裴十七仲堪
+曾驱十万师
+赠庆幺幺
+斩断凡尘三千痛
+战车一点也不辣
+战火四千金
+站台的九月底
+这浪漫一千年有几回
+这一年我们二十多
+浙江八月何如此
+贞元十四年旱甚见权门移芍药花
+正是莺花二月
+正月二十日往岐亭郡人潘古郭三人
+知了只叫三天
+直角等于三角形
+直上三十里
+只想说三个字
+只需三秒大电齐发
+只须给我十岁
+志怪三十题
+至尊三人组
+致不懂二次元的你
+致敬三十年
+致杨二车娜姆
+中花六版
+中华上下五千年
+中间隔着那十年
+中了五百万
+中脉七轮
+忠孝东路走九遍
+重返十七岁
+重来又是三年
+周董的十二新作
+珠湖四十八汊歌
+珠履三千鵷鹭客
+铸墓十二字
+壮士十年归
+壮乡三月歌悠悠
+追加七片柠檬
+追你一万年
+缀玉联珠六十年
+滋味一千一百种
+子夜四时歌春歌
+子夜四时歌秋歌
+自京赴奉先县咏怀五百字
+自怜十五馀
+走去走来三百里
+足有五十趟
+醉了三月的山
+醉我一千年
+醉笑陪公三万场
+醉笑陪君三万场
+醉中有索四时歌者
+最爱西湖三月天
+最多也不过十秒间
+最后的六月盛夏
+最后说一次这三个字
+昨天我十五岁
+廿九岁的遗书
+抟摇直上九万里
+岐山下二首
+汴京纪事二十首其二十
+溧阳酒楼三月春
+娉娉袅袅十三余
+娉娉袅袅十三馀
+敕赐金钱二百万
+飒爽英姿五尺枪
+禅行三十七品经
+恁二人莫动手
+碛里征人三十万
+锵锵四人行
+瞿塘五月谁敢过
+黛色参天二千尺
+勾勾搭搭
+直勾勾

BIN
config/aiui/esr/pproc/resource_2_0_12/number.gram


BIN
config/aiui/esr/pproc/resource_2_0_12/number_math_normal.dic


BIN
config/aiui/esr/pproc/resource_2_0_12/number_math_normal.gram


+ 352 - 0
config/aiui/esr/pproc/resource_2_0_12/replace_list

@@ -0,0 +1,352 @@
+acm:ACM
+aa制:AA制
+address:address
+adobe:Adobe
+adsl:ADSL
+ahtv:AHTV
+ak-47:AK-47
+amd:AMD
+android:Android
+apologize:apologize
+atm:ATM
+avp:AVP
+atv:ATV
+a股:A股
+abc:ABC
+aids:AIDS
+bbc:BBC
+b股:B股
+bbs:BBS
+bbs.:bbs.
+bs:BS
+bt:BT
+btv:BTV
+b2c:B2C
+b2b:B2B
+b超:B超
+c30:C30
+c4:C4
+c5:C5
+cba:CBA
+cba.:cba.
+nba.:nba.
+nba:NBA
+cctv:CCTV
+cd:CD
+cdk:CDK
+cdma:CDMA
+ceo:CEO
+cfo:CFO
+cio:CIO
+cdo:CDO
+cco:CCO
+cbo:CBO
+clo:CLO
+cmo:CMO
+cno:CNO
+cqo:CQO
+cro:CRO
+cuo:CUO
+cisco:CISCO
+cnn:CNN
+cpu:CPU
+cto:CTO
+ctrl:CTRL
+cpa:CPA
+c罗:C罗
+cet:CET
+cmos:CMOS
+cpu:CPU
+d90:D90
+dc:DC
+dna:DNA
+ddr:DDR
+doctor:doctor
+dv:DV
+dvd:DVD
+dvr:DVR
+eos:EOS
+eq:EQ
+erp:ERP
+ems:EMS
+f4:F4
+factory:factory
+fm:FM
+gcd:GCD
+gprs:GPRS
+gsm:GSM
+gto:GTO
+gB:GB
+gb:GB
+Gb:GB
+gm:GM
+husband:husband
+hz:Hz
+ibm:IBM
+ic卡:IC卡
+ie8:IE8
+ipad:iPad
+iphone3g:iPhone3G
+iphone3gs:iPhone3GS
+iphone:iPhone
+七一百二十八G:7 128G
+iPhone7:iPhone 7
+七六十四G:7 64G
+四三十二G:4 32G
+ipod:iPod
+ipodtouch:iPodTouch
+iptv:IPTV
+iq:IQ
+ktv:KTV
+k歌:K歌
+lcd:LCD
+maggieq:maggieQ
+mba:MBA
+mbox:MBox
+mtv:MTV
+mb:MB
+msn:MSN
+m601:M601
+m701:M701
+nba:NBA
+nokia:NOKIA
+oem:OEM
+o2o:O2O
+opec:OPEC
+p2p:P2P
+pda:PDA
+polo:POLO
+pci:PCI
+powerpoint:powerpoint
+ps2:PS2
+pvc:PVC
+p二p:P2P
+q5:Q5
+q7:Q7
+qq:QQ
+qq.:qq.
+q币:Q币
+qq币:QQ币
+qq群:QQ群
+q群:Q群
+rmb:RMB
+rom:ROM
+ram:RAM
+rna:RNA
+rgb:RGB
+sb:SB
+sd卡:SD卡
+sars:SARS
+tcl:TCL
+tnt:TNT
+tokyo:Tokyo
+ttf:TTF
+tts:TTS
+tvb:TVB
+tvs:TVS
+toefl:TOEFL
+t恤:T恤
+ucweb:UCWeb
+ufo:UFO
+usb:USB
+u盘:U盘
+vcd:VCD
+verycd:verycd
+victor:victor
+victory:victory
+vp:VP
+vip:VIP
+vip.:vip.
+wcdma:WCDMA
+wdc:WDC
+wnba:WNBA
+wto:WTO
+xbox:XBox
+apec:APEC
+iso:ISO
+gps:GPS
+dos:DOS
+coo:COO
+mpa:MPA
+cbd:CBD
+gdp:GDP
+gnp:GNP
+cpi:CPI
+gre:GRE
+pets:PETS
+gmat:GMAT
+tofel:TOFEL
+modem:MODEM
+email:EMAIL
+icu:ICU
+gba:GBA
+psp:PSP
+mac:MAC
+pad:PAD
+fifa:FIFA
+voa:VOA
+pc:PC
+diy:DIY
+4g:4G
+他妈的:tmd
+他妈滴:tmd
+他妈地:tmd
+他妈得:tmd
+她妈的:tmd
+她妈滴:tmd
+她妈地:tmd
+她妈得:tmd
+他奶奶的:tnnd
+他奶奶得:tnnd
+他奶奶德:tnnd
+他奶奶滴:tnnd
+她奶奶的:tnnd
+她奶奶得:tnnd
+她奶奶德:tnnd
+她奶奶滴:tnnd
+奶奶得:nnd
+奶奶德:nnd
+奶奶滴:nnd
+他娘的:tnd
+他娘得:tnd
+他娘德:tnd
+他娘滴:tnd
+她娘的:tnd
+她娘得:tnd
+她娘德:tnd
+她娘滴:tnd
+妈了个逼:mlgb
+fuck:***
+.:.
+讯飞口水:讯飞口讯
+讯飞口去:讯飞口讯
+讯飞口县:讯飞口讯
+讯飞口气:讯飞口讯
+讯飞口信:讯飞口讯
+现在口信:讯飞口讯
+讯飞考训:讯飞口讯
+讯飞可去:讯飞口讯
+现在口讯:讯飞口讯
+现在口水:讯飞口讯
+讯飞口述:讯飞口讯
+现在口县:讯飞口讯
+讯菲:讯飞
+俊飞:讯飞
+性飞:讯飞
+狗训:口讯
+狗性:口讯
+狗讯:口讯
+口逊:口讯
+口性:口讯
+三g:3G
+3g:3G
+三G:3G
+真他妈:真tm
+真他妈的:真tmd
+讯飞输入法讯飞输入法讯飞输入法:很好
+讯飞口讯讯飞口讯讯飞口讯:很好
+讯飞语音云讯飞语音云讯飞语音云:很好
+讯飞语音云语音验证码:很好
+讯飞语音云语音验证系统:很好
+讯飞语音输入法后门词汇:好的谢谢
+科大讯飞后门词汇:必须的
+魅蓝手机三:魅蓝手机3
+米优唉:MIUI
+米优:MIUI
+米有外:MIUI
+米有歪:MIUI
+米以外:MIUI
+你y:MIUI
+你哟:MIUI
+10八大:十八大
+108大:十八大
+十8大:十八大
+18大:十八大
+18蛋:十八大
+18道:十八大
+赤八大:十八大
+四G:4G
+四g:4G
+1280乘720:1280x720
+1280乘以720:1280x720
+ghz:GHz
+apq8064:APQ8064
+a p q8064:APQ8064
+4核:四核
+小米手机二:小米手机2
+小米note二:小米note2
+红米note四:红米note4
+十六g:16G
+六十四g: 64G
+note26:note2 16
+note46:note4 16
+两百百:200百
+八百万像素:800万像素
+F/2.0:F/2.0
+讯飞语音加:讯飞语音+
+五八同城:58同城
+好123:hao123
+二逼猴:耳鼻喉
+3w点:www.
+3W点:www.
+3w:www
+3W.:www.
+www点:www.
+1万点com:10000.com
+3p:3P
+点com:.com
+好一二三:hao123
+谷歌.com:google.com
+谷歌点com:谷歌.com
+迅雷.:xunlei.
+迅雷点:xunlei.
+赶集.com:ganji.com
+赶集点com:ganji.com
+百度.com:baidu.com
+百度点com:baidu.com
+搜狐.com:souhu.com
+搜狐点com:souhu.com
+八零后:80后
+九零后:90后
+七零后:70后
+零零后:00后
+128事变:一二八事变
+沙尔克零四:沙尔克04
+多啦a梦:多啦A梦
+敢死队三:敢死队3
+小时代三:小时代3
+木乃伊三:木乃伊3
+拨打1万号:拨打10000
+双十一:双11
+酷六:酷6
+非诚勿扰二:非诚勿扰2
+速八酒店:速8酒店
+pro五:pro5
+pro六:pro6
+魅族pro五:魅族pro5
+魅族pro六:魅族pro6
+p p t v:pptv
+ppt v:pptv
+p ptv:pptv
+p pt v:pptv
+p p tv:pptv
+魅族e:魅族E
+smartisan os:Smartisan OS
+锤子系统:Smartisan OS
+smartisan:Smartisan
+smartisan t1:Smartisan T1
+smartisan t2:Smartisan T2
+smartisan m1:Smartisan M1
+smartisan m1l:Smartisan M1L
+锤子t1:锤子T1
+锤子t2:锤子T2
+锤子m1:锤子M1
+锤子m1l:锤子M1L
+big bang:Big Bang
+ios:IOS
+fbi:FBI
+kfc:KFC
+张3:张三
+AI:A.I.
+后处理版本号:2.1.2017.1115
+讯飞语音识别版本号:3.1.0

BIN
config/aiui/esr/word.bin


BIN
config/aiui/x64/aiui.dll


BIN
config/aiui/x64/aiui.lib


BIN
config/aiui/x64/vtn_mic1.dll


BIN
config/aiui/x86/aiui.dll


BIN
config/aiui/x86/aiui.lib


BIN
config/aiui/x86/vtn_mic1.dll


+ 128 - 0
config/config.yaml

@@ -0,0 +1,128 @@
+# 机器人AI语音识别配置文件
+
+# 服务配置
+machine-name: "robot_ai"
+version: "1.0.0"
+machine-id: "d21e4997e4f78314ca991ffa8734c366"
+machinename: "小勇"
+# 摄像头位置
+server:
+#  ip: "192.168.123.21"
+  ip: "192.168.123.38"
+  port: 19199
+
+# 摄像头地址
+camera_url: "http://127.0.0.1:34550/camera_base64"
+# 语音识别服务选择配置
+speech_recognition:
+  # 选择使用的语音识别服务: "xunfei_aiui", "baidu_realtime" 或 "xunfei_linux"
+  service: "xunfei_linux"
+
+#params_data:
+#  app_id: "121000000147"
+#  process_code: "queryListAPIVcloud"
+#  dataId: "2100000009"
+#  key: "G7H3J9K2L5M8N4P1Q6R3S2T9U6V7W8X5"
+#  userId: "jyyd"
+#  full_identifier: "newland@523652"
+#  params:
+#    regId: "r1957649584173424640"
+## 基站客流数据
+#base_stations_url: "http://robot.yun36.com:8066/base_stations"
+ssh_host: "robot.yun36.com"
+ssh_port: 2066
+ssh_user: "root"
+ssh_pass: "7P8xxEEw9vGx0Yf%"
+
+db_host: "127.0.0.1"
+db_port: 33066
+db_user: "root"
+db_pass: "qweICT!@#444"
+db_name: "passenger_flow"
+table: "flow_data"
+
+# 百炼知识库
+baolian_api_key: "sk-1dcf9fc95ee54c3bbb2a3fdc9ebaa49f"
+baolian_model: "qwen-vl-max-latest"
+baolian_nlp_model: "qwen-plus-latest"
+
+# 网络配置
+network:
+  connection_timeout: 5  # 连接超时时间(秒)
+  receive_timeout: 3     # 接收超时时间(秒)
+  ping_interval: 10      # 心跳检测间隔(秒)
+  reconnect_delay: 5     # 重连延迟时间(秒)
+
+# 日志配置
+logging:
+  level: "INFO"
+  file: "log/robot_ai.log"
+  format: "%(asctime)s.%(msecs)03d %(levelname)s:%(message)s"
+  date_format: "%Y-%m-%d %H:%M:%S"
+
+
+  
  # 百度端到端实时语音识别配置
  # NOTE(review): at this indentation the "baidu_realtime" block below parses
  # as a child of "logging:", which contradicts this comment — it likely
  # belongs under "speech_recognition:". Confirm against the code that reads
  # it before re-indenting.
+  baidu_realtime:
+    client_id: "erJ8dFilykzXBLf09Sm0u0YQ"
+    client_secret: "EP4TqMs3eRriCFnJ3Dbu9YtHAq1FtP5H"
+    api_key: ""  # iam API_KEY或TOKEN二选一
+    streaming_url: "wss://aip.baidubce.com/ws/2.0/speech/v1/realtime"
+    model_name: "audio-realtime"
+    # 音频参数配置
+    audio:
+      sample_rate: 16000
+      channels: 1
+      format: "int16"
+      chunk_size: 1024
+    # 输出音频配置
+    output_audio:
+      sample_rate: 24000
+      channels: 1
+      format: "int16"
+
+# TTS配置
+tts:
+  voice: "x5_lingfeiyi_flow"
+  speed: 50
+  pitch: 50
+  volume: 50
+
+# 科大讯飞TTS配置
+xunfei:
+  app_id: "f016fce3"
+  api_key: "fcb58dc79de9b0568d2287bd8184e291"
+  api_secret: "YTFiN2NkOGVjNTVjY2QyMTlmMTViOTBh"
+  streaming_tts_url: "wss://aiui.xf-yun.com/v3/aiint/sos"
+  scene: "test"
+  category:
+    - "OS20015785810.ACTIONS"
+    - "OS20015785810.ACTIONS_${area}"
+    - "OS20015785810.control_aiot"
+    - "OS20015785810.VoiceNavigation"
+    - "OS20015785810.CMD_ACTIONS"
+
+# 开发环境配置
+development:
+  debug: false
+  log_to_console: true
+  log_to_file: true
+
+# 生产环境配置
+production:
+  debug: false
+  log_to_console: false
+  log_to_file: true
+
+pc2:
+  ip: "192.168.123.164"
+  port: 8839
+  base_url: "http://192.168.123.164:8839"
+  endpoints:
+      robot_action: "/dbqa_app/pub_webrtc"
+      robot_waypoint: "/dbqa_app/pub_waypoint"
+      robot_cmd_action: "/dbqa_app/pub_remote_cmd"
+      qa_callback: "/rest_api/qa_callback"
+      led: "/rest_api/led"
+
+music_url: "http://192.168.0.10:34505/tts_audio_control_http"

+ 102 - 0
config/config/action_config.py

@@ -0,0 +1,102 @@
+'''
+Author: zhaoyong 77912776@qq.com
+Date: 2025-08-24 13:52:33
+LastEditTime: 2025-08-24 14:25:50
+LastEditors: zhaoyong 77912776@qq.com
+FilePath: \robot_ai\config\config\action_config.py
+Description: 头部注释配置模板
+'''
+"""
+动作配置管理模块
+统一加载和管理 action.yaml 配置文件
+"""
+import yaml
+import os
+from pathlib import Path
+from utils.logger import logger
+from typing import Dict, Any
+
+
class ActionConfig:
    """Singleton loader for the action.yaml configuration file.

    The parsed YAML is cached at class level, so all instances share one
    configuration and the file is read at most once until reload_config().
    """

    _instance = None
    # Cached parsed configuration; shared by every (singleton) instance.
    _config = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super(ActionConfig, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        # Load lazily, only the first time the singleton is constructed.
        if self._config is None:
            self._load_config()

    def _load_config(self):
        """Load action.yaml from the parent config directory.

        On any failure (missing file, parse error, empty document) the
        config falls back to an empty dict so the accessors below never
        raise on a None config.
        """
        try:
            # action.yaml lives one directory above this module.
            config_path = Path(__file__).parent.parent / "action.yaml"

            if not config_path.exists():
                logger.error(f"配置文件不存在: {config_path}")
                self._config = {}
                return

            with open(config_path, 'r', encoding='utf-8') as f:
                # safe_load returns None for an empty document; normalize
                # to {} so dict accessors below stay safe.
                self._config = yaml.safe_load(f) or {}

            logger.info("动作配置文件加载成功")

        except Exception as e:
            logger.error(f"加载动作配置文件失败: {e}")
            self._config = {}

    def get_actions(self) -> Dict[str, str]:
        """Return the action-name constants ('actions' section)."""
        return self._config.get('actions', {})

    def get_action_dict(self) -> Dict[str, int]:
        """Return the action-to-command mapping ('action_dict' section)."""
        return self._config.get('action_dict', {})

    def get_point_dict(self) -> Dict[str, str]:
        """Return the waypoint mapping ('point_dict' section)."""
        return self._config.get('point_dict', {})

    def get_action_value(self, action_name: str) -> int:
        """Return the numeric command for *action_name* (None if unknown)."""
        return self.get_action_dict().get(action_name)

    def get_point_value(self, point_name: str) -> str:
        """Return the waypoint value for *point_name* (None if unknown)."""
        return self.get_point_dict().get(point_name)

    def reload_config(self):
        """Drop the cached config and re-read action.yaml."""
        self._config = None
        self._load_config()
        logger.info("动作配置文件重新加载完成")
+
+
# Module-level singleton used by the accessors below.
action_config = ActionConfig()

# Backward-compatible module-level accessors (thin aliases kept so older
# call sites that import the functions directly keep working).


def get_action_dict() -> Dict[str, int]:
    """Alias for ActionConfig.get_action_dict() (backward compatible)."""
    return action_config.get_action_dict()


def get_point_dict() -> Dict[str, str]:
    """Alias for ActionConfig.get_point_dict() (backward compatible)."""
    return action_config.get_point_dict()


def get_actions() -> Dict[str, str]:
    """Alias for ActionConfig.get_actions() (backward compatible)."""
    return action_config.get_actions()

+ 147 - 0
config/config/aiot_config.py

@@ -0,0 +1,147 @@
+"""
+AIOT 配置管理模块
+统一加载和管理 aiot.yaml 配置文件
+"""
+import yaml
+import os
+from pathlib import Path
+from utils.logger import logger
+from typing import Dict, Any
+
+
class AiotConfig:
    """Singleton loader for the aiot.yaml configuration file.

    The parsed YAML is cached at class level, so all instances share one
    configuration and the file is read at most once until reload_config().
    """

    _instance = None
    # Cached parsed configuration; shared by every (singleton) instance.
    _config = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super(AiotConfig, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        # Load lazily, only the first time the singleton is constructed.
        if self._config is None:
            self._load_config()

    def _load_config(self):
        """Load aiot.yaml from the parent config directory.

        On any failure (missing file, parse error, empty document) the
        config falls back to an empty dict so the accessors below never
        raise on a None config.
        """
        try:
            # aiot.yaml lives one directory above this module.
            config_path = Path(__file__).parent.parent / "aiot.yaml"

            if not config_path.exists():
                logger.error(f"AIOT配置文件不存在: {config_path}")
                self._config = {}
                return

            with open(config_path, 'r', encoding='utf-8') as f:
                # safe_load returns None for an empty document; normalize
                # to {} so dict accessors below stay safe.
                self._config = yaml.safe_load(f) or {}

            logger.info("AIOT配置文件加载成功")

        except Exception as e:
            logger.error(f"加载AIOT配置文件失败: {e}")
            self._config = {}

    def get_machine_id(self) -> str:
        """Return machine.machine-id ('' if missing)."""
        return self._config.get('machine', {}).get('machine-id', '')

    def get_machine_name(self) -> str:
        """Return machine.machine-name ('' if missing)."""
        return self._config.get('machine', {}).get('machine-name', '')

    def get_aiot_platform_config(self) -> Dict[str, Any]:
        """Return the entire aiot_platform section."""
        return self._config.get('aiot_platform', {})

    def get_aiot_host(self) -> str:
        """Return aiot_platform.host ('' if missing)."""
        return self._config.get('aiot_platform', {}).get('host', '')

    def get_aiot_app_id(self) -> str:
        """Return aiot_platform.app_id ('' if missing)."""
        return self._config.get('aiot_platform', {}).get('app_id', '')

    def get_aiot_app_secret(self) -> str:
        """Return aiot_platform.app_secret ('' if missing)."""
        return self._config.get('aiot_platform', {}).get('app_secret', '')

    def get_aiot_union_id(self) -> str:
        """Return aiot_platform.union_id ('' if missing)."""
        return self._config.get('aiot_platform', {}).get('union_id', '')

    def get_scenes(self) -> Dict[str, str]:
        """Return the scenes section."""
        return self._config.get('scenes', {})

    def get_devices(self) -> Dict[str, Dict[str, Any]]:
        """Return the devices section."""
        return self._config.get('devices', {})

    def get_device_config(self, device_name: str) -> Dict[str, Any]:
        """Return the config dict for *device_name* ({} if unknown)."""
        return self._config.get('devices', {}).get(device_name, {})

    def reload_config(self):
        """Drop the cached config and re-read aiot.yaml."""
        self._config = None
        self._load_config()
        logger.info("AIOT配置文件重新加载完成")
+
+
# Module-level singleton used by the accessors below.
aiot_config = AiotConfig()

# Backward-compatible module-level accessors (thin aliases kept so older
# call sites that import the functions directly keep working).


def get_machine_id() -> str:
    """Alias for AiotConfig.get_machine_id() (backward compatible)."""
    return aiot_config.get_machine_id()


def get_machine_name() -> str:
    """Alias for AiotConfig.get_machine_name() (backward compatible)."""
    return aiot_config.get_machine_name()


def get_aiot_platform_config() -> Dict[str, Any]:
    """Alias for AiotConfig.get_aiot_platform_config() (backward compatible)."""
    return aiot_config.get_aiot_platform_config()


def get_aiot_host() -> str:
    """Alias for AiotConfig.get_aiot_host() (backward compatible)."""
    return aiot_config.get_aiot_host()


def get_aiot_app_id() -> str:
    """Alias for AiotConfig.get_aiot_app_id() (backward compatible)."""
    return aiot_config.get_aiot_app_id()


def get_aiot_app_secret() -> str:
    """Alias for AiotConfig.get_aiot_app_secret() (backward compatible)."""
    return aiot_config.get_aiot_app_secret()


def get_aiot_union_id() -> str:
    """Alias for AiotConfig.get_aiot_union_id() (backward compatible)."""
    return aiot_config.get_aiot_union_id()


def get_scenes() -> Dict[str, str]:
    """Alias for AiotConfig.get_scenes() (backward compatible)."""
    return aiot_config.get_scenes()


def get_devices() -> Dict[str, Dict[str, Any]]:
    """Alias for AiotConfig.get_devices() (backward compatible)."""
    return aiot_config.get_devices()


def get_device_config(device_name: str) -> Dict[str, Any]:
    """Alias for AiotConfig.get_device_config() (backward compatible)."""
    return aiot_config.get_device_config(device_name)

+ 206 - 0
config/config/dify_config.py

@@ -0,0 +1,206 @@
+"""
+Dify 配置管理模块
+统一加载和管理 dify.yaml 配置文件
+"""
+import threading
+import time
+
+import yaml
+import os
+from pathlib import Path
+from utils.logger import logger
+from watchdog.observers import Observer
+from watchdog.events import FileSystemEventHandler
+
+
class ConfigFileHandler(FileSystemEventHandler):
    """Watchdog callback that hot-reloads DifyConfig when dify.yaml changes."""

    def __init__(self, config_instance):
        self.config_instance = config_instance
        # Timestamp of the last modification event we acted on (debouncing).
        self.last_modified = 0

    def on_modified(self, event):
        """Reload the config on a dify.yaml edit, debounced to 0.5 s."""
        if not event.src_path.endswith('dify.yaml'):
            return
        now = time.time()
        # Editors typically fire several events per save; act on one only.
        if now - self.last_modified < 0.5:
            return
        self.last_modified = now
        logger.info("检测到配置文件修改,开始重载配置")
        self.config_instance.reload_config()
+
+
class DifyConfig:
    """Singleton manager for config/dify.yaml.

    Loads the YAML file, resets ``models.current_model`` to ``xunfei`` on
    first load, watches the file for changes (hot reload) and exposes typed
    accessors for the ``models`` and ``vision`` sections.
    """

    _instance = None          # singleton instance
    _config = None            # parsed YAML mapping, shared across instances
    _observer = None          # watchdog Observer for the config directory
    _observer_thread = None   # retained for backward compatibility (unused)

    def __new__(cls):
        # Classic singleton: every DifyConfig() returns the same object.
        if cls._instance is None:
            cls._instance = super(DifyConfig, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        # _config is a class attribute, so the expensive setup runs only on
        # the very first construction of the singleton.
        if self._config is None:
            self._load_config()
            self._init_default_model()
            self._start_file_watcher()

    @staticmethod
    def _config_file() -> Path:
        """Absolute path of dify.yaml (one directory above this module)."""
        return Path(__file__).parent.parent / "dify.yaml"

    def _start_file_watcher(self):
        """Watch the config directory and hot-reload on dify.yaml changes."""
        try:
            config_dir = self._config_file().parent

            self._observer = Observer()
            event_handler = ConfigFileHandler(self)
            self._observer.schedule(event_handler, str(config_dir), recursive=False)

            # BUGFIX: Observer.start() is already non-blocking (it spawns its
            # own worker thread), so the previous extra wrapper thread was
            # redundant and exited immediately.  Mark the observer thread as
            # daemon so it never blocks interpreter shutdown.
            self._observer.daemon = True
            self._observer.start()

            logger.info("配置文件监听器已启动")

        except Exception as e:
            logger.error(f"启动文件监听器失败: {e}")

    def stop_file_watcher(self):
        """Stop and join the watchdog observer, if one was started."""
        if self._observer:
            self._observer.stop()
            self._observer.join()
            logger.info("配置文件监听器已停止")

    def _load_config(self):
        """Load dify.yaml into ``self._config`` ({} on any failure)."""
        try:
            config_path = self._config_file()

            if not config_path.exists():
                logger.error(f"Dify配置文件不存在: {config_path}")
                self._config = {}
                return

            with open(config_path, 'r', encoding='utf-8') as f:
                # BUGFIX: safe_load returns None for an empty file, which
                # previously left _config as None and broke every accessor
                # (and re-triggered the __init__ "first load" path).
                self._config = yaml.safe_load(f) or {}

            logger.info("Dify配置文件加载成功")

        except Exception as e:
            logger.error(f"加载Dify配置文件失败: {e}")
            self._config = {}

    def _init_default_model(self):
        """On first load, force ``models.current_model`` back to 'xunfei'."""
        try:
            models = self._config.get('models')
            # BUGFIX: a missing/invalid 'models' section used to raise
            # KeyError on the assignment below (swallowed by the except).
            if not isinstance(models, dict):
                return
            if models.get('current_model') != 'xunfei':
                models['current_model'] = 'xunfei'
                logger.info("首次加载,将current_model设置为xunfei")
                # Persist the reset so the file matches the in-memory state.
                with open(self._config_file(), 'w', encoding='utf-8') as f:
                    yaml.safe_dump(self._config, f, default_flow_style=False, allow_unicode=True)
                logger.info("配置文件已更新")
        except Exception as e:
            logger.error(f"恢复默认模型失败: {e}")

    def reload_config(self):
        """Re-read dify.yaml from disk (invoked by the file watcher)."""
        logger.info("开始重载Dify配置文件")
        self._load_config()
        logger.info("Dify配置文件重载完成")

    def set_current_model(self, model_name: str) -> bool:
        """Select *model_name* as the current model, persisting to dify.yaml.

        :param model_name: key that must already exist under ``models``
        :return: True on success, False if the model is unknown or I/O fails
        """
        try:
            # The model must be declared in the in-memory config.
            if model_name not in self._config.get('models', {}):
                logger.error(f"模型 {model_name} 不存在于配置中")
                return False

            config_path = self._config_file()
            if not config_path.exists():
                logger.error(f"Dify配置文件不存在: {config_path}")
                return False

            # Re-read the file so unrelated on-disk edits are preserved.
            with open(config_path, 'r', encoding='utf-8') as f:
                config_data = yaml.safe_load(f) or {}

            # BUGFIX: setdefault guards against a file missing 'models',
            # which previously raised KeyError here.
            config_data.setdefault('models', {})['current_model'] = model_name

            with open(config_path, 'w', encoding='utf-8') as f:
                yaml.safe_dump(config_data, f, default_flow_style=False, allow_unicode=True)

            # Keep the in-memory copy in sync with the file.
            self._config['models']['current_model'] = model_name

            logger.info(f"成功将当前模型设置为: {model_name}")
            return True

        except Exception as e:
            logger.error(f"设置当前模型失败: {e}")
            return False

    def get_available_models(self) -> list:
        """List configured model names, excluding section-level keys."""
        exclude_keys = {'switch', 'api-key', 'url', 'current_model'}
        models_config = self._config.get('models', {})
        return [key for key in models_config if key not in exclude_keys]

    def get_user(self) -> str:
        """Return the Dify user identifier ('' if unset)."""
        return self._config.get('user', '')

    def get_vision_switch(self) -> bool:
        """Return whether the vision pipeline is enabled."""
        return self._config.get('vision', {}).get('switch', False)

    def get_recognize_api_key(self) -> str:
        """Return the vision recognition API key ('' if unset)."""
        return self._config.get('vision', {}).get('recognize-api-key', '')

    def get_recognize_url(self) -> str:
        """Return the vision recognition endpoint URL ('' if unset)."""
        return self._config.get('vision', {}).get('recognize-url', '')

    def get_models_switch(self) -> bool:
        """Return whether the models section is enabled."""
        return self._config.get('models', {}).get('switch', False)

    def get_models_api_key(self) -> str:
        """Return the model API key ('' if unset)."""
        return self._config.get('models', {}).get('api-key', '')

    def get_models_url(self) -> str:
        """Return the model endpoint URL ('' if unset)."""
        return self._config.get('models', {}).get('url', '')

    def get_model_dify_type(self, model_name) -> str:
        """Return the ``dify_type`` declared for *model_name* ('' if unset)."""
        return self._config.get('models', {}).get(model_name, {}).get('dify_type', '')

    def get_current_mode(self) -> str:
        """Return the name of the currently selected model ('' if unset)."""
        return self._config.get('models', {}).get('current_model', '')
+
# Global singleton; constructing it loads dify.yaml and starts the watcher.
difyconfig = DifyConfig()

+ 149 - 0
config/config/echo_cancellation_conf.py

@@ -0,0 +1,149 @@
+# -*- coding: utf-8 -*-
+"""
+回声消除配置文件
+管理自我声音检测和回声消除的相关参数
+"""
+
+
class EchoCancellationConf:
    """Tunable parameters for self-voice detection / echo cancellation."""

    # Feature toggles
    ENABLE_ECHO_CANCELLATION = True  # master switch for echo cancellation
    ENABLE_INTERRUPT_DURING_PLAYBACK = True  # allow barge-in while TTS plays

    # Audio processing parameters
    SAMPLE_RATE = 16000  # Hz
    FRAME_SIZE = 1024    # samples per frame
    OVERLAP_RATIO = 0.5  # frame overlap ratio

    # Threshold parameters
    ENERGY_THRESHOLD = 500.0        # base energy threshold
    USER_VOICE_ENERGY_MULTIPLIER = 2.0  # multiplier for user-speech energy
    CORRELATION_THRESHOLD = 0.7     # fingerprint correlation threshold

    # Time-window parameters
    TIME_WINDOW = 3.0               # fingerprint comparison window (s)
    TTS_FADE_OUT_DURATION = 3.0     # post-TTS fade-out (s); 3 s guards against audio leakage
    TTS_REFERENCE_WINDOW = 3.0      # TTS reference window (s), extended to 3 s

    # Spectral analysis parameters
    FFT_SIZE = 512      # FFT length (must be a power of two)
    MEL_FILTERS = 13    # number of Mel filters

    # Cache limits
    MAX_TTS_FINGERPRINTS = 100      # max cached TTS fingerprints
    MAX_RECORDING_BUFFER = 1000     # max recording-buffer entries

    # User speech-detection parameters
    VOICE_DETECTION = {
        'energy_multiplier': 2.0,           # energy multiplier threshold
        'spectral_diff_threshold': 0.5,     # spectral-difference threshold
        'energy_diff_threshold': 0.3,       # energy difference threshold (30%)
        'low_freq_weight': 0.7,             # low-frequency weight
        'recent_tts_check_count': 3,        # recent TTS fingerprints to check
    }

    # Balanced TTS filtering parameters
    TTS_FILTERING = {
        'strict_mode': False,               # strict mode off - allow barge-in
        'variant_detection_window': 1.0,    # TTS-variant detection window
        'energy_attenuation_range': (0.2, 0.7),  # acceptable attenuation range
        'variant_similarity_threshold': 0.6,     # variant similarity threshold
        'frequency_correlation_threshold': 0.5,   # frequency correlation threshold
        'definite_voice_energy_multiplier': 2.5,  # definite user-speech energy multiplier
        'max_similarity_with_tts': 0.4,           # max allowed similarity with TTS
        'min_spectral_variation': 0.7,            # min spectral variation required
        'min_spectral_range': 2.2,                # min spectral dynamic range required
    }

    # Debugging / logging
    DEBUG_MODE = False              # debug mode
    LOG_DETECTION_DETAILS = False   # log per-frame detection details
    LOG_AUDIO_FINGERPRINTS = False  # log audio fingerprints

    # Performance tuning
    CLEANUP_INTERVAL = 1.0          # cleanup interval (s)
    PROCESSING_TIMEOUT = 0.1        # per-frame processing timeout (s)

    @classmethod
    def get_user_voice_energy_threshold(cls):
        """Effective user-speech energy threshold (base * multiplier)."""
        return cls.USER_VOICE_ENERGY_MULTIPLIER * cls.ENERGY_THRESHOLD

    @classmethod
    def is_debug_enabled(cls):
        """Whether debug mode is on."""
        return cls.DEBUG_MODE

    @classmethod
    def should_log_detection_details(cls):
        """Whether per-frame detection details should be logged."""
        return cls.LOG_DETECTION_DETAILS or cls.DEBUG_MODE

    @classmethod
    def should_log_audio_fingerprints(cls):
        """Whether audio fingerprints should be logged."""
        return cls.LOG_AUDIO_FINGERPRINTS or cls.DEBUG_MODE

    @classmethod
    def get_voice_detection_config(cls):
        """Shallow copy of the speech-detection parameter dict."""
        return dict(cls.VOICE_DETECTION)

    @classmethod
    def update_config(cls, **kwargs):
        """Update class-level parameters or VOICE_DETECTION entries in place."""
        for name, value in kwargs.items():
            upper_name = name.upper()
            if hasattr(cls, upper_name):
                setattr(cls, upper_name, value)
            elif name in cls.VOICE_DETECTION:
                cls.VOICE_DETECTION[name] = value

    @classmethod
    def get_all_config(cls):
        """Mapping of every public, non-callable class attribute."""
        return {
            name: getattr(cls, name)
            for name in dir(cls)
            if not name.startswith('_') and not callable(getattr(cls, name))
        }

    @classmethod
    def validate_config(cls):
        """Sanity-check the parameters; return a list of error messages."""
        checks = [
            # (failed?, message) - evaluated in the documented order
            (cls.ENERGY_THRESHOLD <= 0,
             "ENERGY_THRESHOLD必须大于0"),
            (cls.CORRELATION_THRESHOLD < 0 or cls.CORRELATION_THRESHOLD > 1,
             "CORRELATION_THRESHOLD必须在0-1之间"),
            (cls.TIME_WINDOW <= 0,
             "TIME_WINDOW必须大于0"),
            (cls.TTS_FADE_OUT_DURATION < 0,
             "TTS_FADE_OUT_DURATION不能为负数"),
            (cls.FFT_SIZE <= 0 or (cls.FFT_SIZE & (cls.FFT_SIZE - 1)) != 0,
             "FFT_SIZE必须是2的幂次方"),
            (cls.MEL_FILTERS <= 0,
             "MEL_FILTERS必须大于0"),
            (cls.MAX_TTS_FINGERPRINTS <= 0,
             "MAX_TTS_FINGERPRINTS必须大于0"),
            (cls.MAX_RECORDING_BUFFER <= 0,
             "MAX_RECORDING_BUFFER必须大于0"),
        ]
        return [message for failed, message in checks if failed]


# Global configuration instance
echo_config = EchoCancellationConf()

+ 184 - 0
config/config/settings.py

@@ -0,0 +1,184 @@
+"""
+机器人AI语音识别配置文件
+支持环境变量和YAML配置文件
+"""
+import os
+import yaml
+from typing import Tuple, Dict, Any
+from pathlib import Path
+from utils.logger import logger
+
+
class Config:
    """YAML-backed application configuration with typed property accessors.

    Values come from ``config.yaml`` in the parent directory; every accessor
    falls back to a sensible default when the file or key is missing.
    """

    def __init__(self):
        self._config_data = {}
        self._load_config()

    def _load_config(self):
        """(Re)load every configuration source."""
        self._load_yaml_config()

    def _load_yaml_config(self):
        """Read config.yaml (located one directory up) into memory."""
        config_file = Path(__file__).parent.parent / "config.yaml"
        if not config_file.exists():
            logger.warning(f"配置文件不存在: {config_file}")
            self._config_data = {}
            return
        try:
            with open(config_file, 'r', encoding='utf-8') as f:
                self._config_data = yaml.safe_load(f) or {}
            logger.info(f"成功加载配置文件: {config_file}")
        except Exception as e:
            logger.info(f"加载YAML配置文件失败: {e}")
            self._config_data = {}

    # -- internal helpers --------------------------------------------------

    def _section_value(self, section: str, key: str, default):
        """Return ``config[section][key]``, or *default* when absent."""
        return self._config_data.get(section, {}).get(key, default)

    # -- server / network --------------------------------------------------

    @property
    def SERVER_IP(self) -> str:
        """IP address of the Xunfei suite server."""
        return self._section_value('server', 'ip', '192.168.0.26')

    @property
    def SERVER_PORT(self) -> int:
        """Server port."""
        return self._section_value('server', 'port', 19199)

    @property
    def SERVER_ADDRESS(self) -> Tuple[str, int]:
        """(ip, port) tuple for socket connections."""
        return (self.SERVER_IP, self.SERVER_PORT)

    @property
    def CONNECTION_TIMEOUT(self) -> int:
        """Connect timeout in seconds."""
        return self._section_value('network', 'connection_timeout', 5)

    @property
    def RECEIVE_TIMEOUT(self) -> int:
        """Receive timeout in seconds."""
        return self._section_value('network', 'receive_timeout', 3)

    @property
    def PING_INTERVAL(self) -> int:
        """Heartbeat interval in seconds."""
        return self._section_value('network', 'ping_interval', 10)

    @property
    def RECONNECT_DELAY(self) -> int:
        """Delay before reconnecting, in seconds."""
        return self._section_value('network', 'reconnect_delay', 5)

    # -- identity / logging ------------------------------------------------

    @property
    def machinename(self) -> str:
        """Public name the robot answers to."""
        return self._config_data.get('machinename', '小勇')

    @property
    def LOG_LEVEL(self) -> str:
        """Logging level."""
        return self._section_value('logging', 'level', 'INFO')

    @property
    def LOG_FILE(self) -> str:
        """Log file path."""
        return self._section_value('logging', 'file', 'robot_ai.log')

    @property
    def LOG_FORMAT(self) -> str:
        """Log record format string."""
        return self._section_value('logging', 'format', '%(asctime)s %(levelname)s:%(message)s')

    @property
    def LOG_DATE_FORMAT(self) -> str:
        """Log timestamp format."""
        return self._section_value('logging', 'date_format', '%Y-%m-%d %H:%M:%S')

    # -- TTS ---------------------------------------------------------------

    @property
    def TTS_VOICE(self) -> str:
        """TTS voice name."""
        return self._section_value('tts', 'voice', 'x4_lingxiaoxuan_oral')

    @property
    def TTS_SPEED(self) -> int:
        """TTS speaking speed."""
        return self._section_value('tts', 'speed', 50)

    @property
    def TTS_PITCH(self) -> int:
        """TTS pitch."""
        return self._section_value('tts', 'pitch', 50)

    @property
    def TTS_VOLUME(self) -> int:
        """TTS volume."""
        return self._section_value('tts', 'volume', 50)

    # -- Xunfei credentials ------------------------------------------------

    @property
    def XUNFEI_API_KEY(self) -> str:
        """iFlytek API Key."""
        return self._section_value('xunfei', 'api_key', 'your_api_key_here')

    @property
    def XUNFEI_API_SECRET(self) -> str:
        """iFlytek API Secret."""
        return self._section_value('xunfei', 'api_secret', 'your_api_secret_here')

    @property
    def XUNFEI_APP_ID(self) -> str:
        """iFlytek App ID."""
        return self._section_value('xunfei', 'app_id', 'your_app_id_here')

    @property
    def XUNFEI_STREAMING_TTS_URL(self) -> str:
        """iFlytek streaming TTS websocket URL."""
        return self._section_value('xunfei', 'streaming_tts_url', 'wss://tts-api.xfyun.cn/v2/tts')

    @property
    def XUNFEI_SCENE(self) -> str:
        """iFlytek scene name."""
        return self._section_value('xunfei', 'scene', 'main')

    # -- runtime flags -----------------------------------------------------

    @property
    def ENVIRONMENT(self) -> str:
        """Runtime environment name."""
        return self._config_data.get('environment', 'development')

    @property
    def DEBUG(self) -> bool:
        """Debug mode flag."""
        return self._config_data.get('debug', False)

    def get_config(self) -> Dict[str, Any]:
        """Shallow copy of the full raw configuration mapping."""
        return self._config_data.copy()

    def reload(self):
        """Re-read the configuration from disk."""
        self._load_config()

    # -- PC2 ---------------------------------------------------------------

    @property
    def pc2_base_url(self) -> str:
        """Base URL of the PC2 service."""
        return self._section_value('pc2', 'base_url', '')

    @property
    def pc2_time(self) -> int:
        """PC2 request timeout in seconds."""
        return self._section_value('pc2', 'timeout', 30)

    def get_pc2_url(self, endpoint: str) -> str:
        """
        Build the full PC2 API URL for an endpoint name.

        :param endpoint: endpoint key (robot_action/robot_waypoint/robot_cmd_action)
        :return: complete API URL ('' path when the endpoint is unknown)
        """
        path = self._section_value('pc2', 'endpoints', {}).get(endpoint, '')
        return f"{self.pc2_base_url}{path}"
+
+
# Global configuration instance; reads config.yaml at import time.
config = Config()

+ 26 - 0
config/dify.yaml

@@ -0,0 +1,26 @@
+models:
+  api-key: app-CvpbsWQAnQN1ejnXKzCblT8J
+  current_model: xunfei
+  deepseek:
+    dify_type: deepseek
+  ernie:
+    dify_type: ernie
+  kimi:
+    dify_type: kimi
+  llama:
+    dify_type: llama
+  qwen2.5:
+    dify_type: qwen2.5
+  qwen3:
+    dify_type: qwen3
+  qwenvl:
+    dify_type: qwenvl
+  switch: true
+  xunfei:
+    dify_type: xunfei
+url: http://robot.yun36.com:8066/v1/chat-messages
+user: howsoGQ@qq.com
+vision:
+  recognize-api-key: app-wzRAbuWYxm9WiyY6fpQzA7Cu
+  recognize-url: http://robot.yun36.com:8066/v1/chat-messages
+  switch: true

+ 77 - 0
config/xunfei_config.py

@@ -0,0 +1,77 @@
+# -*- coding:utf-8 -*-
+"""
+讯飞大模型多语种语音识别配置
+"""
+
+# 讯飞开放平台配置
+XUNFEI_CONFIG = {
+    "APPID": "f016fce3",
+    "APIKey": "fcb58dc79de9b0568d2287bd8184e291",
+    "APISecret": "YTFiN2NkOGVjNTVjY2QyMTlmMTViOTBh",
+    "URL": "wss://iat.cn-huabei-1.xf-yun.com/v1"
+}
+
+# 大模型多语种语音识别参数
+IAT_PARAMS = {
+    "domain": "slm",  # 指定访问的领域 slm
+    "language": "mul_cn",  # 语种 mul_cn
+    "accent": "mandarin",  # 口音
+    "eos": 6000,  # 静音多少秒停止识别 如6000毫秒
+    "vinfo": 1,  # 句子级别帧对齐
+    "result": {
+        "encoding": "utf8",
+        "compress": "raw",
+        "format": "json"
+    }
+}
+
+# 音频参数配置
+AUDIO_CONFIG = {
+    "frame_size": 1280,  # 每一帧的音频大小(建议值)
+    "interval": 0.04,  # 发送音频间隔(单位:s) - 建议40ms
+    "sample_rate": 16000,  # 采样率
+    "channels": 1,  # 声道数
+    "bit_depth": 16,  # 位深度
+    "encoding": "raw"  # 音频编码格式
+}
+
+# 支持的语种列表
+SUPPORTED_LANGUAGES = {
+    "zh": "中文",
+    "en": "英文",
+    "ja": "日语",
+    "ko": "韩语",
+    "ru": "俄语",
+    "fr": "法语",
+    "es": "西班牙语",
+    "ar": "阿拉伯语",
+    "de": "德语",
+    "th": "泰语",
+    "vi": "越南语",
+    "hi": "印地语",
+    "pt": "葡萄牙语",
+    "it": "意大利语",
+    "ms": "马来语",
+    "id": "印尼语",
+    "fil": "菲律宾语",
+    "tr": "土耳其语",
+    "el": "希腊语",
+    "cs": "捷克语",
+    "ur": "乌尔都语",
+    "bn": "孟加拉语",
+    "ta": "泰米尔语",
+    "uk": "乌克兰语",
+    "kk": "哈萨克语",
+    "uz": "乌兹别克语",
+    "pl": "波兰语",
+    "mn": "蒙语",
+    "sw": "斯瓦西里语",
+    "ha": "豪撒语",
+    "fa": "波斯语",
+    "nl": "荷兰语",
+    "sv": "瑞典语",
+    "ro": "罗马尼亚语",
+    "bg": "保加利亚语",
+    "ug": "维语",
+    "tib": "藏语"
+}

+ 1 - 0
core/__init__.py

@@ -0,0 +1 @@
+# 核心模块

+ 8 - 0
core/aiui/__init__.py

@@ -0,0 +1,8 @@
+"""
+AIUI 核心模块
+包含 AIUI 语音识别相关的核心组件
+"""
+
+from .recorder import Recorder
+
+__all__ = ['Recorder']

+ 282 - 0
core/aiui/recorder.py

@@ -0,0 +1,282 @@
+#!/usr/bin/python
+# coding=utf-8
+"""
+Author: zhaoyong 77912776@qq.com
+Date: 2025-07-02
+LastEditTime: 2025-08-24
+FilePath: \robot_ai\handlers\aiui\Recorder.py
+Description: 录音类(优化版)
+"""
+
+import platform
+import struct
+import threading
+import numpy as np
+import platform
+import sounddevice as sd
# Route capture through PulseAudio on Linux; elsewhere keep sounddevice's
# default input device.  NOTE(review): assumes a PulseAudio server exists on
# Linux hosts -- confirm for bare-ALSA deployments.
system = platform.system().lower()
if system == "linux":
    sd.default.device = 'pulse'
elif system == "windows":
    sd.default.device = None
elif system == "darwin":
    sd.default.device = None
+from contextlib import contextmanager
+from utils.echo_cancellation import get_self_voice_detector
+from utils.logger import logger
+
+
def calc_rms(audio_bytes: bytes) -> float:
    """Return the RMS amplitude of 16-bit little-endian PCM audio.

    :param audio_bytes: raw PCM16 byte string (mono or interleaved)
    :return: RMS value as a float; 0 for empty or unusable input

    BUGFIX: the previous struct.unpack(f'{len//2}h', ...) raised
    struct.error on odd-length input (buffer size mismatch); a trailing
    odd byte is now truncated instead of crashing the capture loop.
    """
    if not audio_bytes:
        return 0

    # Drop a trailing odd byte so malformed chunks cannot raise.
    usable = len(audio_bytes) - (len(audio_bytes) % 2)
    if usable == 0:
        return 0

    # Zero-copy view of the samples; '<i2' matches PCM16 little-endian.
    samples = np.frombuffer(audio_bytes[:usable], dtype='<i2')

    # Square in float64 to avoid int16 overflow.
    mean_squared = np.mean(samples.astype(np.float64) ** 2)
    if not np.isfinite(mean_squared) or mean_squared < 0:
        return 0

    rms = float(np.sqrt(mean_squared))
    return rms if np.isfinite(rms) else 0
+
+
class Recorder:
    """Microphone capture wrapper around sounddevice.RawInputStream.

    Picks a working input device at construction time, performs simple
    RMS-based voice-activity detection, and (optionally) drops frames that
    the self-voice detector classifies as the robot's own TTS playback.
    """

    def __init__(self, chunk: int, channels: int = 1, rate: int = 16000, fmt: str = None):
        """Select an input device and prepare capture state.

        :param chunk: block size passed to the stream and used to slice audio
        :param channels: number of input channels
        :param rate: desired sample rate in Hz
        :param fmt: sounddevice dtype string; defaults to 'int16'
        :raises RuntimeError: when no usable input device is found
        """
        self.chunk = chunk
        self.channels = channels
        self.target_rate = rate  # sample rate we want
        self.actual_rate = rate  # sample rate the chosen device actually uses
        self.format = fmt or 'int16'
        self.platform = platform.system().lower()

        self.device_index = self._find_device()
        if self.device_index is None:
            raise RuntimeError("❌ 未找到可用的音频输入设备")

        # Voice-activity-detection state
        self.is_voice_active = False
        self.voice_activity_threshold = 10.0
        self.voice_activity_count = 0
        self.voice_activity_lock = threading.RLock()

        # Echo cancellation: detector that recognizes our own TTS output
        self.self_voice_detector = get_self_voice_detector()
        self.enable_echo_cancellation = True

        # Audio stream state
        self._stream = None
        self._stream_lock = threading.RLock()
        self._is_running = False

        logger.info(
            f"🎙️ 录音器初始化: 声道={self.channels}, 目标采样率={self.target_rate}, 实际采样率={self.actual_rate}, 块大小={self.chunk}")
        logger.info(
            f"   使用设备[{self.device_index}]: {sd.query_devices(self.device_index)['name']}")

        # Warn when the device forced a different sample rate than requested.
        if self.actual_rate != self.target_rate:
            logger.warning(
                f"⚠️ 采样率不匹配: 目标={self.target_rate}Hz, 实际={self.actual_rate}Hz")

    # ----------------- Device selection -----------------
    def _find_device(self):
        """Pick a suitable audio input device; return its index or None.

        Also updates self.actual_rate with the rate the device accepted.
        """
        devices = [
            (i, d) for i, d in enumerate(sd.query_devices())
            if d.get("max_input_channels", 0) > 0
        ]
        if not devices:
            return None

        # Device-name priority differs per platform.
        if platform.system().lower() == 'linux':
            priority = ["pulse", "default", "sysdefault",
                        "AIUI-USB-MC", "rockchip-es8388"]
        else:
            # Windows (and any other non-Linux platform)
            priority = ["AIUI-USB-MC", "rockchip-es8388",
                        "USB Audio", "sysdefault"]

        # Sort by position in the priority list; unknown names go last (999).
        devices.sort(
            key=lambda x: next(
                (i for i, p in enumerate(priority)
                 if p.lower() in x[1]["name"].lower()),
                999
            )
        )

        # First pass: any device that supports the target sample rate.
        for idx, dev in devices:
            if self._check_sample_rate_support(idx, self.target_rate):
                logger.info(f"📍 选择输入设备: {dev['name']} ({idx})")
                self.actual_rate = self.target_rate
                return idx

        # Second pass: fall back to each device's own default sample rate.
        logger.warning(f"⚠️ 未找到支持 {self.target_rate}Hz 的设备,尝试使用默认设备")
        for idx, dev in devices:
            try:
                default_rate = int(dev.get('default_samplerate', 44100))
                if self._check_sample_rate_support(idx, default_rate):
                    logger.info(
                        f"📍 使用默认采样率设备: {dev['name']} ({idx}) - {default_rate}Hz")
                    self.actual_rate = default_rate
                    return idx
            except Exception as e:
                logger.debug(f"设备 {idx} 测试失败: {e}")
                continue

        # Last resort: first input device, with its default sample rate.
        if devices:
            logger.warning(
                f"⚠️ 使用第一个可用设备: {devices[0][1]['name']} ({devices[0][0]})")
            default_rate = int(devices[0][1].get('default_samplerate', 44100))
            self.actual_rate = default_rate
            return devices[0][0]

        return None

    def _check_sample_rate_support(self, device_index, target_rate):
        """Probe a device by opening (then closing) a stream at target_rate."""
        try:
            stream = sd.RawInputStream(
                samplerate=target_rate,
                channels=self.channels,
                dtype=self.format,
                blocksize=self.chunk,
                device=device_index
            )
            stream.close()
            return True
        except Exception as e:
            logger.warning(f"设备 {device_index} 不支持 {target_rate}Hz: {e}")
            return False

    # ----------------- Stream management -----------------
    @contextmanager
    def audio_stream(self):
        """Context manager that opens, starts and always closes the stream."""
        try:
            self._stream = sd.RawInputStream(
                samplerate=self.actual_rate,  # use the negotiated rate
                channels=self.channels,
                dtype=self.format,
                blocksize=self.chunk,
                device=self.device_index
            )
            self._stream.start()
            self._is_running = True
            logger.info(f"✅ 音频流已启动 (采样率: {self.actual_rate}Hz)")
            yield self._stream
        finally:
            self._close_stream()

    def _close_stream(self):
        """Stop and close the stream, logging (but not raising) errors."""
        try:
            if self._stream:
                if hasattr(self._stream, 'stop'):
                    self._stream.stop()
                if hasattr(self._stream, 'close'):
                    self._stream.close()
                logger.debug("🔄 音频流已关闭")
        except Exception as e:
            logger.warning(f"⚠️ 关闭音频流异常: {e}")
        finally:
            self._stream = None
            self._is_running = False

    # ----------------- Core recording logic -----------------
    def _voice_activity_update(self, rms: float):
        """Update VAD state: two consecutive loud frames mark voice active."""
        with self.voice_activity_lock:
            if rms > self.voice_activity_threshold:
                self.voice_activity_count += 1
                if self.voice_activity_count >= 2:
                    self.is_voice_active = True
            else:
                self.voice_activity_count = 0
                self.is_voice_active = False

    def read(self):
        """Generator yielding raw audio chunks from the input stream.

        Frames recognised as our own TTS playback are skipped when echo
        cancellation is enabled; RMS/VAD state is refreshed every 100 frames.
        """
        with self._stream_lock:
            with self.audio_stream() as stream:
                frame_count = 0
                buffer = b''

                while self._is_running and stream:
                    try:
                        # NOTE(review): self.chunk is passed to stream.read()
                        # as a frame count but used below as a byte count when
                        # slicing the buffer -- confirm this is intentional.
                        data, _ = stream.read(self.chunk)
                        buffer += data

                        while len(buffer) >= self.chunk:
                            chunk_data, buffer = buffer[:self.chunk], buffer[self.chunk:]

                            # Echo cancellation: drop frames that match TTS.
                            if self.enable_echo_cancellation and self.self_voice_detector:
                                if not self.self_voice_detector.process_recording_audio(chunk_data):
                                    continue

                            # RMS computation & VAD refresh every 100 frames.
                            if frame_count % 100 == 0:
                                rms = calc_rms(chunk_data)
                                self._voice_activity_update(rms)
                                logger.debug(
                                    f"🎚️ RMS: {rms:.2f}, 语音活动={self.is_voice_active}")

                            yield chunk_data
                            frame_count += 1
                    except Exception as e:
                        logger.error(f"❌ 音频读取异常: {e}")
                        break

    # ----------------- Control interface -----------------
    def close(self):
        """Fully shut down the recorder."""
        with self._stream_lock:
            logger.info("🔄 关闭录音器...")
            self._close_stream()
            logger.info("✅ 录音器已关闭")

    def stop_recording(self):
        """Stop capturing while keeping the selected device."""
        with self._stream_lock:
            logger.info("🛑 停止录音...")
            self._close_stream()

    def __del__(self):
        # Best-effort cleanup; never raise from a destructor.
        try:
            self.close()
        except Exception:
            pass
+
+
# ----------------- Debug entry point -----------------
if __name__ == "__main__":
    import time
    # Capture ~5 seconds of audio from the selected device, then shut down.
    recorder = Recorder(chunk=640)
    start = time.time()
    for data in recorder.read():
        if time.time() - start > 5:
            break
    recorder.close()

+ 54 - 0
core/baidu/message_processor.py

@@ -0,0 +1,54 @@
+"""
+百度实时语音识别消息处理器
+
+说明:不改动原有讯飞(AIUI)流程,单独提供百度处理器。
+该处理器负责启动/停止百度实时语音识别流程,核心音频采集、VAD门控、通道过滤、
+以及自回声抑制(在助手播报期间暂停上行音频)等逻辑由 `handlers.baidu.speech_handler.BaiduSpeechHandler` 内部实现。
+"""
+from utils.logger import logger
+import time
+from typing import Optional
+
+from handlers.baidu.speech_handler import BaiduSpeechHandler
+
+
class BaiduMessageProcessor:
    """Lifecycle wrapper for the Baidu realtime speech-recognition flow.

    Construction is cheap; audio capture, VAD gating, channel filtering and
    self-echo suppression all live inside BaiduSpeechHandler.
    """

    def __init__(self) -> None:
        # Not running until start() succeeds.
        self._running: bool = False
        self._handler: Optional[BaiduSpeechHandler] = None

    def start(self) -> bool:
        """Create the handler and begin recognition; True on success."""
        try:
            logger.info("初始化百度实时语音识别处理器…")
            self._handler = BaiduSpeechHandler()

            logger.info("启动百度实时语音识别…")
            if not self._handler.start_recognition():
                logger.error("百度实时语音识别启动失败")
                return False

            self._running = True
            logger.info("百度实时语音识别处理器已启动")
            return True
        except Exception as e:
            logger.error(f"百度处理器启动异常: {e}")
            return False

    def run_forever(self) -> None:
        """Block the caller in a 1 s poll loop while the processor runs."""
        while self._running:
            time.sleep(1)

    def stop(self) -> None:
        """Stop recognition and mark the processor as stopped."""
        self._running = False
        handler = self._handler
        if handler:
            try:
                handler.stop_recognition()
            except Exception as e:
                logger.error(f"停止百度处理器异常: {e}")
        logger.info("百度实时语音识别处理器已停止")

+ 165 - 0
core/message_processor.py

@@ -0,0 +1,165 @@
+"""
+消息处理核心逻辑模块
+"""
+from utils.logger import logger
+import struct
+import json
+from typing import Optional, Tuple
+
+from core.socket_client import SocketClient
+from strategies.confirm_process import ConfirmProcess
+from strategies.aiui_process import AiuiMessageProcess
+from handlers.xunfei.intent_handler import IntentHandler
+from handlers.xunfei.speech_handler import XunfeiSpeechHandler
+from handlers.xunfei.nlp_handler import NLPHandler
+from handlers.xunfei.knowledge_handler import KnowledgeHandler
+
+
+class MessageProcessor:
+    """消息处理核心类"""
+
+    def __init__(self, socket_client: SocketClient):
+        self.socket_client = socket_client
+        self.aiui_type = ""
+
+        # 初始化策略处理器
+        self.confirm_process = ConfirmProcess()
+        self.aiui_process = AiuiMessageProcess()
+
+        # 初始化业务处理器
+        self.intent_handler = IntentHandler()
+        self.speech_handler = XunfeiSpeechHandler()  # 使用讯飞AIUI处理器
+        self.nlp_handler = NLPHandler()
+        self.knowledge_handler = KnowledgeHandler()
+        # 把意图类注册到语音处理器
+        self.speech_handler.intent_handler = self.intent_handler
+        # 设置NLP处理器的意图处理器引用
+        self.nlp_handler.intent_handler = self.intent_handler
+        # 设置NLP处理器的IAT处理器引用
+        self.nlp_handler.iat_handler = self.speech_handler
+
+    def process(self) -> bool:
+        """
+        处理接收到的消息
+
+        Returns:
+            bool: 处理是否成功
+        """
+        try:
+            socket = self.socket_client.get_socket()
+            if not socket:
+                logger.error("Socket未连接,重新连接...")
+                self.socket_client.connected_event.clear()
+                self.socket_client.connect()
+                return False
+
+            # 设置接收超时
+            socket.settimeout(3)
+
+            # 接收消息头
+            recv_data = self.socket_client.receive_full_data(7)
+            if not recv_data:
+                # logger.error("未接收到数据,重新连接...")
+                self.socket_client.connected_event.clear()
+                self.socket_client.connect()
+                return False
+
+            if len(recv_data) < 7:
+                logger.error(f"数据不完整: {recv_data}")
+                return False
+
+            # 解析消息头
+            sync_head, user_id, msg_type, msg_length, msg_id = struct.unpack(
+                '<BBBHH', recv_data)
+
+            # 接收消息体
+            msg_data = self.socket_client.receive_full_data(msg_length + 1)
+            if len(msg_data) < msg_length + 1:
+                logger.error(f"消息体数据不完整: {msg_data}")
+                return False
+
+            # 解析消息数据
+            msg = msg_data[:msg_length]
+            check_code = msg_data[-1]  # 校验码
+
+            # 处理消息
+            if sync_head == 0xa5 and user_id == 0x01:
+                return self._handle_message(msg_type, msg_id, msg)
+
+            return True
+
+        except Exception as e:
+            logger.error(f"消息处理异常: {e}")
+            return False
+
+    def _handle_message(self, msg_type: int, msg_id: int, msg: bytes) -> bool:
+        """
+        处理具体消息类型
+
+        Args:
+            msg_type: 消息类型
+            msg_id: 消息ID
+            msg: 消息内容
+
+        Returns:
+            bool: 处理结果
+        """
+        try:
+            socket = self.socket_client.get_socket()
+
+            if msg_type == 0x01:
+                # 确认消息
+                self.confirm_process.process(socket, msg_id)
+
+            elif msg_type == 0x04:
+                # AIUI消息
+                self.confirm_process.process(socket, msg_id)
+                success, result = self.aiui_process.process(socket, msg)
+
+                if success:
+                    self.aiui_type = ""
+                    data = json.loads(result)
+                    self._get_aiui_type(data)
+
+                    # 处理唤醒事件
+                    if data.get('content', {}).get('eventType', {}) == 4:
+                        logger.info(f"唤醒成功:===={msg_id} 我在 ====")
+                        from utils.tts_client import play_text_async
+                        play_text_async('我在呢,有什么可以帮您的?', use_cache=True)
+
+                    # 根据AIUI类型处理
+                    if self.aiui_type == "iat":
+                        self.speech_handler.handle_iat_result(data)
+                    elif self.aiui_type == "nlp":
+                        self.nlp_handler.handle_nlp_result(data)
+                    elif self.aiui_type == "cbm_semantic":
+                        self.intent_handler.handle_intent_result(data)
+                    elif self.aiui_type == "cbm_knowledge":
+                        self.knowledge_handler.handle_knowledge_result(data)
+                else:
+                    logger.warning("AIUI消息处理失败")
+
+            return True
+
+        except Exception as e:
+            logger.error(f"消息类型处理异常: {e}")
+            return False
+
+    def _get_aiui_type(self, data: dict) -> None:
+        """
+        获取AIUI类型
+
+        Args:
+            data: AIUI数据
+        """
+        if 'content' in data:
+            content = data['content']
+            if 'info' in content:
+                info = content['info']
+                if 'data' in info and isinstance(info['data'], list) and len(info['data']) > 0:
+                    data_item = info['data'][0]
+                    if 'params' in data_item:
+                        params = data_item['params']
+                        sub_value = params.get('sub')
+                        if sub_value is not None:
+                            self.aiui_type = sub_value

+ 110 - 0
core/socket_client.py

@@ -0,0 +1,110 @@
+'''
+Author: zhaoyong 77912776@qq.com
+Date: 2025-08-14 22:19:23
+LastEditTime: 2025-08-24 14:21:05
+LastEditors: zhaoyong 77912776@qq.com
+FilePath: \robot_ai\core\socket_client.py
+Description: 头部注释配置模板
+'''
+"""
+Socket客户端连接管理模块
+"""
+import time
+from utils.logger import logger
+import struct
+from socket import socket, AF_INET, SOCK_STREAM, timeout
+from threading import Thread, Event
+from typing import Optional, Tuple
+from utils.network import ping_host
+from config.config.settings import config
+
+
+class SocketClient:
+    """Socket client connection manager for the Xunfei AIUI kit.
+
+    Connects on construction and keeps the link alive with a background
+    ping thread; ``connected_event`` reflects the believed link state.
+
+    NOTE(review): ``connect()`` may be invoked concurrently from the
+    ping thread and from message processors; access to ``client_socket``
+    is not synchronized — confirm this is safe under load.
+    """
+
+    def __init__(self):
+        # Underlying TCP socket; replaced on every (re)connect.
+        self.client_socket: Optional[socket] = None
+        self.server_address = config.SERVER_ADDRESS
+        # Set while the connection is believed to be up.
+        self.connected_event = Event()
+        # Set by close() to stop the reconnect and heartbeat loops.
+        self.stop_event = Event()
+        self.connect()
+        self.start_ping_check()
+
+    def connect(self) -> None:
+        """Connect to the server, retrying until success or shutdown."""
+        while not self.stop_event.is_set():
+            try:
+                # Drop any previous socket before reconnecting.
+                if self.client_socket:
+                    self.client_socket.close()
+
+                self.client_socket = socket(AF_INET, SOCK_STREAM)
+                self.client_socket.settimeout(config.CONNECTION_TIMEOUT)
+                self.client_socket.connect(self.server_address)
+
+                logger.info(f"已连接到讯飞套件: {self.server_address}")
+                self.client_socket.settimeout(None)  # drop the connect timeout once established
+                self.connected_event.set()
+                break
+
+            except (ConnectionError, OSError, timeout) as e:
+                logger.error(
+                    f"讯飞套件 {self.server_address} 连接失败: {e}. {config.RECONNECT_DELAY}秒后重试...")
+                # Speak an alert on Xunfei-kit connection failure (local
+                # import avoids a circular dependency at module load).
+                from utils.tts_client import play_text_async
+                play_text_async('讯飞套件连接失败,请检查讯飞套件是否正常', use_cache=True)
+                self.connected_event.clear()
+                time.sleep(config.RECONNECT_DELAY)
+
+    def receive_full_data(self, expected_length: int) -> Optional[bytes]:
+        """Receive exactly ``expected_length`` bytes from the socket.
+
+        Args:
+            expected_length: number of bytes to read.
+
+        Returns:
+            Optional[bytes]: the data, or None on timeout or peer close.
+        """
+        received_data = bytearray()
+        while len(received_data) < expected_length:
+            try:
+                chunk = self.client_socket.recv(
+                    expected_length - len(received_data))
+                if not chunk:
+                    # Empty recv means the peer closed the connection.
+                    return None
+                received_data.extend(chunk)
+            except timeout:
+                return None
+        return bytes(received_data)
+
+    def start_ping_check(self) -> None:
+        """Start the background heartbeat thread (daemon)."""
+        Thread(target=self.ping_check, daemon=True).start()
+
+    def ping_check(self) -> None:
+        """Heartbeat loop: ping the server host and reconnect on failure."""
+        while not self.stop_event.is_set():
+            try:
+                if not ping_host(self.server_address[0]):
+                    logger.error(
+                        f"Ping检测失败: {self.server_address[0]}. 重新连接...")
+                    self.connected_event.clear()
+                    self.connect()
+            except Exception as e:
+                logger.error(f"心跳检测异常: {e}")
+            time.sleep(config.PING_INTERVAL)
+
+    def close(self) -> None:
+        """Shut down: stop background loops and close the socket."""
+        self.stop_event.set()
+        if self.client_socket:
+            self.client_socket.close()
+        logger.info("Socket连接已关闭")
+
+    def is_connected(self) -> bool:
+        """Return True while the connection is believed to be up."""
+        return self.connected_event.is_set()
+
+    def get_socket(self) -> Optional[socket]:
+        """Return the socket when connected, else None."""
+        return self.client_socket if self.is_connected() else None

+ 176 - 0
core/xunfei/message_processor.py

@@ -0,0 +1,176 @@
+"""
+消息处理核心逻辑模块
+"""
+from utils.logger import logger
+import struct
+import json
+from typing import Optional, Tuple
+
+from core.socket_client import SocketClient
+from strategies.confirm_process import ConfirmProcess
+from strategies.aiui_process import AiuiMessageProcess
+from handlers.xunfei.intent_handler import IntentHandler
+from handlers.xunfei.speech_handler import XunfeiSpeechHandler
+from handlers.xunfei.nlp_handler import NLPHandler
+from handlers.xunfei.knowledge_handler import KnowledgeHandler
+
+
+class XunfeiMessageProcessor:
+    """消息处理核心类"""
+
+    def __init__(self, socket_client: SocketClient):
+        self.socket_client = socket_client
+        self.aiui_type = ""
+
+        # 初始化策略处理器
+        self.confirm_process = ConfirmProcess()
+        self.aiui_process = AiuiMessageProcess()
+
+        # 初始化业务处理器
+        self.intent_handler = IntentHandler()
+        self.speech_handler = XunfeiSpeechHandler()  # 使用讯飞AIUI处理器
+        self.nlp_handler = NLPHandler()
+        self.knowledge_handler = KnowledgeHandler()
+        # 把意图类注册到语音处理器
+        self.speech_handler.intent_handler = self.intent_handler
+        # 设置NLP处理器的意图处理器引用
+        self.nlp_handler.intent_handler = self.intent_handler
+        # 把iat处理器注册到nlp
+        self.nlp_handler.iat_handler = self.speech_handler
+
+    def process(self) -> bool:
+        """
+        处理接收到的消息
+
+        Returns:
+            bool: 处理是否成功
+        """
+        try:
+            socket = self.socket_client.get_socket()
+            if not socket:
+                logger.debug("Socket未连接,等待连接恢复...")
+                # 不主动触发连接,避免多重连接
+                return False
+
+            # 检查连接状态
+            if not self.socket_client.is_connected():
+                logger.debug("Socket连接已断开,等待重连...")
+                return False
+
+            # 设置接收超时 - 增加超时时间,减少频繁失败
+            socket.settimeout(5)  # 从3秒增加到5秒
+
+            # 接收消息头
+            recv_data = self.socket_client.receive_full_data(7)
+            if not recv_data:
+                logger.debug("未接收到数据,可能是连接空闲或断开")
+                # 检查socket是否仍然有效
+                try:
+                    # 发送一个小的测试数据来检查连接
+                    socket.send(b'\x00')
+                    return True  # 连接正常,只是没有数据
+                except Exception:
+                    logger.debug("Socket连接已断开")
+                    self.socket_client.connected_event.clear()
+                    return False
+
+            if len(recv_data) < 7:
+                logger.error(f"数据不完整: {recv_data}")
+                return False
+
+            # 解析消息头
+            sync_head, user_id, msg_type, msg_length, msg_id = struct.unpack(
+                '<BBBHH', recv_data)
+
+            # 接收消息体
+            msg_data = self.socket_client.receive_full_data(msg_length + 1)
+            if len(msg_data) < msg_length + 1:
+                logger.error(f"消息体数据不完整: {msg_data}")
+                return False
+
+            # 解析消息数据
+            msg = msg_data[:msg_length]
+            check_code = msg_data[-1]  # 校验码
+
+            # 处理消息
+            if sync_head == 0xa5 and user_id == 0x01:
+                return self._handle_message(msg_type, msg_id, msg)
+
+            return True
+
+        except Exception as e:
+            logger.error(f"消息处理异常: {e}")
+            return False
+
+    def _handle_message(self, msg_type: int, msg_id: int, msg: bytes) -> bool:
+        """
+        处理具体消息类型
+
+        Args:
+            msg_type: 消息类型
+            msg_id: 消息ID
+            msg: 消息内容
+
+        Returns:
+            bool: 处理结果
+        """
+        try:
+            socket = self.socket_client.get_socket()
+
+            if msg_type == 0x01:
+                # 确认消息
+                self.confirm_process.process(socket, msg_id)
+
+            elif msg_type == 0x04:
+                # AIUI消息
+                self.confirm_process.process(socket, msg_id)
+                success, result = self.aiui_process.process(socket, msg)
+
+                if success:
+                    self.aiui_type = ""
+                    data = json.loads(result)
+                    self._get_aiui_type(data)
+
+                    # 处理唤醒事件
+                    if data.get('content', {}).get('eventType', {}) == 4:
+                        logger.info(f"唤醒成功:===={msg_id} 我在 ====")
+                        from utils.tts_client import play_text_async
+                        play_text_async('我在呢,有什么可以帮您的?', use_cache=True)
+
+                    # 根据AIUI类型处理
+                    if self.aiui_type == "iat":
+                        self.speech_handler.handle_iat_result(data)
+                    elif self.aiui_type == "cbm_knowledge":
+                        logger.info(f"cbm_knowledge: {data}")
+                        self.knowledge_handler.handle_knowledge_result(data)
+                    elif self.aiui_type == "nlp":
+                        self.nlp_handler.handle_nlp_result(data)
+                    elif self.aiui_type == "cbm_semantic":
+                        self.intent_handler.handle_intent_result(data)
+                else:
+                    logger.warning("AIUI消息处理失败")
+
+            return True
+
+        except Exception as e:
+            logger.error(f"消息类型处理异常: {e}")
+            return False
+
+    def _get_aiui_type(self, data: dict) -> None:
+        """
+        获取AIUI类型
+
+        Args:
+            data: AIUI数据
+        """
+        if 'content' in data:
+            content = data['content']
+            if 'info' in content:
+                info = content['info']
+                if 'data' in info and isinstance(info['data'], list) and len(info['data']) > 0:
+                    data_item = info['data'][0]
+                    if 'params' in data_item:
+                        params = data_item['params']
+                        sub_value = params.get('sub')
+                        if sub_value is not None:
+                            self.aiui_type = sub_value

+ 109 - 0
docs/aiui.cfg

@@ -0,0 +1,109 @@
+{
+    "login": {
+        "appid": "352cdc21",
+        "key": "45b5f6a1a4374409e6d72107e365ec1e",
+        "api_secret": "NTkzYTE0NTI4NGUxZGRhMmQwZDFlMDk4"
+    },
+    "interact": {
+        "interact_timeout": "-1",
+        "result_timeout": "5000"
+    },
+    "global": {
+        "scene": "test",
+        "clean_dialog_history": "auto"
+    },
+    "vad": {
+        "vad_enable": "0",
+        "engine_type": "meta",
+        "res_type": "assets",
+        "res_path": "vad\/meta_vad_16k.jet"
+    },
+    "iat": {
+        "sample_rate": "16000"
+    },
+    "asr": {
+        "threshold": "50",
+        "res_type": "assets",
+        "res_path": "asr\/common.jet"
+    },
+    "tts": {
+        "engine_type": "cloud",
+        "res_type": "assets",
+        "res_path": "tts\/common.jet;tts\/mengmeng.jet",
+        "voice_name": "x4_lingxiaoying_em_v2",
+        "ent": "xtts",
+        "play_mode": "sdk"
+    },
+    "speech": {
+        "data_source": "sdk",
+        "audio_captor": "none",
+        "interact_mode": "continuous",
+        "intent_engine_type": "cloud"
+    },
+    "ivw": {
+        "mic_type": "mic4",
+        "zeroshot_enable": "0",
+        "res_type": "path",
+        "res_path": "\/sdcard\/AIUI\/assets\/vtn\/config\/vtn.ini"
+    },
+    "mmsp": {
+        "play_mmsp": false,
+        "upload_status_interval": 60,
+        "wakeup_engine": "mic4",
+        "video_delay": 120,
+        "min_face_w": 100,
+        "min_face_h": 100,
+        "head_angle_yaw": 45,
+        "vad_eos": 60,
+        "cae_mode": "mmsp",
+        "wakeup_mode": "mmsp",
+        "ivw_interact_timeout": 60000,
+        "auto_lower_rank": false,
+        "face_out_ms": 800
+    },
+    "recorder": {
+        "channel_count": 8,
+        "channel_filter": "2,3,4,5,6,7",
+        "channel_filter_cae": "2,3,4,5,6,7",
+        "sound_card_name": "AIUIUSBMC",
+        "sound_device": 0,
+        "min_face_w": 80,
+        "min_face_h": 100,
+        "format": 0,
+        "channel": 8,
+        "input_color": "gray",
+        "cam_aspect_ratio": "4:3",
+        "cam_max_px": 3273600,
+        "cam_zoom": 0,
+        "cam_clip_left": "0.15",
+        "cam_clip_right": "0.15",
+        "cam_clip_top": "0.1",
+        "cam_clip_bottom": "0.1",
+        "cam_rotate": 180,
+        "cam_mirror": "false",
+        "cam_id": "",
+        "cam_api": "2"
+    },
+    "cbmparams": {
+        "nlp": {
+            "nlp": {
+                "encoding": "utf8",
+                "compress": "raw",
+                "format": "json"
+            },
+            "sub_scene": "cbm_v45",
+            "richness": "concise"
+        }
+    },
+    "log": {
+        "debug_log": "1",
+        "save_datalog": "1",
+        "datalog_path": "",
+        "datalog_size": 1024,
+        "raw_audio_path": ""
+    },
+    "audioparams":{
+        "msc.lng":"118.28", 
+        "msc.lat":"33.97"
+   }
+}

+ 144 - 0
docs/hot_words.txt

@@ -0,0 +1,144 @@
+#HOT	热词后可以加[weight:4.0]来增加激励权重。使用英文的逗号冒号,数字范围[0-4.0](大概率生效),此行不可删除!
+科大讯飞,weight:4.0
+讯飞开放平台,weight:4.0
+语音云开放平台,weight:4.0
+语音服务,weight:4.0
+比心,weight:4.0
+比个心,weight:4.0
+敬礼,weight:4.0
+airan,weight:4.0
+需要语音服务,weight:4.0
+将语音转成文字,weight:4.0
+识别个性化词句,weight:4.0
+南京,weight:4.0
+无锡,weight:4.0
+徐州,weight:4.0
+常州,weight:4.0
+苏州,weight:4.0
+南通,weight:4.0
+连云港,weight:4.0
+淮安,weight:4.0
+盐城,weight:4.0
+扬州,weight:4.0
+镇江,weight:4.0
+泰州,weight:4.0
+宿迁,weight:4.0
+苏晓伴,weight:4.0
+苏超,weight:4.0
+移动,weight:4.0
+小勇,weight:4.0
+营收,weight:4.0
+南京,weight:4.0
+ICT,weight:4.0
+九三,weight:4.0
+阅兵,weight:4.0
+移动集成,weight:4.0
+中国移动,weight:4.0
+江苏移动,weight:4.0
+万欣,weight:4.0
+乙泓,weight:4.0
+于锦莹,weight:4.0
+任虹珊,weight:4.0
+余冰,weight:4.0
+余雷,weight:4.0
+关懿珉,weight:4.0
+凌锋,weight:4.0
+刘宗春,weight:4.0
+刘康,weight:4.0
+刘猛,weight:4.0
+刘皓,weight:4.0
+单绍勇,weight:4.0
+卢晓炯,weight:4.0
+吴扣树,weight:4.0
+唐忠伟,weight:4.0
+唐泽荣,weight:4.0
+孙凯,weight:4.0
+宗迅怀,weight:4.0
+左会军,weight:4.0
+庞瑶,weight:4.0
+张国兵,weight:4.0
+张洋博,weight:4.0
+张海忠,weight:4.0
+徐光,weight:4.0
+方晓军,weight:4.0
+晁泉泉,weight:4.0
+朱同先,weight:4.0
+朱建军,weight:4.0
+朱若冲,weight:4.0
+李昕,weight:4.0
+李锋,weight:4.0
+杨璇,weight:4.0
+杨谦,weight:4.0
+樊野,weight:4.0
+沈岩,weight:4.0
+沈建林,weight:4.0
+测试,weight:4.0
+王宏图,weight:4.0
+王小霞,weight:4.0
+王晔,weight:4.0
+王浩,weight:4.0
+童恩,weight:4.0
+罗一民,weight:4.0
+许兴盛,weight:4.0
+谢翌耿,weight:4.0
+赵挺总,weight:4.0
+邓丁,weight:4.0
+郑建兵,weight:4.0
+郑明,weight:4.0
+陈冬生,weight:4.0
+陶晨,weight:4.0
+雷丹雷总,weight:4.0
+马晓明,weight:4.0
+马远,weight:4.0
+魏海彬,weight:4.0
+黄丛伟,weight:4.0
+黄珂,weight:4.0
+科大讯飞,weight:4.0
+陈骏,weight:4.0
+过利平,weight:4.0
+张建云,weight:4.0
+朱怀诚,weight:4.0
+任洪强,weight:4.0
+冯少东,weight:4.0
+徐春生,weight:4.0
+方胜昔,weight:4.0
+夏军,weight:4.0
+杨文新,weight:4.0
+范银宏,weight:4.0
+焦新安,weight:4.0
+张凌浩,weight:4.0
+丁建宁,weight:4.0
+严云洋,weight:4.0
+王建浦,weight:4.0
+李爱宏,weight:4.0
+殷咏梅,weight:4.0
+朱军,weight:4.0
+任洪兴,weight:4.0
+杨帆,weight:4.0
+成媛媛,weight:4.0
+尹相伦,weight:4.0
+解满启,weight:4.0
+卫爱军,weight:4.0
+陈石,weight:4.0
+包荣军,weight:4.0
+朱嘉浩,weight:4.0
+袁大勇,weight:4.0
+吴莉萍,weight:4.0
+赵辉,weight:4.0
+杨冬生,weight:4.0
+翟小忠,weight:4.0
+陈杰,weight:4.0
+张红兵,weight:4.0
+岳智顺,weight:4.0
+金雷,weight:4.0
+刘大明,weight:4.0
+夏月生,weight:4.0
+黎梅梅,weight:4.0
+李莹,weight:4.0
+马立涛,weight:4.0
+张驰,weight:4.0
+王岩,weight:4.0
+刘慧磊,weight:4.0
+王晓忠,weight:4.0
+张杰,weight:4.0
+周亚琳,weight:4.0

+ 417 - 0
fix_alsa_config.py

@@ -0,0 +1,417 @@
+'''
+Author: zhaoyong 77912776@qq.com
+Date: 2025-08-24 15:45:57
+LastEditTime: 2025-08-24 16:02:32
+LastEditors: zhaoyong 77912776@qq.com
+FilePath: \robot_ai\fix_alsa_config.py
+Description: 头部注释配置模板
+'''
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+ALSA配置优化脚本
+解决Ubuntu环境中的ALSA音频问题
+"""
+
+import os
+import platform
+import subprocess
+import shutil
+from utils.logger import setup_logger, logger
+
+
+def check_alsa_config():
+    """检查ALSA配置"""
+    logger.info("🔍 检查ALSA配置...")
+
+    try:
+        # 检查ALSA版本
+        result = subprocess.run(['aplay', '--version'],
+                                capture_output=True, text=True, timeout=10)
+        if result.returncode == 0:
+            logger.info(f"📋 ALSA版本: {result.stdout.strip()}")
+        else:
+            logger.warning("⚠️ 无法获取ALSA版本信息")
+    except Exception as e:
+        logger.error(f"❌ 检查ALSA版本失败: {e}")
+
+
+def check_audio_devices():
+    """检查音频设备"""
+    logger.info("🎵 检查音频设备...")
+
+    try:
+        # 列出音频设备
+        result = subprocess.run(['aplay', '-l'],
+                                capture_output=True, text=True, timeout=10)
+        if result.returncode == 0:
+            logger.info("📋 音频播放设备:")
+            for line in result.stdout.strip().split('\n'):
+                if line.strip():
+                    logger.info(f"    {line}")
+        else:
+            logger.warning("⚠️ 无法获取音频设备列表")
+    except Exception as e:
+        logger.error(f"❌ 检查音频设备失败: {e}")
+
+
+def create_alsa_config():
+    """Write an optimized ``~/.asoundrc`` tuned for TTS playback clarity.
+
+    Backs up any existing config first. Linux only.
+
+    Returns:
+        bool: True when the file was written successfully.
+    """
+    logger.info("⚙️ 创建优化的ALSA配置...")
+
+    # The per-user ALSA config lives in the home directory on Linux.
+    if platform.system().lower() == 'linux':
+        config_path = os.path.expanduser("~/.asoundrc")
+    else:
+        logger.info("📋 非Linux系统,跳过ALSA配置")
+        return False
+
+    # Full configuration text, written verbatim to ~/.asoundrc.
+    # (Comments inside the string are ALSA config comments and are part
+    # of the file content — kept untouched.)
+    alsa_config = """# ALSA配置文件 - 自动生成
+# 用于修复音频设备配置问题,解决电流声和音色不清晰问题
+
+# 默认PCM设备配置 - 使用优化的缓冲区设置,提高音色清晰度
+pcm.!default {
+    type plug
+    slave.pcm {
+        type hw
+        card 0
+        device 0
+        rate 16000
+        channels 1
+        format S16_LE
+        buffer_size 1024
+        period_size 512
+    }
+}
+
+# 默认控制设备配置
+ctl.!default {
+    type hw
+    card 0
+}
+
+# 高清晰度PCM设备 - 专门用于TTS播放,确保音色清晰
+pcm.high_quality {
+    type plug
+    slave.pcm {
+        type hw
+        card 0
+        device 0
+        rate 16000
+        channels 1
+        format S16_LE
+        buffer_size 512
+        period_size 256
+    }
+}
+
+# 低延迟PCM设备 - 用于TTS播放
+pcm.lowlatency {
+    type plug
+    slave.pcm {
+        type hw
+        card 0
+        device 0
+        rate 16000
+        channels 1
+        format S16_LE
+        buffer_size 1024
+        period_size 512
+    }
+}
+
+# 稳定PCM设备 - 用于避免电流声
+pcm.stable {
+    type plug
+    slave.pcm {
+        type hw
+        card 0
+        device 0
+        rate 16000
+        channels 1
+        format S16_LE
+        buffer_size 2048
+        period_size 1024
+    }
+}
+
+# 无电流声PCM设备 - 专门用于避免电流声
+pcm.no_current_noise {
+    type plug
+    slave.pcm {
+        type hw
+        card 0
+        device 0
+        rate 16000
+        channels 1
+        format S16_LE
+        buffer_size 1024
+        period_size 512
+    }
+}
+
+# 简单PCM设备
+pcm.simple {
+    type plug
+    slave.pcm "hw:0,0"
+}
+
+# 禁用不存在的设备以避免ALSA错误
+pcm.front {
+    type null
+}
+
+pcm.rear {
+    type null
+}
+
+pcm.center_lfe {
+    type null
+}
+
+pcm.side {
+    type null
+}
+
+pcm.surround21 {
+    type null
+}
+
+pcm.surround40 {
+    type null
+}
+
+pcm.surround41 {
+    type null
+}
+
+pcm.surround50 {
+    type null
+}
+
+pcm.surround51 {
+    type null
+}
+
+pcm.surround71 {
+    type null
+}
+
+pcm.iec958 {
+    type null
+}
+
+pcm.spdif {
+    type null
+}
+
+pcm.hdmi {
+    type null
+}
+
+pcm.modem {
+    type null
+}
+
+pcm.phoneline {
+    type null
+}
+
+# 禁用OSS设备
+pcm.dsp {
+    type null
+}
+
+# 禁用USB音频设备错误
+pcm.usb_stream {
+    type null
+}
+"""
+
+    try:
+        # Back up any existing configuration before overwriting it.
+        if os.path.exists(config_path):
+            backup_path = config_path + ".backup"
+            shutil.copy2(config_path, backup_path)
+            logger.info(f"📋 已备份现有配置到: {backup_path}")
+
+        # Write the new configuration.
+        with open(config_path, 'w') as f:
+            f.write(alsa_config)
+
+        logger.info(f"✅ ALSA配置文件已创建: {config_path}")
+
+        # World-readable, owner-writable.
+        os.chmod(config_path, 0o644)
+        logger.info("✅ 已设置配置文件权限")
+
+        return True
+    except Exception as e:
+        logger.error(f"❌ 创建ALSA配置文件失败: {e}")
+        return False
+
+
+def create_simple_alsa_config():
+    """Write a minimal ``~/.asoundrc`` that routes everything to hw:0,0.
+
+    Backs up any existing config first. Linux only.
+
+    Returns:
+        bool: True when the file was written successfully.
+    """
+    logger.info("⚙️ 创建简单的ALSA配置...")
+
+    # The per-user ALSA config lives in the home directory on Linux.
+    if platform.system().lower() == 'linux':
+        config_path = os.path.expanduser("~/.asoundrc")
+    else:
+        logger.info("📋 非Linux系统,跳过ALSA配置")
+        return False
+
+    # Minimal configuration text, written verbatim to ~/.asoundrc.
+    # (Comments inside the string are ALSA config comments — kept as-is.)
+    simple_config = """# 简单ALSA配置文件
+# 用于修复音频设备配置问题
+
+# 默认PCM设备
+pcm.!default {
+    type plug
+    slave.pcm "hw:0,0"
+}
+
+# 默认控制设备
+ctl.!default {
+    type hw
+    card 0
+}
+"""
+
+    try:
+        # Back up any existing configuration before overwriting it.
+        if os.path.exists(config_path):
+            backup_path = config_path + ".backup"
+            shutil.copy2(config_path, backup_path)
+            logger.info(f"📋 已备份现有配置到: {backup_path}")
+
+        # Write the new configuration.
+        with open(config_path, 'w') as f:
+            f.write(simple_config)
+
+        logger.info(f"✅ 简单ALSA配置文件已创建: {config_path}")
+        logger.info("✅ 配置文件已简化,避免复杂配置导致的错误")
+
+        # World-readable, owner-writable.
+        os.chmod(config_path, 0o644)
+        logger.info("✅ 已设置配置文件权限")
+
+        return True
+    except Exception as e:
+        logger.error(f"❌ 创建ALSA配置文件失败: {e}")
+        return False
+
+
+def remove_alsa_config():
+    """移除ALSA配置文件"""
+    logger.info("🗑️ 移除ALSA配置文件...")
+
+    if platform.system().lower() != 'linux':
+        logger.info("📋 非Linux系统,跳过ALSA配置移除")
+        return False
+
+    config_path = os.path.expanduser("~/.asoundrc")
+
+    try:
+        if os.path.exists(config_path):
+            # 备份现有配置
+            backup_path = config_path + ".backup"
+            shutil.copy2(config_path, backup_path)
+            logger.info(f"📋 已备份现有配置到: {backup_path}")
+
+            # 移除配置文件
+            os.remove(config_path)
+            logger.info(f"✅ 已移除ALSA配置文件: {config_path}")
+            logger.info("💡 系统将使用默认ALSA配置")
+            return True
+        else:
+            logger.info("📋 ALSA配置文件不存在")
+            return False
+    except Exception as e:
+        logger.error(f"❌ 移除ALSA配置文件失败: {e}")
+        return False
+
+
+def check_pulseaudio():
+    """检查PulseAudio状态"""
+    logger.info("🔊 检查PulseAudio状态...")
+
+    try:
+        # 检查PulseAudio是否运行
+        result = subprocess.run(['pulseaudio', '--check'],
+                                capture_output=True, text=True, timeout=10)
+        if result.returncode == 0:
+            logger.info("✅ PulseAudio正在运行")
+        else:
+            logger.warning("⚠️ PulseAudio未运行,尝试启动...")
+            subprocess.run(['pulseaudio', '--start'],
+                           capture_output=True, text=True, timeout=10)
+    except Exception as e:
+        logger.error(f"❌ 检查PulseAudio失败: {e}")
+
+
+def reload_alsa():
+    """重新加载ALSA配置"""
+    logger.info("🔄 重新加载ALSA配置...")
+
+    try:
+        # 重启ALSA服务
+        subprocess.run(['sudo', 'systemctl', 'restart', 'alsa-utils'],
+                       capture_output=True, text=True, timeout=10)
+        logger.info("✅ ALSA服务已重启")
+    except Exception as e:
+        logger.warning(f"⚠️ 重启ALSA服务失败: {e}")
+        logger.info("💡 请手动重启音频服务或重新登录")
+
+
+def optimize_audio_settings():
+    """优化音频设置"""
+    logger.info("🎛️ 优化音频设置...")
+
+    if platform.system().lower() != 'linux':
+        logger.info("📋 非Linux系统,跳过音频优化")
+        return
+
+    # 首先尝试移除现有配置
+    if remove_alsa_config():
+        logger.info("✅ 已移除有问题的ALSA配置")
+
+    # 创建简单的ALSA配置
+    if create_simple_alsa_config():
+        logger.info("✅ 简单ALSA配置创建完成")
+    else:
+        logger.warning("⚠️ 无法创建ALSA配置,将使用系统默认配置")
+
+    # 重新加载ALSA配置
+    reload_alsa()
+
+    # 检查PulseAudio
+    check_pulseaudio()
+
+    # 检查音频设备
+    check_audio_devices()
+
+
+def main():
+    """主函数"""
+    setup_logger('fix_alsa', 'logs')
+    logger.info("🚀 开始ALSA配置优化...")
+
+    # 检查系统
+    logger.info(f"📋 操作系统: {platform.system()} {platform.release()}")
+
+    # 检查ALSA配置
+    check_alsa_config()
+
+    # 优化音频设置
+    optimize_audio_settings()
+
+    logger.info("✅ ALSA配置优化完成")
+    logger.info("💡 建议重启音频服务或重新登录以应用配置")
+    logger.info("💡 如果仍有问题,请运行: sudo systemctl restart pulseaudio")
+
+
+if __name__ == "__main__":
+    main()

+ 1 - 0
handlers/__init__.py

@@ -0,0 +1 @@
+# 业务逻辑处理模块

+ 233 - 0
handlers/aiui/AIui_node.py

@@ -0,0 +1,233 @@
+#!/usr/bin/env python3
+# coding=utf-8
+from utils.logger import logger
+from core.aiui.recorder import Recorder
+import handlers.aiui.pyaiui as pyaiui
+from handlers.aiui.pyAIUIConstant import AIUIConstant
+import json5
+import os
+import sys
+import random
+import json
+from threading import Thread
+import time
+from handlers.aiui.EventListener import EventListener, EventData
+from pathlib import Path
+
# Audio frame size (samples per chunk) handed to the Recorder.
CHUNK = 2048

# Resolve the AIUI config path relative to this file so the script works
# regardless of the current working directory (handlers/aiui -> project root).
script_path = Path(__file__).resolve()
BASE_DIR = script_path.parent.parent.parent
cfg_file = os.path.join(BASE_DIR, "config", "aiui", "cfg", "aiui.cfg")

# Recorder state machine values (see AIUINode.recorder_states).
WAITING = 0   # waiting state
ABORT = -1    # interrupted state
ALLOW = 1     # recording-allowed state
+
+
def read_json5(path):
    """Parse a JSON5 file and return its content; None on any failure."""
    try:
        with open(path, "r", encoding="utf-8") as fp:
            content = fp.read()
        return json5.loads(content)
    except Exception as exc:
        # Log and fall through to the None return; callers treat None as "no config".
        logger.error("{},{}".format(exc, __file__))
        return None
+
+
class AIUINode:
    """Drives one AIUI agent instance: wake-up, microphone capture, and
    TTS/text commands.

    The node owns a lazily-created Recorder, streams captured audio chunks
    into the native engine via CMD_WRITE messages, and exposes small command
    helpers (TTS, wake-up, state query) used by the event listener.
    """

    def __init__(self, event_listener, debug=False, wakeup_mute=False):
        """Create the native agent from the JSON5 config and wire the listener.

        :param event_listener: EventListener receiving engine callbacks.
        :param debug: accepted for interface compatibility (not read here).
        :param wakeup_mute: whether playback should be muted on wake-up.
        """
        self.agent_status = None
        event_listener.set_agent(self)
        self.event_listener = event_listener
        # Re-serialize the config with quoted keys: the native SDK expects
        # strict JSON, while the on-disk file is JSON5.
        self.agent = pyaiui.IAIUIAgent.createAgent(json5.dumps(
            read_json5(cfg_file), quote_keys=True), event_listener)
        self.__recorder_ = None
        self.is_recording = False
        # -1: interrupted; 0: waiting; 1: recording allowed (ABORT/WAITING/ALLOW)
        self.recorder_states = WAITING
        self.wakeup_mute = wakeup_mute
        self.is_hlw = False
        self.set_is_hlw()
        self.running = True
        # Count of audio chunks forwarded to the engine; previously created
        # lazily via hasattr() inside cmd_write_audio.
        self._audio_sent_count = 0

    def text_to_speak_multiple_options_callback(self, text, vcn="qige", speed=50, pitch=50, volume=50):
        """TTS callback allowing voice/speed/pitch/volume overrides."""
        self.cmd_tts(text, vcn, speed, pitch, volume)
        return True

    def direct_control_callback(self, text):
        """Inject `text` straight into the NLP event path (bypassing ASR)."""
        self.event_listener.EventNLP(EventData({"text": text}))
        return True

    def text_to_speak_callback(self, text):
        """TTS callback using the default voice parameters."""
        self.cmd_tts(text)
        return True

    def set_is_hlw(self):
        # Simplified HLW detection logic: always disabled.
        self.is_hlw = False

    def cmd_tts(self, text, vcn="qige", speed=50, pitch=50, volume=50):
        """Send a CMD_TTS start message (arg1=1) to synthesize `text`."""
        text = pyaiui.Buffer.create(bytes(text, encoding="utf-8"))
        TTSMsg = pyaiui.IAIUIMessage.create(
            AIUIConstant.CMD_TTS, 1, 0, "text_encoding=utf-8,vcn={},ent=x,speed={},pitch={},volume={}".format(vcn, speed, pitch, volume), text)
        self.agent.sendMessage(TTSMsg)
        TTSMsg.destroy()

    def cmd_tts_stop(self):
        """Send a CMD_TTS stop message (arg1=2) to cancel any synthesis."""
        text = pyaiui.Buffer.create(bytes("", encoding="utf-8"))
        TTSMsg = pyaiui.IAIUIMessage.create(
            AIUIConstant.CMD_TTS, 2, 0, "text_encoding=utf-8,vcn=qige,ent=x", text)
        self.agent.sendMessage(TTSMsg)
        TTSMsg.destroy()

    def wakeup_callback(self, msg=None):
        """Handle an external wake-up: reset any in-flight recording and
        restart capture on a fresh thread."""
        self.recorder_states = ABORT  # interrupt the previous recording wait
        self.getState()
        if self.is_recording:
            self.reset_wakeup_agent()
            self.is_recording = False
        if self.agent_status != AIUIConstant.STATE_WORKING:
            self.wakeup_agent()

        # Simplified wake-up handling: start recording immediately.
        logger.info("🎤 AIUI唤醒,开始录音")
        self.recorder_states = ALLOW  # allow recording right away
        Thread(target=self.__wait_play_to_start_record).start()

    def __wait_play_to_start_record(self):
        # Simplified wait logic: no playback-completion check, record now.
        self.recorder_states = ALLOW
        self.start_record()

    def start_record(self):
        """Begin capturing microphone audio on a daemon thread."""
        logger.info("开始录音...")
        self.is_recording = True
        if not self.is_hlw:  # idiom fix: was `== False`
            logger.info("启动非HLW录音模式")
            # Continuous capture loop; Thread is already imported at module level.
            Thread(target=self.__continuous_record, daemon=True).start()
        else:
            logger.info("HLW录音模式已启用")

    @property
    def __recorder(self):
        # Lazily create the Recorder so the microphone is only opened on demand.
        if self.__recorder_ is None:  # idiom fix: was `== None`
            self.__recorder_ = Recorder(CHUNK)
        return self.__recorder_

    def __start_record(self):
        """Run a single recording session, streaming chunks until stopped."""
        logger.info("录音流已启动,开始接收音频数据...")
        audio_count = 0
        for data in self.__recorder.read():
            if not self.is_recording:
                logger.info("录音已停止")
                break
            audio_count += 1
            if audio_count % 100 == 0:  # log every 100 audio chunks
                logger.info(f"已接收 {audio_count} 个音频块")
            self.audio_stream_callback(data)
        logger.info(f"录音流结束,总共接收 {audio_count} 个音频块")

    def __continuous_record(self):
        """Re-enter recording sessions while running and recording is allowed."""
        while self.running and self.recorder_states == ALLOW:
            self.is_recording = True
            self.__start_record()

            # Brief pause between sessions before re-arming the recorder.
            if self.running and self.recorder_states == ALLOW:
                logger.info("等待重新开始录音...")
                time.sleep(1)

    def audio_stream_callback(self, data):
        """Forward one captured chunk to the engine (and the local tap)."""
        if self.is_recording:
            if self.is_hlw:
                # HLW recorder yields an object carrying raw bytes in .data
                # — presumably; confirm against the HLW recorder implementation.
                audio_buffer = pyaiui.Buffer.create(data.data)
                self.record_stream_call_back(data.data)
            else:
                audio_buffer = pyaiui.Buffer.create(data)
                self.record_stream_call_back(data)
            self.cmd_write_audio(audio_buffer)
        else:
            logger.debug("录音已停止,跳过音频数据处理")

    def record_stream_call_back(self, buffer):
        """Hook for subclasses/observers; receives each raw audio chunk."""
        pass

    def wakeup_agent(self):
        """Send CMD_WAKEUP to move the engine into the working state."""
        wakeup_msg = pyaiui.IAIUIMessage.create(AIUIConstant.CMD_WAKEUP)
        self.agent.sendMessage(wakeup_msg)
        wakeup_msg.destroy()

    def reset_wakeup_agent(self):
        """Send CMD_RESET_WAKEUP to force the engine back to sleep."""
        reset_wakeup_msg = pyaiui.IAIUIMessage.create(
            AIUIConstant.CMD_RESET_WAKEUP)
        self.agent.sendMessage(reset_wakeup_msg)
        reset_wakeup_msg.destroy()

    def getState(self):
        """Ask the engine to report its state (answered via EVENT_STATE)."""
        get_state_msg = pyaiui.IAIUIMessage.create(AIUIConstant.CMD_GET_STATE)
        self.agent.sendMessage(get_state_msg)
        get_state_msg.destroy()

    def cmd_write_audio(self, audio_buffer):
        """Write one audio buffer to the engine via CMD_WRITE."""
        writeMsg = pyaiui.IAIUIMessage.create(
            AIUIConstant.CMD_WRITE, 0, 0, "data_type=audio", audio_buffer)
        self.agent.sendMessage(writeMsg)
        writeMsg.destroy()
        # Counter is initialized in __init__ (replaces the hasattr lazy-init).
        self._audio_sent_count += 1
        if self._audio_sent_count % 1000 == 0:
            logger.info(f"已发送 {self._audio_sent_count} 个音频块给AIUI引擎")

    def cmd_write_text(self, text):
        """Write UTF-8 text input to the engine via CMD_WRITE."""
        text = pyaiui.Buffer.create(bytes(text, encoding="utf-8"))
        writeMsg = pyaiui.IAIUIMessage.create(
            AIUIConstant.CMD_WRITE, 0, 0, "data_type=text", text)
        self.agent.sendMessage(writeMsg)
        writeMsg.destroy()

    def stop_write(self):
        """Tell the engine the audio stream has ended (CMD_STOP_WRITE)."""
        writeMsgStop = pyaiui.IAIUIMessage.create(
            AIUIConstant.CMD_STOP_WRITE, params="data_type=audio")
        self.agent.sendMessage(writeMsgStop)
        writeMsgStop.destroy()

    def start(self, spin=True):
        """Wake the engine, arm the recorder, and optionally block.

        :param spin: when True, block the caller until shutdown() is called.
        """
        self.cmd_tts_stop()
        logger.info("正在唤醒AIUI引擎...")
        self.wakeup_agent()

        # Give the engine a moment to reach the working state.
        time.sleep(1)

        logger.info("设置录音状态为允许录音...")
        self.recorder_states = ALLOW

        logger.info("正在启动录音...")
        self.start_record()

        if spin:  # block until shutdown() flips self.running
            while self.running:
                time.sleep(0.1)

    def shutdown(self):
        """Stop the spin loop and the continuous-record loop."""
        self.running = False
+
+
def load_sn():
    """Return the device serial number reported to the AIUI SDK."""
    serial_number = "yd-00:00:00:00:00:01"
    return serial_number
+
+
+pyaiui.AIUISetting.setSystemInfo("sn", load_sn())

+ 427 - 0
handlers/aiui/EventListener.py

@@ -0,0 +1,427 @@
+#!/usr/bin/env python3
+# coding=utf-8
+from handlers.xunfei.knowledge_handler import KnowledgeHandler
+from handlers.xunfei.intent_handler import IntentHandler
+from handlers.xunfei.nlp_handler import NLPHandler
+from handlers.xunfei.speech_handler import XunfeiSpeechHandler
+import time
+import handlers.aiui.pyaiui as pyaiui
+from handlers.aiui.pyAIUIConstant import AIUIConstant
+import json5
+import os
+import sys
+import random
+from utils.logger import logger
+
+# 导入xunfei的处理逻辑
+
+#### EVENT_RESULT ####
+IAT = "iat"     # 听写结果
+NLP = "nlp"     # 语义结果
+TPP = "tpp"     # 后处理服务结果
+TTS = "tts"     # 云端tts结果
+ITRANS = "itrans"   # 翻译结果
+
+NO_SKILL = 0
+IS_LOCAL_SKILL = 1
+IS_BOTH_AIUI_LOCAL_SKILL = 2
+LAST_TTS_FLAG_KEY = "lrst"
+IS_LAST_TTS_AUDIO = "1"
+EMPTY_PACKET = "AIUI DATA NULL"
+
+# 缓存配置,避免重复获取
+_config_cache = None
+_wakeup_words_cache = None
+_detection_config_cache = None
+
+
class EventData(object):
    """Thin envelope carrying an event payload between callbacks."""

    def __init__(self, data):
        # Payload is stored as-is; callers access it via .data or get_data().
        self.data = data

    def get_data(self):
        """Return the wrapped payload unchanged."""
        return self.data
+
+
class EventListener(pyaiui.AIUIEventListener):
    """Dispatches native AIUI engine events to the xunfei handler stack.

    Results (IAT / NLP / TTS / semantic / knowledge) are re-packed into the
    data shape the xunfei handlers expect so their processing logic is
    reused unchanged.
    """

    def __init__(self, skills_dict, debug=False):
        # Map each AIUI event type to its handler method.
        self.Event_Listener_dict = {
            AIUIConstant.EVENT_STATE: self.getEventState,
            AIUIConstant.EVENT_WAKEUP: self.eventWakeup,
            AIUIConstant.EVENT_SLEEP: self.eventSleep,
            AIUIConstant.EVENT_VAD: self.eventVAD,
            AIUIConstant.EVENT_RESULT: self.eventResult,
            AIUIConstant.EVENT_ERROR: self.eventError,
            AIUIConstant.EVENT_CONNECTED_TO_SERVER: self.eventConnectedToServer,
            AIUIConstant.EVENT_SERVER_DISCONNECTED: self.eventServerDisconnected
        }
        self.local_custom_skills_dict = skills_dict
        self.debug = debug
        self.agent = None
        self.param = None
        self.content = None
        self.is_aiui_local_skill = NO_SKILL
        self.is_tts_reply = False
        self.tts_processing = False  # TTS-in-progress flag, avoids duplicate logging

        # Initialize the xunfei handler stack.
        self.speech_handler = XunfeiSpeechHandler()
        self.nlp_handler = NLPHandler()
        self.intent_handler = IntentHandler()
        self.knowledge_handler = KnowledgeHandler()
        # Register the intent handler with the speech handler.
        self.speech_handler.intent_handler = self.intent_handler
        # Give the NLP handler a reference to the intent handler.
        self.nlp_handler.intent_handler = self.intent_handler
        # Register the IAT handler with the NLP handler.
        self.nlp_handler.iat_handler = self.speech_handler

    def _log_performance(self, event_type, start_time):
        """Record per-event-type timing statistics (debug mode only)."""
        global _performance_stats

        if self.debug:
            end_time = time.time()
            duration = (end_time - start_time) * 1000  # convert to milliseconds

            if event_type not in _performance_stats['event_times']:
                _performance_stats['event_times'][event_type] = []

            _performance_stats['event_times'][event_type].append(duration)
            _performance_stats['total_events'] += 1

            # Update the running average for this event type.
            times = _performance_stats['event_times'][event_type]
            avg_time = sum(times) / len(times)
            _performance_stats['avg_times'][event_type] = avg_time

            # Emit a summary every 100 events.
            if _performance_stats['total_events'] % 100 == 0:
                logger.info(
                    f"📊 性能统计 (事件 {_performance_stats['total_events']}): {event_type} 平均耗时 {avg_time:.2f}ms")

    def get_performance_stats(self):
        """Return a shallow copy of the performance statistics."""
        return _performance_stats.copy()

    def set_agent(self, agent):
        """Attach the owning AIUINode so handlers can issue commands."""
        self.agent = agent

    def onEvent(self, event):
        """Agent event callback: dispatch by event type."""
        eventType = event.getEventType()
        if eventType in self.Event_Listener_dict:
            self.Event_Listener_dict[eventType](event)
        else:
            logger.info("接收到未处理的 EventType: {}".format(eventType))

    def eventError(self, event):
        """Log engine errors; play a canned apology for policy code 10019."""
        logger.info(f"AIUI错误: code={event.getArg1()}, info={event.getInfo()}")
        from utils.tts_client import play_text_async
        if event.getArg1() == 10019:
            play_text_async("非常抱歉,根据相关法律法规,我们无法提供关于以下内容的答案,谢谢您的理解。", use_cache=True)

    def getEventState(self, event):
        """Record the engine state (IDLE/READY/WORKING) on the agent."""
        arg1 = event.getArg1()
        self.agent.agent_status = arg1
        logger.info(f"🔄 AIUI状态: {arg1}")

    def eventWakeup(self, event):
        """Wake-up event: logged only; AIUINode drives the recording."""
        logger.info("🎤 AIUI唤醒")

    def eventSleep(self, event):
        """Sleep event: re-wake the agent after a forced sleep."""
        arg1 = event.getArg1()
        if arg1 == AIUIConstant.TYPE_AUTO:
            logger.info("😴 自动休眠")
        if arg1 == AIUIConstant.TYPE_COMPEL:
            logger.info("😴 强制休眠")
            self.agent.wakeup_agent()

    def eventVAD(self, event):
        """VAD event: stop recording once the end-of-speech point is seen."""
        arg1 = event.getArg1()
        if AIUIConstant.VAD_BOS == arg1:
            logger.info("🎤 VAD:检测到前端点")
        elif AIUIConstant.VAD_EOS == arg1:
            logger.info("🎤 VAD:检测到后端点")
            self.agent.is_recording = False

    def eventConnectedToServer(self, event):
        """Connection-established event."""
        logger.info("🔗 AIUI已连接到服务器")

    def eventServerDisconnected(self, event):
        """Connection-lost event."""
        logger.info("🔌 AIUI与服务器断开连接")

    def _extract_text_from_iat(self, resultJson):
        """Extract the recognized text and raw text block from an IAT result."""
        try:
            text_data = resultJson.get('text', {})
            ws_list = text_data.get('ws', [])

            # Flatten every candidate word; comprehension avoids repeated +=.
            words = [cw.get('w', '')
                     for item in ws_list for cw in item.get('cw', [])]
            return ''.join(words), text_data
        except Exception as e:
            logger.error(f"文本提取失败: {e}")
            return "", {}

    def eventResult(self, event):
        """Parse an EVENT_RESULT envelope and dispatch by sub-result type."""
        try:
            # NOTE(review): json5.loads is given bytes here — presumably
            # accepted by json5; confirm against the json5 package version.
            info = json5.loads(event.getInfo().encode("utf-8"))
            datas = info["data"]
            data0 = datas[0]
            self.param = data0["params"]
            contents = data0["content"]
            content0 = contents[0]
            self.content = content0

            sub = self.param["sub"]

            # Avoid duplicate TTS logging while a synthesis is in flight.
            if sub == TTS and self.tts_processing:
                # Already mid-TTS: process the audio without re-logging.
                pass
            else:
                logger.info(f"📝 收到AIUI结果,类型: {sub}")

            dataBundle = event.getData()
            cnt_id = content0["cnt_id"]

            # Dispatch by result type.
            if sub == NLP:
                self._process_nlp_result(dataBundle, cnt_id)
            elif sub == IAT:
                self._process_iat_result(dataBundle, cnt_id)
            elif sub == TTS:
                self._process_tts_result(content0, dataBundle, cnt_id)
            elif sub == "cbm_semantic":
                self._process_cbm_semantic_result(dataBundle, cnt_id)
            elif sub == "cbm_knowledge":
                self._process_cbm_knowledge_result(dataBundle, cnt_id)
            else:
                # Other result types: only inspected in debug mode to cut log noise.
                if self.debug:
                    self._process_other_result(sub, dataBundle, cnt_id)

        except Exception as e:
            logger.info(f"❌ 结果处理错误: {e}")

    def _process_nlp_result(self, dataBundle, cnt_id):
        """Decode and forward an NLP result."""
        resultStr = dataBundle.getBinaryAsStr(cnt_id)
        resultJson = json5.loads(resultStr)
        logger.info(f"🧠 NLP结果: {resultJson}")
        self._handle_nlp_result(resultJson)

    def _process_iat_result(self, dataBundle, cnt_id):
        """Decode a speech-recognition result and forward non-empty text."""
        resultStr = dataBundle.getBinaryAsStr(cnt_id)
        resultJson = json5.loads(resultStr)

        recognized_text, text_data = self._extract_text_from_iat(resultJson)
        if recognized_text:
            logger.info(f"🎤 IAT结果: {recognized_text}")

            # Reuse the xunfei speech-recognition pipeline.
            self._handle_iat_result_optimized(
                resultJson, recognized_text, text_data)

    def _process_tts_result(self, content0, dataBundle, cnt_id):
        """Forward a TTS audio chunk; empty packets are dropped."""
        if "error" in content0 and content0["error"] == EMPTY_PACKET:
            return

        # Mark TTS as in-flight so eventResult suppresses duplicate logs.
        if not self.tts_processing:
            self.tts_processing = True

        buffer = dataBundle.getBinary(cnt_id)
        dts = content0["dts"]  # audio-chunk position status (first/mid/last)
        self.EventTTS(EventData({"buffer": buffer, "dts": dts}))

    def _process_cbm_semantic_result(self, dataBundle, cnt_id):
        """Decode and forward a semantic-understanding result."""
        resultStr = dataBundle.getBinaryAsStr(cnt_id)
        resultJson = json5.loads(resultStr)
        logger.info(f"🧠 语义理解结果: {resultJson}")
        self._handle_cbm_semantic_result(resultJson)

    def _process_cbm_knowledge_result(self, dataBundle, cnt_id):
        """Decode and forward a knowledge-base result."""
        resultStr = dataBundle.getBinaryAsStr(cnt_id)
        resultJson = json5.loads(resultStr)
        logger.info(f"📚 知识库结果: {resultJson}")
        self._handle_cbm_knowledge_result(resultJson)

    def _process_other_result(self, sub, dataBundle, cnt_id):
        """Best-effort dump of result types with no dedicated handler."""
        logger.info(f"📝 其他类型结果: {sub}")
        resultStr = dataBundle.getBinaryAsStr(cnt_id)
        if resultStr:
            try:
                resultJson = json5.loads(resultStr)
                logger.info(f"📄 结果内容: {resultJson}")
            except Exception:
                # Bug fix: was a bare `except:` (also caught SystemExit /
                # KeyboardInterrupt). Fall back to printing the raw string.
                logger.info(f"📄 结果内容: {resultStr}")

    def _handle_nlp_result(self, resultJson):
        """Validate an NLP result and hand it to the xunfei NLP handler."""
        try:
            # Extract text and status from the AIUI NLP payload.
            nlp_text = resultJson.get('nlp', {}).get('text', '')
            nlp_status = resultJson.get('nlp', {}).get('status', 0)

            logger.info(f"📝 提取NLP文本: {nlp_text}, 状态: {nlp_status}")

            # Final status (2) is always processed, even punctuation-only text.
            if nlp_status == 2:
                logger.info("🎯 AIUI检测到结束状态,强制处理NLP结果")
            else:
                # Non-final results: validate before forwarding.
                if not nlp_text or len(nlp_text.strip()) == 0:
                    logger.debug("AIUI NLP文本为空,跳过处理")
                    return

                # Skip results that are a single punctuation mark.
                stripped_text = nlp_text.strip()
                if len(stripped_text) == 1 and stripped_text in '。,!?;:""''()【】':
                    logger.debug(f"AIUI NLP文本为纯标点符号,跳过处理: {stripped_text}")
                    return

            # Re-pack into the xunfei handler's expected envelope.
            xunfei_data = {
                'content': {
                    'result': {
                        'nlp': {
                            'text': nlp_text,
                            'status': nlp_status
                        }
                    }
                }
            }

            logger.info(f"🚀 准备调用xunfei NLP处理器: {xunfei_data}")
            self.nlp_handler.handle_nlp_result(xunfei_data)

        except Exception as e:
            logger.error(f"❌ AIUI NLP处理错误: {e}")
            # On failure, try to put the NLP handler back into a clean state.
            try:
                if hasattr(self.nlp_handler, '_reset_state'):
                    self.nlp_handler._reset_state()
                    logger.info("已重置NLP处理器状态")
            except Exception as reset_error:
                logger.error(f"重置NLP处理器状态失败: {reset_error}")

    def _handle_iat_result_optimized(self, resultJson, recognized_text, text_data):
        """Re-pack an IAT result and hand it to the xunfei speech handler."""
        try:
            logger.info(f"🎤 提取IAT文本: {recognized_text}")

            xunfei_data = {
                'content': {
                    'result': {
                        'text': {
                            'ws': text_data.get('ws', []),
                            'ls': text_data.get('ls', False),
                            'sn': text_data.get('sn', 0)
                        }
                    }
                }
            }

            self.speech_handler.handle_iat_result(xunfei_data)

        except Exception as e:
            logger.info(f"❌ IAT处理错误: {e}")

    def _handle_cbm_semantic_result(self, resultJson):
        """Re-pack a semantic result and hand it to the xunfei intent handler."""
        try:
            # `text` is itself a JSON string; parse it for logging only and
            # pass the original string through (intent_handler expects it raw).
            text_value = resultJson.get('cbm_semantic', {}).get('text', '')
            if text_value:
                parsed_text = json5.loads(text_value)

                xunfei_data = {
                    'content': {
                        'result': {
                            'cbm_semantic': {
                                'text': text_value  # pass the raw JSON string
                            }
                        }
                    }
                }

                logger.info(f"🔍 解析后的语义数据: {parsed_text}")
                logger.info(f"🔍 传递的数据结构: {xunfei_data}")
                self.intent_handler.handle_intent_result(xunfei_data)
            else:
                logger.warning("语义理解结果中没有找到 text 字段")

        except Exception as e:
            logger.info(f"❌ 语义理解处理错误: {e}")

    def _handle_cbm_knowledge_result(self, resultJson):
        """Re-pack a knowledge result and hand it to the knowledge handler."""
        try:
            xunfei_data = {
                'content': {
                    'result': {
                        'cbm_knowledge': resultJson
                    }
                }
            }

            self.knowledge_handler.handle_knowledge_result(xunfei_data)

        except Exception as e:
            logger.info(f"❌ 知识库处理错误: {e}")

    def EventNLP(self, event):
        """Simplified semantic-result callback: echo the text back via TTS."""
        nlp_result_text = event.data["text"]
        logger.info(f"🎯 NLP处理: {nlp_result_text}")

        if self.agent:
            reply = f"我听到了:{nlp_result_text}"
            logger.info(f"🔊 语音回复: {reply}")
            self.agent.cmd_tts(reply)

    def EventTTS(self, event):
        """TTS result callback — intentionally a no-op; playback is handled
        by the xunfei TTS pipeline."""
        pass

    def packed_socket_msg(self, cmd_type, params_dict=None):
        """Build a socket command message.

        Bug fix: `params_dict` previously defaulted to a shared mutable dict.
        """
        msg = {
            "cmd": cmd_type
        }
        msg.update(params_dict or {})
        return msg

    def is_tts_download_complete(self):
        """True when the last TTS content chunk was final (dts 2 or 3)."""
        return "dts" in self.content and self.content["dts"] in (2, 3)
+
+
def parse_iat(resultJson):
    """Log the concatenated first-candidate words of a non-final IAT result.

    Final results (``ls`` true) are ignored, matching the original behavior.
    """
    is_last = resultJson["text"]["ls"]
    if not is_last:  # idiom fix: was `== False`
        # join() replaces the old quadratic `joinText +=` loop.
        joinText = "".join(ws["cw"][0]["w"] for ws in resultJson["text"]["ws"])
        logger.info(joinText)

+ 0 - 0
handlers/aiui/__init__.py


+ 175 - 0
handlers/aiui/pyAIUIConstant.py

@@ -0,0 +1,175 @@
+#!/usr/bin python3
+# coding=utf-8
+from enum import IntEnum
+
+
+class AIUIConstant(IntEnum):
+    """aiui事件和消息相关常量"""
+
+    EVENT_RESULT = 1
+    """结果事件
+    data 字段携带结果数据,info 字段为描述数据的 JSON 字符串
+    """
+
+    EVENT_ERROR = 2
+    """出错事件
+    arg1 字段为错误码,info字段为错误描述信息
+    """
+
+    EVENT_STATE = 3
+    """服务状态事件
+    arg1 为服务状态,取值:STATE_IDLE、STATE_READY、STATE_WORKING
+    """
+
+    EVENT_WAKEUP = 4
+    """唤醒事件
+    arg1 字段取值:
+        0 => 内部语音唤醒
+        1 => 外部手动唤醒
+    info 字段为唤醒结果 JSON 字符串
+    """
+
+    EVENT_SLEEP = 5
+    """休眠事件
+    出现交互超,服务会进入休眠状态(待唤醒),或者发送了 CMD_RESET_WAKEUP 时,抛出该事件
+    arg1 字段取值:
+        0 => TYPE_AUTO(自动休眠,即交互超时)
+        1 => TYPE_COMPEL(外部强制休眠,即发送 CMD_RESET_WAKEUP)
+    """
+
+    EVENT_VAD = 6
+    """ VAD 事件
+    当检测到输入音频的前端点后,会抛出该事件,用 arg1 标识前后端点或音量信息:
+        0 => 前端点
+        1 => 音量
+        2 => 后端点
+        3 => 前端点超时
+    当 arg1 取值为 1 时,arg2 为音量大小,取值范围:[0-30]
+    """
+
+    EVENT_CMD_RETURN = 8
+    """某条 CMD 命令对应的返回事件
+    对于除 CMD_GET_STATE 外的有返回的命令,会返回该事件
+    用 arg1 标识对应的CMD命令,arg2 为返回值,0 表示成功,info 字段为描述信息
+    """
+
+    EVENT_AUDIO = 9
+    """音频事件"""
+
+    EVENT_PRE_SLEEP = 10
+    """准备休眠事件
+    当出现交互超时,服务会先抛出准备休眠事件,用户可在收到该事件后 10s 内继续交互,10s 后进入休眠状态
+    """
+
+    EVENT_START_RECORD = 11
+    """抛出该事件通知外部录音开始,用户可以开始说话"""
+
+    EVENT_STOP_RECORD = 12
+    """通知外部录音停止"""
+
+    EVENT_CONNECTED_TO_SERVER = 13
+    """与服务端建立起连接事件
+    连接建立后,才能进行数据同步等操作
+    """
+
+    EVENT_SERVER_DISCONNECTED = 14
+    """与服务端断开连接事件
+    连接断开后,将不能进行数据同步等操作
+    """
+
+    EVENT_TTS = 15
+    """语音合成事件"""
+
+    STATE_IDLE = 1
+    """空闲状态,AIUI服务未开启"""
+
+    STATE_READY = 2
+    """就绪状态,等待唤醒"""
+
+    STATE_WORKING = 3
+    """工作状态,已经唤醒,可以开始人机交互"""
+
+    CMD_GET_STATE = 1
+    """获取交互状态
+    AIUI 会回应 EVENT_STATE 事件
+    """
+
+    CMD_WRITE = 2
+    """写入数据"""
+
+    CMD_STOP_WRITE = 3
+    """停止写入数据"""
+
+    CMD_RESET = 4
+    """重置 AIUI 服务的状态
+    服务会立即停止并重新启动,进入到待唤醒状态
+    """
+
+    CMD_START = 5
+    """启动 AIUI 服务"""
+
+    CMD_STOP = 6
+    """停止 AIUI 服务"""
+
+    CMD_WAKEUP = 7
+    """手动唤醒"""
+
+    CMD_RESET_WAKEUP = 8
+    """休眠消息
+    服务重置为待唤醒状态,若当前为唤醒状态,发送该消息重置后会抛出 EVENT_SLEEP 事件
+    """
+
+    CMD_SET_PARAMS = 10
+    """设置参数配置
+    用 params 携带参数设置 JSON 字符串,具体格式参照 aiui.cfg 文件
+    """
+
+    CMD_SYNC = 13
+    """同步个性化数据
+    arg1 表示同步的数据类型
+    data 表示同步的数据内容
+    """
+
+    CMD_RESULT_VALIDATION_ACK = 20
+    """
+    结果确认
+    在接收到语义、听写、后处理的结果后 5s 内发送该指令对结果进行确认,AIUI会认为该条结果有效,并重新开始 AIUI 交互超时的计时
+    """
+
+    CMD_CLEAN_DIALOG_HISTORY = 21
+    """清除云端语义对话历史"""
+
+    CMD_QUERY_SYNC_STATUS = 24
+    """查询数据同步状态
+    arg1 表示状态查询的类型
+    params 包含查询条件,需要在 params 中通过 sid 字段指定 CMD_SYNC 返回的 sid
+    """
+
+    CMD_TTS = 27
+    """语音合成命令"""
+
+    # VAD相关常量
+    VAD_BOS = 0
+    """VAD前端点"""
+
+    VAD_VOLUME = 1
+    """VAD音量"""
+
+    VAD_EOS = 2
+    """VAD后端点"""
+
+    VAD_TIMEOUT = 3
+    """VAD前端点超时"""
+
+    # 自动休眠类型
+    TYPE_AUTO = 0
+    """自动休眠"""
+
+    TYPE_COMPEL = 1
+    """强制休眠"""
+
+    SUCCESS = 0
+    """成功"""
+
+    FAIL = -1
+    """失败"""

+ 324 - 0
handlers/aiui/pyaiui.py

@@ -0,0 +1,324 @@
+'''
+Author: zhaoyong 77912776@qq.com
+Date: 2025-07-02 06:34:52
+LastEditTime: 2025-07-19 12:24:11
+LastEditors: zhaoyong 77912776@qq.com
+FilePath: \robot_ai\src\aiui\pyaiui.py
+Description: aiui的python调用c++接口类
+'''
+#!/usr/bin python3
+# coding=utf-8
+import ctypes
+import os
+from .pyAIUIConstant import AIUIConstant
+import abc
+import sys
+import platform
+from pathlib import Path
+
# Handle to the native AIUI shared library; loaded below per platform.
AIUI_DLL = None
# Path of the current script
script_path = Path(__file__).resolve()

# Project root (walks up from this file's package directory; the original
# comment referenced src/aiui -> src -> root — presumably stale, confirm
# against the current handlers/aiui layout)
BASE_DIR = script_path.parent.parent.parent

# NOTE(review): LoadLibrary raises OSError at import time if the library is
# missing — presumably intentional fail-fast behavior; confirm.
if platform.system() == "Windows":
    AIUI_DLL = ctypes.windll.LoadLibrary(
        str(BASE_DIR / "config" / "aiui" / "x64" / "aiui.dll"))
else:
    AIUI_DLL = ctypes.cdll.LoadLibrary(
        str(BASE_DIR / "config" / "aiui" / "arm" / "libaiui.so"))
+
+
def aiui_get_version():
    """Return the AIUI SDK version string, or a diagnostic message when the
    native library (or its export) is unavailable."""
    if AIUI_DLL is None:
        return "AIUI DLL not loaded"

    if not hasattr(AIUI_DLL, 'aiui_get_version'):
        return "aiui_get_version function not found in DLL"

    version_fn = AIUI_DLL.aiui_get_version
    version_fn.restype = ctypes.c_char_p
    return str(version_fn(), encoding="utf-8")
+
+
class IDataBundle:
    """ctypes wrapper over a native AIUI data-bundle handle.

    Each accessor export is resolved once at construction; when the DLL or
    a symbol is unavailable the getters degrade to returning the supplied
    default / empty value.
    """

    # Native bundle handle (void*).
    aiui_db = None

    @staticmethod
    def _bind(name, restype, argtypes):
        """Resolve DLL export `name` and set its signature; None if missing.

        Replaces four copy-pasted `if AIUI_DLL is not None and hasattr(...)`
        stanzas in the original __init__.
        """
        if AIUI_DLL is None or not hasattr(AIUI_DLL, name):
            return None
        fn = getattr(AIUI_DLL, name)
        fn.restype = restype
        fn.argtypes = argtypes
        return fn

    def __init__(self, aiui_db: ctypes.c_void_p):
        self.aiui_db = aiui_db
        self.aiui_db_int = self._bind(
            'aiui_db_int', ctypes.c_int,
            [ctypes.c_void_p, ctypes.c_char_p, ctypes.c_void_p])
        self.aiui_db_long = self._bind(
            'aiui_db_long', ctypes.c_long,
            [ctypes.c_void_p, ctypes.c_char_p, ctypes.c_void_p])
        self.aiui_db_string = self._bind(
            'aiui_db_string', ctypes.c_char_p,
            [ctypes.c_void_p, ctypes.c_char_p, ctypes.c_char_p])
        self.aiui_db_binary = self._bind(
            'aiui_db_binary', ctypes.c_void_p,
            [ctypes.c_void_p, ctypes.c_char_p, ctypes.c_void_p])

    def getInt(self, key: str, defaultVal: int) -> int:
        """Return the int stored under `key`, or `defaultVal`."""
        if self.aiui_db_int is None:
            return defaultVal
        return int(self.aiui_db_int(self.aiui_db, ctypes.c_char_p(key.encode("utf-8")),
                                    ctypes.pointer(ctypes.c_int(defaultVal))))

    def getLong(self, key: str, defaultVal: int) -> int:
        """Return the long stored under `key`, or `defaultVal`."""
        if self.aiui_db_long is None:
            return defaultVal
        return int(self.aiui_db_long(self.aiui_db, ctypes.c_char_p(key.encode("utf-8")),
                                     ctypes.pointer(ctypes.c_long(defaultVal))))

    def getString(self, key: str, defaultVal: str) -> str:
        """Return the string stored under `key`, or `defaultVal`."""
        if self.aiui_db_string is None:
            return defaultVal
        s = self.aiui_db_string(self.aiui_db, ctypes.c_char_p(key.encode("utf-8")),
                                ctypes.c_char_p(defaultVal.encode("utf-8")))
        return str(s, encoding="utf-8")

    def getBinary(self, key: str) -> bytes:
        """Return the raw bytes stored under `key`, or b"" when unavailable."""
        if self.aiui_db_binary is None:
            return b""
        datalen = ctypes.c_int(0)

        s = self.aiui_db_binary(self.aiui_db, ctypes.c_char_p(
            key.encode("utf-8")), ctypes.pointer(datalen))

        # Robustness fix: guard empty/absent payloads before the cast
        # (getBinaryAsStr already had this guard; getBinary did not).
        if datalen.value <= 0:
            return b""

        ArrayType = ctypes.c_char * datalen.value
        pa = ctypes.cast(s, ctypes.POINTER(ArrayType))

        return bytes(pa.contents)

    def getBinaryAsStr(self, key: str) -> str:
        """Return the binary payload under `key` decoded as UTF-8.

        Reads the full reported length (no off-by-one truncation of JSON)
        and strips a single trailing NUL terminator if present.
        """
        try:
            if self.aiui_db_binary is None:
                return ""

            datalen = ctypes.c_int(0)
            binary = self.aiui_db_binary(self.aiui_db, ctypes.c_char_p(
                key.encode('utf-8')), ctypes.pointer(datalen))

            if datalen.value <= 0:
                return ""

            # No -1 on the length: taking all bytes avoids truncating JSON.
            arrayType = ctypes.c_char * datalen.value
            pointArray = ctypes.cast(binary, ctypes.POINTER(arrayType))

            raw_bytes = bytes(pointArray.contents)

            # Drop a trailing NUL terminator if the native side included one.
            if raw_bytes and raw_bytes[-1] == 0:
                raw_bytes = raw_bytes[:-1]

            return str(raw_bytes, encoding='utf-8')
        except Exception:
            # Best-effort: any decode/cast failure yields an empty string.
            return ""
+
+
class IAIUIEvent:
    """ctypes wrapper over a native AIUI event handle.

    NOTE(review): unlike IDataBundle, the accessor exports are resolved
    without hasattr guards, so a missing symbol raises AttributeError at
    construction time — confirm this asymmetry is intentional.
    """

    # Native event handle (void*).
    aiui_event = None

    def __init__(self, aiui_event):
        self.aiui_event = aiui_event

        if AIUI_DLL is not None:
            # Resolve each C accessor and declare its signature once.
            self.aiui_event_type = AIUI_DLL.aiui_event_type
            self.aiui_event_type.restype = ctypes.c_int
            self.aiui_event_type.argtypes = [ctypes.c_void_p]

            self.aiui_event_arg1 = AIUI_DLL.aiui_event_arg1
            self.aiui_event_arg1.restype = ctypes.c_int
            self.aiui_event_arg1.argtypes = [ctypes.c_void_p]

            self.aiui_event_arg2 = AIUI_DLL.aiui_event_arg2
            self.aiui_event_arg2.restype = ctypes.c_int
            self.aiui_event_arg2.argtypes = [ctypes.c_void_p]

            self.aiui_event_info = AIUI_DLL.aiui_event_info
            self.aiui_event_info.restype = ctypes.c_char_p
            self.aiui_event_info.argtypes = [ctypes.c_void_p]

            self.aiui_event_databundle = AIUI_DLL.aiui_event_databundle
            self.aiui_event_databundle.restype = ctypes.c_void_p
            self.aiui_event_databundle.argtypes = [ctypes.c_void_p]

    def getEventType(self) -> int:
        """Return the event type (see AIUIConstant.EVENT_*)."""
        return self.aiui_event_type(self.aiui_event)

    def getArg1(self) -> int:
        """Return the event's first integer argument."""
        return self.aiui_event_arg1(self.aiui_event)

    def getArg2(self) -> int:
        """Return the event's second integer argument."""
        return self.aiui_event_arg2(self.aiui_event)

    def getInfo(self) -> str:
        """Return the event's JSON info string.

        # assumes the native char* is non-NULL and valid UTF-8 — TODO confirm
        """
        s = self.aiui_event_info(self.aiui_event)
        return str(s, encoding="utf-8")

    def getData(self) -> IDataBundle:
        """Return the event's payload wrapped in an IDataBundle."""
        db = self.aiui_event_databundle(self.aiui_event)
        return IDataBundle(db)
+
+
class Buffer:
    """Thin wrapper around a native AIUI data buffer handle."""

    # Opaque native buffer handle (void*).
    aiui_buf = None

    def __init__(self, aiui_buf):
        self.aiui_buf = aiui_buf

    @staticmethod
    def create(dataBytearray: bytes):
        """Copy *dataBytearray* into a buffer owned by the AIUI library.

        Returns a Buffer wrapping the native handle, or None when the
        shared library is not loaded.
        """
        if AIUI_DLL is None:
            return None

        factory = AIUI_DLL.aiui_create_buffer_from_data
        factory.restype = ctypes.c_void_p
        factory.argtypes = [ctypes.c_char_p, ctypes.c_size_t]

        handle = factory(ctypes.c_char_p(dataBytearray),
                         ctypes.c_size_t(len(dataBytearray)))
        return Buffer(handle)
+
+
class IAIUIMessage:
    """Wrapper around a native AIUI message handle."""

    # Opaque native message handle (void*).
    aiui_msg = None

    def __init__(self, aiui_msg):
        self.aiui_msg = aiui_msg

    @staticmethod
    def create(msgType: "AIUIConstant", arg1=0, arg2=0, params="", data=None):
        """Build a native AIUI message.

        Args:
            msgType: AIUIConstant enum member; its ``.value`` is passed on.
            arg1, arg2: integer message arguments.
            params: UTF-8 parameter string.
            data: optional Buffer payload; defaults to an empty Buffer.

        Returns:
            IAIUIMessage wrapping the native handle, or None when the
            shared library is not loaded.
        """
        if AIUI_DLL is None:
            return None

        # Build the empty buffer per call instead of using a mutable
        # default argument evaluated once at class-definition time.
        if data is None:
            data = Buffer(None)

        _f = AIUI_DLL.aiui_msg_create
        _f.restype = ctypes.c_void_p
        _f.argtypes = [ctypes.c_int, ctypes.c_int,
                       ctypes.c_int, ctypes.c_char_p, ctypes.c_void_p]

        return IAIUIMessage(
            _f(ctypes.c_int(msgType.value), ctypes.c_int(arg1), ctypes.c_int(arg2),
               ctypes.c_char_p(params.encode("utf-8")), data.aiui_buf)
        )

    def destroy(self):
        """Release the native message; no-op when the DLL is absent."""
        if AIUI_DLL is None:
            return None

        _f = AIUI_DLL.aiui_msg_destroy
        _f.argtypes = [ctypes.c_void_p]

        return _f(self.aiui_msg)
+
+
class AIUIEventListener:
    """Callback interface for receiving AIUI events.

    Subclasses override onEvent() to handle each IAIUIEvent delivered by
    the native library via the registered C callback.
    """

    # NOTE(review): @abc.abstractmethod is not enforced here because the
    # class does not use ABCMeta (e.g. inherit abc.ABC); subclasses that
    # forget to override onEvent can still be instantiated — confirm
    # whether enforcement is desired.
    @abc.abstractmethod
    def onEvent(self, ev: IAIUIEvent):
        pass
+
+
def eventCallback(obj: AIUIEventListener):
    """Adapt an AIUIEventListener to the native callback signature.

    Returns a closure matching ``(void* event, void* user_data)`` that
    wraps the raw event handle in an IAIUIEvent and forwards it to
    ``obj.onEvent``.
    """
    def _on_native_event(ev: ctypes.c_void_p, data: ctypes.c_void_p):
        obj.onEvent(IAIUIEvent(ev))

    return _on_native_event
+
+
class IAIUIAgent:
    """Wrapper around a native AIUI agent handle.

    Holds references to the ctypes callback objects (ListenerWarpper /
    AIUIListenerCallback) so they are not garbage-collected while the
    native library may still invoke them.
    """

    aiui_agent = None
    ListenerWarpper = None
    AIUIListenerCallback = None

    def __init__(self, aiui_agent):
        self.aiui_agent = aiui_agent

        if AIUI_DLL is None:
            self.aiui_agent_send_message = None
            self.aiui_agent_destroy = None
            return

        self.aiui_agent_send_message = AIUI_DLL.aiui_agent_send_message
        self.aiui_agent_send_message.argtypes = [
            ctypes.c_void_p, ctypes.c_void_p]

        self.aiui_agent_destroy = AIUI_DLL.aiui_agent_destroy
        self.aiui_agent_destroy.argtypes = [ctypes.c_void_p]

    def sendMessage(self, msg: IAIUIMessage):
        """Forward *msg* to the native agent; no-op when the DLL is absent."""
        if self.aiui_agent_send_message is None:
            return None
        return self.aiui_agent_send_message(self.aiui_agent, msg.aiui_msg)

    def destroy(self):
        """Release the native agent and drop the callback references."""
        if self.aiui_agent_destroy is not None:
            self.aiui_agent_destroy(self.aiui_agent)
        self.AIUIListenerCallback = None
        self.ListenerWarpper = None
        self.aiui_agent = None

    @staticmethod
    def createAgent(params: str, listener):
        """Create a native agent configured by *params* (a JSON string).

        Returns an IAIUIAgent, or None when the shared library is not
        loaded. *listener* receives every event via its onEvent method.
        """
        if AIUI_DLL is None:
            return None

        creator = AIUI_DLL.aiui_agent_create
        creator.argtypes = [ctypes.c_char_p, ctypes.c_void_p, ctypes.c_void_p]
        creator.restype = ctypes.c_void_p

        agent = IAIUIAgent(None)
        agent.ListenerWarpper = eventCallback(listener)
        agent.AIUIListenerCallback = ctypes.CFUNCTYPE(
            None, ctypes.c_void_p, ctypes.c_void_p)(agent.ListenerWarpper)
        agent.aiui_agent = creator(
            ctypes.c_char_p(params.encode('utf-8')),
            agent.AIUIListenerCallback, None)

        return agent
+
+
class AIUISetting:
    """Static helpers for global AIUI library configuration."""

    @staticmethod
    def setSystemInfo(key: str, val: str):
        """Set one key/value pair of system information on the library."""
        if AIUI_DLL is None:
            return None

        setter = AIUI_DLL.aiui_set_system_info
        setter.argtypes = [ctypes.c_char_p, ctypes.c_char_p]

        return setter(ctypes.c_char_p(key.encode("utf-8")),
                      ctypes.c_char_p(val.encode("utf-8")))

    @staticmethod
    def setMscDir(szDir: str):
        """Point the library at its MSC working directory.

        Returns the library's boolean result, or False when the shared
        library is not loaded.
        """
        if AIUI_DLL is None:
            return False

        setter = AIUI_DLL.aiui_set_msc_dir
        setter.restype = ctypes.c_bool
        setter.argtypes = [ctypes.c_char_p]

        return setter(ctypes.c_char_p(szDir.encode('utf-8')))

+ 486 - 0
handlers/baidu/speech_handler.py

@@ -0,0 +1,486 @@
+"""
+百度实时语音识别处理模块
+"""
+from utils.logger import logger
+import json
+import struct
+import os
+import threading
+import queue
+import base64
+import requests
+import random
+import sounddevice as sd
+import platform
system = platform.system().lower()
# Per-OS default output device: PulseAudio on Linux; leave sounddevice's
# own default on Windows/macOS. Unknown platforms are left untouched.
_OS_DEFAULT_DEVICE = {"linux": "pulse", "windows": None, "darwin": None}
if system in _OS_DEFAULT_DEVICE:
    sd.default.device = _OS_DEFAULT_DEVICE[system]
+from socket import socket, AF_INET, SOCK_STREAM
+from websockets.sync.client import connect
+from websockets import ConnectionClosedOK
+from utils.load_config import load_config
+
+
class BaiduSpeechHandler:
    """Baidu realtime speech recognition handler.

    Reads raw microphone frames from the XunFei audio front-end over a
    TCP socket, streams them to Baidu's realtime websocket API, and plays
    the audio responses back through sounddevice.
    """

    def __init__(self):
        # Filler phrases bucketed by expected reply length. Baidu returns
        # audio directly, so they are unused in this handler; the lists
        # keep the interface aligned with the XunFei handler.
        self.short_thinking_phrases = [
            "嗯",
            "这个……",
            "嗯……",
        ]

        self.medium_thinking_phrases = [
            "稍等",
            "我想想",
            "等一下",
        ]

        self.long_thinking_phrases = [
            "嗯,我想一想",
            "我琢磨一下",
            "我思考一下",
        ]

        # Load configuration
        self.config = load_config()
        self.baidu_config = self.config.get(
            'speech_recognition', {}).get('baidu_realtime', {})

        # Baidu realtime recognition state
        self.audio_socket = None          # TCP socket to the audio front-end
        self.baidu_ws = None              # websocket to Baidu
        self.play_buffer = queue.Queue()  # PCM chunks pending playback
        self.chatting = threading.Event()
        self.recording = threading.Event()
        self.is_running = False
        self.access_token = None

        # Audio playback state
        self.output_device = None
        self.audio_stream = None

        # Fetch the Baidu access token up front
        self._init_baidu_realtime()

    def _init_baidu_realtime(self):
        """Fetch a Baidu OAuth access token using the configured credentials."""
        try:
            client_id = self.baidu_config.get('client_id')
            client_secret = self.baidu_config.get('client_secret')

            if not client_id or not client_secret:
                logger.error("百度配置中缺少client_id或client_secret")
                return

            # Exchange client credentials for an access token
            token_url = f"https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={client_id}&client_secret={client_secret}"
            # Bound the request so startup cannot hang on a dead network.
            response = requests.get(token_url, timeout=10)
            if response.status_code == 200:
                self.access_token = response.json().get('access_token')
                logger.info("百度实时语音识别初始化成功")
            else:
                logger.error("获取百度access token失败")
                return

        except Exception as e:
            logger.error(f"初始化百度实时语音识别失败: {e}")

    def _init_audio_socket(self):
        """Connect the TCP socket that delivers microphone audio frames.

        Returns True on success; on failure plays a spoken error hint and
        returns False.
        """
        try:
            self.audio_socket = socket(AF_INET, SOCK_STREAM)
            server_ip = self.config.get(
                'server', {}).get('ip', '192.168.123.21')
            self.audio_socket.connect((server_ip, 9080))
            logger.info("音频Socket连接建立成功")
            return True
        except Exception as e:
            from utils.tts_client import play_text_async
            play_text_async('讯飞套件连接失败,请检查讯飞套件是否正常', use_cache=True)
            logger.error(f"音频Socket连接失败: {e}")
            return False

    def _recv_all(self, num_byte):
        """Receive exactly *num_byte* bytes from the audio socket.

        Returns the bytes, or None if the peer closed the connection
        before enough data arrived.
        """
        data = b''
        while len(data) < num_byte:
            packet = self.audio_socket.recv(num_byte - len(data))
            if not packet:
                return None
            data += packet
        return data

    def _process_audio_data(self):
        """Read and validate one framed audio message from the socket.

        The 9-byte header is '<BBBIH' (sync, user id, msg type, payload
        length, msg id). Returns the PCM payload of channel 0, or None.
        """
        try:
            recv_data = self._recv_all(9)
            if not recv_data:
                return None

            sync_head, user_id, msg_type, msg_length, msg_id = struct.unpack(
                '<BBBIH', recv_data)

            if sync_head == 0xa5 and user_id == 0x01:
                recv_data = self._recv_all(msg_length + 1)
                if recv_data and recv_data[1] == 0:  # only channel 0 audio
                    audio_data = recv_data[8:-1]
                    return audio_data
            return None
        except Exception as e:
            logger.error(f"处理音频数据异常: {e}")
            return None

    def _process_audio_to_baidu(self):
        """Forward microphone audio to Baidu (runs in its own thread)."""
        try:
            while self.is_running:
                audio_data = self._process_audio_data()
                if audio_data:
                    audio_base64 = base64.b64encode(audio_data).decode()
                    self.baidu_ws.send(json.dumps({
                        "type": "input_audio_buffer.append",
                        "audio": audio_base64
                    }))

        except Exception as e:
            logger.error(f"发送音频到百度异常: {e}")

    def _play_audio(self):
        """Playback thread: drain self.play_buffer into an output stream.

        The 'EOF' sentinel placed by stop_recognition() ends the loop.
        """
        try:
            # Pick an output device; fall back to the system default on
            # any failure.
            try:
                devices = sd.query_devices()
                output_device = None

                # Pick the first device that can output audio.
                # Fixed: sounddevice device dicts expose the key
                # 'max_output_channels' (there is no 'max_outputs' key,
                # which previously raised KeyError into the fallback).
                for i, device in enumerate(devices):
                    if device['max_output_channels'] > 0:
                        output_device = i
                        break

                if output_device is None:
                    logger.warning("未找到输出设备,使用默认设备")
                    output_device = None
                else:
                    logger.info(f"使用输出设备: {devices[output_device]['name']}")

            except Exception as e:
                logger.warning(f"设备查询失败,使用默认设备: {e}")
                output_device = None

            # Audio output format from configuration
            output_audio_config = self.baidu_config.get('output_audio', {})
            sample_rate = output_audio_config.get('sample_rate', 24000)
            channels = output_audio_config.get('channels', 1)

            logger.info(
                f"启动音频播放线程,采样率: {sample_rate}, 声道: {channels}, 设备: {output_device}")

            with sd.RawOutputStream(
                samplerate=sample_rate,
                channels=channels,
                dtype='int16',
                blocksize=1024,
                device=output_device
            ) as stream:
                while self.is_running:
                    try:
                        chunk = self.play_buffer.get(timeout=1)
                        if chunk == 'EOF':  # sentinel from stop_recognition
                            break
                        stream.write(chunk)
                    except queue.Empty:
                        continue
                    except Exception as e:
                        logger.error(f"音频播放异常: {e}")
                        break

        except Exception as e:
            logger.error(f"音频播放线程异常: {e}")

    def _start_baidu_realtime(self):
        """Open the Baidu websocket, start worker threads, send session config.

        Returns True on success, False otherwise.
        """
        try:
            if not self.access_token:
                logger.error("百度access token未初始化")
                return False

            streaming_url = self.baidu_config.get('streaming_url')
            model_name = self.baidu_config.get('model_name', 'audio-realtime')

            url = f'{streaming_url}?model={model_name}&access_token={self.access_token}'

            self.baidu_ws = connect(url)
            self.is_running = True

            # Playback thread
            play_thread = threading.Thread(target=self._play_audio)
            play_thread.daemon = True
            play_thread.start()

            # Receive thread
            receive_thread = threading.Thread(target=self._receive_baidu_data)
            receive_thread.daemon = True
            receive_thread.start()

            # Microphone-forwarding thread
            audio_thread = threading.Thread(
                target=self._process_audio_to_baidu)
            audio_thread.daemon = True
            audio_thread.start()

            # Session configuration — make sure audio output is enabled
            session_config = {
                "type": "session.update",
                "session": {
                    "input_audio_transcription": {
                        "model": "default"
                    },
                    "output_audio": {
                        "format": "pcm16",
                        "sample_rate": 24000
                    },
                    "max_output_tokens": "inf",
                    "voice": "default"
                }
            }

            self.baidu_ws.send(json.dumps(session_config))

            logger.info("百度实时语音识别启动成功")
            return True

        except Exception as e:
            logger.error(f"启动百度实时语音识别失败: {e}")
            return False

    def _handle_baidu_response_done(self, data):
        """Handle a 'response.done' message: extract transcripts and audio."""
        try:
            if 'response' in data:
                response = data['response']
                status = response.get('status', '')
                status_details = response.get('status_details', {})

                logger.info(f"百度响应状态: {status}")

                if status == 'completed':
                    # Fully completed response
                    if 'output' in response:
                        output = response['output']
                        for item in output:
                            if item.get('type') == 'message' and 'content' in item:
                                content = item['content']
                                for content_item in content:
                                    if content_item.get('type') == 'audio':
                                        # Audio content item
                                        transcript = content_item.get(
                                            'transcript', '')
                                        if transcript:
                                            logger.info(
                                                f"百度识别结果: {transcript}")
                                            # Inline audio, if present
                                            if 'audio' in content_item:
                                                audio_data = base64.b64decode(
                                                    content_item['audio'])
                                                self.play_buffer.put(
                                                    audio_data)
                                                logger.info(
                                                    f"从content_item中提取音频数据,大小: {len(audio_data)} 字节")
                                            else:
                                                logger.warning(
                                                    "content_item中没有找到音频数据")

                                            # Play Baidu's own audio
                                            # response directly (no TTS)
                                            self._play_baidu_audio_response(
                                                response)

                elif status == 'incomplete':
                    # Incomplete response, e.g. due to content filtering
                    reason = status_details.get('reason', 'unknown')
                    logger.warning(f"百度响应不完整,原因: {reason}")

                    if reason == 'content_filter':
                        logger.info("内容被过滤,使用TTS播放提示音")
                        # Optionally inform the user the content was filtered
                        # play_text_async("抱歉,我无法回答这个问题", use_cache=True)
                    else:
                        logger.warning(f"未知的不完整原因: {reason}")

                else:
                    logger.warning(f"未知的响应状态: {status}")

        except Exception as e:
            logger.error(f"处理百度response.done异常: {e}")

    def _play_baidu_audio_response(self, response):
        """Queue any audio payloads found in *response* for playback."""
        try:
            if 'output' in response:
                output = response['output']
                for item in output:
                    if item.get('type') == 'message' and 'content' in item:
                        content = item['content']
                        for content_item in content:
                            if content_item.get('type') == 'audio':
                                if 'audio' in content_item:
                                    audio_data = base64.b64decode(
                                        content_item['audio'])
                                    self.play_buffer.put(audio_data)
                                    logger.info(
                                        f"从响应中提取音频数据,大小: {len(audio_data)} 字节")
                                else:
                                    logger.warning("响应中没有找到音频数据")

            logger.info("等待百度音频数据播放...")

        except Exception as e:
            logger.error(f"播放百度音频响应异常: {e}")

    def _receive_baidu_data(self):
        """Receive-loop thread: dispatch websocket messages by 'type'."""
        try:
            while self.is_running:
                data = self.baidu_ws.recv()
                if isinstance(data, str):
                    data = json.loads(data)

                    # Log every message type to aid debugging
                    msg_type = data.get('type', 'unknown')
                    logger.info(f"收到百度消息类型: {msg_type}")

                    if data['type'] == 'response.audio.delta':
                        # Streamed audio chunk from Baidu
                        audio = base64.b64decode(data['delta'])
                        self.play_buffer.put(audio)
                        logger.info(f"收到百度音频数据,大小: {len(audio)} 字节")
                        # Truncate the payload before the JSON dump below
                        data['delta'] = '...'

                    elif data['type'] == 'response.created':
                        # Drop any stale audio before the new response
                        while True:
                            try:
                                self.play_buffer.get(block=False)
                            except queue.Empty:
                                break
                        logger.info("清空播放缓冲区,准备接收新的音频")

                    elif data['type'] == 'input_audio_buffer.speech_started':
                        self.chatting.set()
                        logger.info("语音开始")

                    elif data['type'] == 'response.done':
                        self.chatting.clear()
                        logger.info("响应完成")
                        # Full response, including any audio payloads
                        self._handle_baidu_response_done(data)

                    elif data['type'] == 'input_audio_buffer.speech_ended':
                        # Speech segment finished: handle recognition result
                        self._handle_baidu_recognition_result(data)
                        logger.info("语音结束")

                    elif data['type'] == 'response.audio':
                        # Complete (non-streamed) audio response
                        logger.info("收到完整的音频响应")
                        if 'audio' in data:
                            audio = base64.b64decode(data['audio'])
                            self.play_buffer.put(audio)
                            logger.info(f"收到完整音频数据,大小: {len(audio)} 字节")

                    elif data['type'] == 'response.text':
                        # Text response
                        logger.info("收到文本响应")
                        if 'text' in data:
                            logger.info(f"文本内容: {data['text']}")

                    elif data['type'] == 'session.created':
                        # Session established
                        logger.info("会话创建成功")
                        if 'session' in data:
                            session = data['session']
                            logger.info(f"会话ID: {session.get('id')}")

                    elif data['type'] == 'conversation.created':
                        # Conversation established
                        logger.info("对话创建成功")
                        if 'conversation' in data:
                            conversation = data['conversation']
                            logger.info(f"对话ID: {conversation.get('id')}")

                    elif data['type'] == 'error':
                        # Error message from Baidu
                        logger.error("收到错误消息")
                        if 'error' in data:
                            error = data['error']
                            error_type = error.get('type', 'unknown')
                            error_code = error.get('code', 'unknown')
                            error_message = error.get('message', 'unknown')
                            logger.error(
                                f"错误类型: {error_type}, 代码: {error_code}, 消息: {error_message}")

                    else:
                        # Any other message type
                        logger.info(f"收到其他类型消息: {msg_type}")

                    logger.info(json.dumps(data, ensure_ascii=False))

        except ConnectionClosedOK:
            logger.info("百度WebSocket连接已关闭")
        except Exception as e:
            logger.error(f"接收百度数据异常: {e}")

    def _handle_baidu_recognition_result(self, data):
        """Log the recognized text from a speech-ended message."""
        try:
            if 'result' in data:
                result_text = data['result'].get('text', '')
                if result_text:
                    logger.info(f"百度识别结果: {result_text}")
                    # No thinking filler here: Baidu replies with audio

        except Exception as e:
            logger.error(f"处理百度识别结果异常: {e}")

    def play_thinking_phrase(self, text_length: int = 0, type="thinking"):
        """Interface-compat no-op: Baidu replies with audio, so no TTS filler."""
        logger.info("百度实时语音识别模式,跳过TTS思考语气词播放")
        return

    def start_recognition(self):
        """Connect the audio socket and start the realtime pipeline."""
        if not self._init_audio_socket():
            return False

        return self._start_baidu_realtime()

    def stop_recognition(self):
        """Stop all worker loops and close the socket and websocket."""
        self.is_running = False

        # Wake the playback thread and tell it to exit
        self.play_buffer.put('EOF')

        if self.audio_socket:
            self.audio_socket.close()
            self.audio_socket = None

        if self.baidu_ws:
            self.baidu_ws.close()
            self.baidu_ws = None

        logger.info("百度实时语音识别服务已停止")

+ 0 - 0
handlers/dify/__init__.py


+ 35 - 0
handlers/dify/recognize_intention.py

@@ -0,0 +1,35 @@
+import requests
+import time
+
+from config.config.dify_config import difyconfig as dify_config
+
def chat_with_dify(question='你看到了什么', image_path=''):
    """Send a blocking chat query to the Dify intent-recognition endpoint.

    Args:
        question: User utterance to send as the query.
        image_path: Currently unused; kept for interface compatibility.
            TODO(review): wire into the payload if the endpoint accepts
            vision input.

    Returns:
        The endpoint's "answer" string, or '' on any request error.
    """
    s1_time = time.time()
    headers = {
        "Authorization": f"Bearer {dify_config.get_recognize_api_key()}",
        "Content-Type": "application/json"
    }
    payload = {
        "inputs": {},  # optional extra context; empty for now
        "query": question,
        "response_mode": "blocking",
        "conversation_id": "",  # empty string starts a new conversation
        "user": dify_config.get_user(),
    }
    try:
        s3_time = time.time()
        # Bound the call so a hung endpoint cannot block the caller forever.
        resp = requests.post(dify_config.get_recognize_url(),
                             headers=headers, json=payload, timeout=30)
        s4_time = time.time()
        res = resp.json().get("answer", '请求出错')
        print(f"请求完成耗时: {s4_time - s3_time:.2f}秒")
        print(f"总耗时: {s4_time - s1_time:.2f}秒")
        return res
    except Exception as e:
        print(f"请求出错: {e}")
        return ''


if __name__ == '__main__':
    # chat_with_dify(question='请用 30 字以内分析你看到的',image_path='demo_pic.png')
    res = chat_with_dify(question='你看到了什么', image_path='')
    print(11111, res)

+ 39 - 0
handlers/dify/recognize_models.py

@@ -0,0 +1,39 @@
+import requests
+import time
+
+from config.config.dify_config import difyconfig as dify_config
+
+
def chat_with_dify(question='你看到了什么', image_path=''):
    """Send a blocking chat query to Dify with the current model type.

    Args:
        question: User utterance to send as the query.
        image_path: Currently unused; kept for interface compatibility.

    Returns:
        The endpoint's "answer" string, or '' on any request error.
    """
    s1_time = time.time()
    model = dify_config.get_current_mode()
    headers = {
        "Authorization": f"Bearer {dify_config.get_models_api_key()}",
        "Content-Type": "application/json"
    }
    payload = {
        "inputs": {"type": model},  # selected model type as input context
        "query": question,
        "response_mode": "blocking",
        "conversation_id": "",  # empty string starts a new conversation
        "user": dify_config.get_user(),
    }
    try:
        s3_time = time.time()
        # NOTE(review): this posts to get_recognize_url() while using the
        # *models* API key — confirm the URL getter is intentional and not
        # a copy-paste slip from recognize_intention.py.
        # Bound the call so a hung endpoint cannot block the caller forever.
        resp = requests.post(dify_config.get_recognize_url(),
                             headers=headers, json=payload, timeout=30)
        s4_time = time.time()
        res = resp.json().get("answer", '请求出错')
        print(f"请求完成耗时: {s4_time - s3_time:.2f}秒")
        print(f"总耗时: {s4_time - s1_time:.2f}秒")
        return res
    except Exception as e:
        print(f"请求出错: {e}")
        return ''


if __name__ == '__main__':
    # chat_with_dify(question='请用 30 字以内分析你看到的',image_path='demo_pic.png')
    # NOTE(review): setter is set_current_model() but the getter above is
    # get_current_mode() — verify the config API naming is consistent.
    dify_config.set_current_model('ernie')
    res = chat_with_dify(question='你看到了什么', image_path='')
    print(11111, res)

+ 59 - 0
handlers/speech_handler.py

@@ -0,0 +1,59 @@
+'''
+Author: zhaoyong 77912776@qq.com
+Date: 2025-08-23 14:34:11
+LastEditTime: 2025-08-27 15:06:43
+LastEditors: zhaoyong 77912776@qq.com
+FilePath: \robot_ai\handlers\speech_handler.py
+Description: 头部注释配置模板
+'''
+"""
+语音识别处理模块 - 工厂类
+"""
+from utils.logger import logger
+from utils.load_config import load_config
+from handlers.baidu.speech_handler import BaiduSpeechHandler
+from handlers.xunfei.speech_handler import XunfeiSpeechHandler
+
+
class SpeechHandlerFactory:
    """Factory that builds the configured speech-recognition handler."""

    @staticmethod
    def create_handler():
        """Instantiate the handler selected by the configuration.

        Reads config['speech_recognition']['service'] (default
        'xunfei_aiui').

        Returns:
            BaiduSpeechHandler or XunfeiSpeechHandler instance.

        Raises:
            ValueError: if the configured service type is unknown.
        """
        try:
            service_type = (load_config()
                            .get('speech_recognition', {})
                            .get('service', 'xunfei_aiui'))

            logger.info(f"创建语音识别处理器,服务类型: {service_type}")

            registry = {
                'baidu_realtime': BaiduSpeechHandler,
                'xunfei_aiui': XunfeiSpeechHandler,
            }
            handler_cls = registry.get(service_type)
            if handler_cls is None:
                logger.error(f"不支持的语音识别服务类型: {service_type}")
                raise ValueError(f"不支持的语音识别服务类型: {service_type}")
            return handler_cls()

        except Exception as e:
            logger.error(f"创建语音识别处理器失败: {e}")
            raise
+
+
+# 为了保持向后兼容,保留原有的SpeechHandler类名
class SpeechHandler:
    """Backward-compatible facade over the factory-selected handler."""

    def __init__(self):
        self.handler = SpeechHandlerFactory.create_handler()

    def __getattr__(self, name):
        # Invoked only when normal attribute lookup fails, so every
        # unknown attribute/method is delegated to the wrapped handler.
        wrapped = self.handler
        return getattr(wrapped, name)

+ 321 - 0
handlers/xunfei/intent_handler.py

@@ -0,0 +1,321 @@
+import threading
+
+from config.config.dify_config import difyconfig
+from handlers.dify.recognize_intention import chat_with_dify
+import hashlib
+import pymysql
+from utils.pc2_requests import _send_qa_task, _send_led_color_task
+from utils.tts_client import play_text_async
+import json5
+import random
+import json
+from utils.logger import logger
+import requests
+import paramiko
+from sshtunnel import SSHTunnelForwarder
+from config.config.settings import config
+from strategies.vision.qwenv import process_base64_image
+from strategies.action.execute_actions import ActionExecutor
+from utils.logger import logger
+"""
+意图处理模块
+"""
+
+
class IntentHandler:
    """Intent handling for AIUI cbm_semantic results.

    Parses the intent JSON, then dispatches to LED/QA/TTS side effects,
    vision processing, base-station passenger-flow queries, the guessing
    game, and dify model switching.
    """

    def __init__(self):
        self.detected_intent = None          # last detected intent name (not written in this class)
        self.tts_text = ""                   # last TTS text (not written in this class)
        self.cbm_semantic_processed = False  # True once an answer was already played this turn
        self.executor = ActionExecutor()     # executes configured robot actions
        # Filler phrases spoken while a (slow) vision request is running.
        self.vision_dealing_words = [
            "我将从多个角度分析",
            "我将从多维视角分析",
            "我将从不同层面分析",
            "我将从多方面评估",
            "我将从多个维度剖析",
        ]

    def play_vision_phrase(self, type="thinking"):
        """Play a random filler phrase while vision processing runs.

        Args:
            type: label kept for caller readability; not read in the body.
        """
        phrase = random.choice(self.vision_dealing_words)
        logger.info(f"播放视觉语气助词: {phrase}")
        play_text_async(phrase, use_cache=True)

    def handle_intent_result(self, data: dict) -> None:
        """Handle one AIUI intent-recognition message.

        Args:
            data: raw AIUI message; the intent JSON string lives at
                content.result.cbm_semantic.text.
        """
        try:
            try:
                # LED feedback is best-effort and runs on its own daemon thread.
                led_thread = threading.Thread(
                    target=_send_led_color_task,
                    args=(self, "BREATH", "BLUE"),
                    daemon=True
                )
                led_thread.start()
            except Exception as e:
                logger.debug(f"[LED] LED控制失败,不影响意图处理: {e}")

            text_value = data.get('content', {}).get(
                'result', {}).get('cbm_semantic', {}).get('text')
            if text_value:

                intent = json.loads(text_value)
                rc = intent.get('rc', -1)  # rc == 0 means recognition succeeded
                if rc == 0:
                    category = intent.get('category', "")
                    logger.info(f"技能结果: {category}")
                    # Skip the default map skill entirely.
                    if category == "IFLYTEK.mapU":
                        logger.info(f"排除默认技能: {category}")
                        return
                    # Vision intent: optionally double-check with dify.
                    elif category.split('.')[-1] == "vision":
                        ###################
                        # Use dify to confirm this really is a vision request.
                        logger.info(f"使用dify识别视觉意图")
                        logger.info(
                            f"difyconfig.get_vision_switch(): {difyconfig.get_vision_switch()}")
                        if difyconfig.get_vision_switch():
                            # Ask dify to classify the utterance; bail out unless
                            # it answers exactly '视觉意图'.
                            iat_txt = intent.get('text', "")
                            res = chat_with_dify(
                                question=iat_txt, image_path='')
                            if res != '视觉意图':
                                return
                    elif category.split('.')[-1] == "wake_up":
                        # WAKEUP re-arms playback; RESET_WAKEUP suppresses it.
                        semantics_list = intent.get("semantic", [])
                        if semantics_list:
                            semantics = semantics_list[0]
                            skill_purpose = semantics.get('intent')
                            if skill_purpose == "WAKEUP":
                                self.cbm_semantic_processed = False
                            elif skill_purpose == "RESET_WAKEUP":
                                self.cbm_semantic_processed = True
                        ###################

                    iat_txt = intent.get('text', "")
                    logger.info(f"意图识别文本: {iat_txt}")
                    answer = intent.get('answer', {}).get('text', "")
                    logger.info(f"意图识别答案: {answer}")
                    if answer:
                        try:
                            # Forward the answer to the QA endpoint asynchronously;
                            # vision answers are forwarded later by the vision branch.
                            logger.info("发送QA接口===========> [QA]")
                            if category.split('.')[-1] != "vision":
                                qa_thread = threading.Thread(
                                    target=_send_qa_task,
                                    args=(self, {"result": answer}),
                                    daemon=True
                                )
                                qa_thread.start()
                        except Exception as e:
                            logger.error(f"QA接口请求失败: {e}")
                        # Only play once per turn; mark the turn as handled.
                        if not self.cbm_semantic_processed:
                            self.cbm_semantic_processed = True
                            try:
                                play_text_async(answer, use_cache=True)
                                logger.info(f"[意图] 答案播放请求已发送")
                            except Exception as e:
                                logger.info(f"[意图] 播放请求失败: {e}")
                                import traceback
                                traceback.print_exc()

                    semantics_list = intent.get("semantic", [])
                    if semantics_list:
                        semantics = semantics_list[0]
                        skill_purpose = semantics.get('intent')
                        # Dispatch to the concrete intent handler.
                        self._handle_detected_intent(
                            category, iat_txt, skill_purpose)
                    else:
                        logger.warning("未获取到语义信息")
                        # Dispatch without a skill purpose.
                        self._handle_detected_intent(
                            category, iat_txt, None)
            else:
                logger.warning("未获取到意图文本")

        except Exception as e:
            logger.error(f"意图处理异常: {e}")

    def _handle_detected_intent(self, intent: str, iat_txt: str, purpose) -> None:
        """Dispatch a detected intent to its concrete handler.

        Args:
            intent: intent/category name (e.g. "XXX.vision").
            iat_txt: original IAT (speech recognition) text.
            purpose: skill purpose from semantics, or None when absent.
        """
        # Load the configured action categories from the yaml config.
        config_data = config.get_config()
        xunfei_config = config_data.get('xunfei', {})
        categories = xunfei_config.get('category', [])

        if intent in categories:
            self.executor.execute_actions(intent, purpose)

        # Vision request: fetch a camera frame and describe it.
        elif intent.split('.')[-1] == "vision":
            # Play a filler phrase as soon as recognition starts.
            logger.info(f"开始播放语气词")
            self.play_vision_phrase(type="vision")
            logger.info(f"检测到 [{intent}] 意图, 执行视觉相关")
            # Mark as handled so the later NLP result is not replayed.
            self.cbm_semantic_processed = True
            answer_text = '摄像头服务异常'
            try:
                camera_url = config._config_data.get(
                    'camera_url', 'http://127.0.0.1:34550/camera_base64'
                )
                headers = {"Content-Type": "application/json"}
                data = {}  # matches the -d '{}' from curl

                # NOTE(review): GET with a JSON body mirrors the original curl
                # call; confirm the camera service actually expects this.
                response = requests.get(camera_url, headers=headers, json=data)

                resp_data = json5.loads(response.text)
                # code 200 means a frame is available as base64.
                if resp_data.get('code') == 200:
                    base64_pic = resp_data.get('data').get('base64_content')
                    answer_text = process_base64_image(
                        base64_pic, question=iat_txt)
                    logger.info(f"视觉调用结果: {answer_text}")
                    play_text_async(answer_text)
                else:
                    logger.error(f"摄像头服务异常: {str(resp_data.get('code'))}")
                try:
                    # Forward the (possibly fallback) answer to the QA endpoint.
                    logger.info("发送QA接口===========> [QA]")
                    qa_thread = threading.Thread(
                        target=_send_qa_task,
                        args=(self, {"result": answer_text}),
                        daemon=True
                    )
                    qa_thread.start()
                except Exception as e:
                    logger.error(f"QA接口请求失败: {e}")
            except Exception as e:
                logger.error(f"摄像头服务异常: {str(e)}")
        elif intent.split('.')[-1] == "BASE_STATION":
            try:
                match purpose:
                    case "Base_Stations":
                        # Query passenger-flow figures over the SSH tunnel.
                        sum1, sum2 = self.sql_query()
                        play_text_async(f"实时客流为{sum1}人次,累计客流为{sum2}人次")
            except requests.exceptions.RequestException as e:
                logger.error(f"区域基站客流查询API调用失败: {e}")
        elif intent.split('.')[-1] == "Game_Guess":
            try:
                match purpose:
                    case "Ready":
                        pass
                    case "Start_Game":
                        try:
                            response = requests.post(url="http://192.168.123.164:9002/game",
                                                     json={"target": "game", "cmd": "start_game", "id": "cq"})
                            result = response.json()
                            if result and result["code"] == 0:
                                play_text_async("游戏开始")
                            else:
                                play_text_async("游戏开始失败, 请稍后重试")
                        except requests.exceptions.RequestException as e:
                            logger.error(f"猜拳游戏异常(开始): {e}")
                    case "Stop_Game":
                        try:
                            response = requests.post(url="http://192.168.123.164:9002/game",
                                                     json={"target": "game", "cmd": "stop_game", "id": "cq"})
                            result = response.json()
                            if result and result["code"] == 0:
                                play_text_async("游戏结束")
                            else:
                                play_text_async("游戏结束失败, 请稍后重试")
                        except requests.exceptions.RequestException as e:
                            logger.error(f"猜拳游戏异常(结束): {e}")
            except requests.exceptions.RequestException as e:
                logger.error(f"猜拳游戏执行异常: {e}")
        elif intent.split('.')[-1] == "switch_model":
            if difyconfig.get_models_switch():
                # Switch the active dify model according to the skill purpose.
                model_dify_type = difyconfig.get_model_dify_type(purpose)
                if model_dify_type:
                    difyconfig.set_current_model(model_dify_type)
        else:
            logger.info(f"检测到未知意图: {intent}")

    def get_detected_intent(self) -> str:
        """Return the last detected intent name, or "" when none."""
        return self.detected_intent or ""

    def get_tts_text(self) -> str:
        """Return the last TTS text, or "" when none."""
        return self.tts_text or ""

    def is_cbm_semantic_processed(self) -> bool:
        """Return True when this turn's answer has already been played."""
        return self.cbm_semantic_processed

    def reset_cbm_semantic_processed(self) -> None:
        """Clear the per-turn processed flag."""
        self.cbm_semantic_processed = False

    def generate_md5(self, userId: str, key: str, full_identifier: str) -> str:
        """Return the upper-case MD5 hex digest of userId+key+full_identifier."""
        combined = f"{userId}{key}{full_identifier}"
        md5_hash = hashlib.md5(combined.encode('utf-8')).hexdigest()
        return md5_hash.upper()

    def sql_query(self):
        """Fetch (flow_sum1, flow_sum2) for the 'Current' row via an SSH tunnel.

        Returns:
            tuple | None: the row when found; None on no data or any failure.
        """
        try:
            with SSHTunnelForwarder(
                    (config._config_data.get('ssh_host'),
                     config._config_data.get('ssh_port')),
                    ssh_username=config._config_data.get('ssh_user'),
                    ssh_password=config._config_data.get('ssh_pass'),
                    remote_bind_address=(
                        config._config_data.get('db_host'),
                        config._config_data.get('db_port')
                    )
            ) as tunnel:
                logger.info(
                    f"SSH 隧道已建立,开始更新数据库: {config._config_data.get('table')}")
                # NOTE(review): connecting to db_host with the tunnel's local
                # port looks suspicious — the forwarder listens on 127.0.0.1;
                # confirm this host/port pairing is intended.
                conn = pymysql.connect(
                    host=config._config_data.get('db_host'),
                    port=tunnel.local_bind_port,
                    user=config._config_data.get('db_user'),
                    password=config._config_data.get('db_pass'),
                    db=config._config_data.get('db_name'),
                    autocommit=True
                )
                try:
                    # Sanity-check the connection before the real query.
                    with conn.cursor() as cur:
                        cur.execute("SELECT 1;")
                        result = cur.fetchone()
                        if result and result[0] == 1:
                            logger.info("数据库连接成功!")
                        else:
                            logger.error("数据库连接失败!")
                    select_sql = f"""
                        SELECT flow_sum1, flow_sum2 
                        FROM {config._config_data.get('table')} 
                        WHERE identity = 'Current'
                    """
                    with conn.cursor() as cur:
                        cur.execute(select_sql)
                        row = cur.fetchone()
                        if row:
                            # row is the (flow_sum1, flow_sum2) tuple.
                            return row
                        else:
                            logger.warning("未查询到数据!")
                finally:
                    conn.close()
                    logger.info("数据库连接已关闭")
        except Exception as e:
            logger.error(f"更新数据库失败: {e}")

+ 177 - 0
handlers/xunfei/knowledge_handler.py

@@ -0,0 +1,177 @@
+"""
+知识库处理器模块
+"""
+from utils.logger import logger
+import json
+from typing import Dict, Any, Optional
+from utils.logger import logger
+
+
class KnowledgeHandler:
    """Knowledge-base result handler.

    Parses AIUI knowledge-base retrieval payloads
    (content -> info -> data[0]) and extracts the best matching answer text.
    """

    def __init__(self):
        """Bind a named logger so log lines can be traced to this module."""
        self.logger = logger.bind(name=__name__)

    def handle_knowledge_result(self, data: Dict[str, Any]) -> bool:
        """Process a knowledge-base retrieval result.

        Args:
            data: raw AIUI knowledge-base payload.

        Returns:
            bool: True when usable content was extracted, False otherwise.
        """
        try:
            self.logger.info("开始处理知识库检索结果")

            knowledge_content = self._extract_knowledge_content(data)
            if not knowledge_content:
                self.logger.warning("知识库检索结果为空")
                return False

            self.logger.info(f"知识库检索到内容: {knowledge_content[:100]}...")
            # TTS playback could be hooked in here; for now only log the hit.
            self.logger.info(f"知识库内容: {knowledge_content}")
            return True

        except Exception as e:
            self.logger.error(f"处理知识库结果异常: {e}")
            return False

    def _extract_knowledge_content(self, data: Dict[str, Any]) -> Optional[str]:
        """Extract knowledge content from an AIUI payload.

        Uses guard clauses: returns None (after logging) whenever the payload
        does not match the expected content -> info -> data[0] structure.

        Args:
            data: AIUI payload.

        Returns:
            Optional[str]: the extracted content, or None.
        """
        try:
            if 'content' not in data:
                self.logger.warning("数据中缺少content字段")
                return None
            content = data['content']

            if 'info' not in content:
                self.logger.warning("content中缺少info字段")
                return None
            info = content['info']

            if 'data' not in info or not isinstance(info['data'], list):
                self.logger.warning("info中缺少data字段或格式不正确")
                return None
            data_list = info['data']
            if not data_list:
                self.logger.warning("data列表为空")
                return None

            # Only the first (most relevant) result is considered.
            first_result = data_list[0]
            if 'type' not in first_result:
                self.logger.warning("结果中缺少type字段")
                return None
            result_type = first_result['type']

            if result_type == 'text':
                # Plain-text result: the answer is the content field itself.
                if 'content' in first_result:
                    return first_result['content']
                self.logger.warning("文本结果中缺少content字段")
                return None

            if result_type == 'json':
                # JSON result: decode, then dig into the nested hits.
                if 'content' not in first_result:
                    self.logger.warning("JSON结果中缺少content字段")
                    return None
                try:
                    json_content = json.loads(first_result['content'])
                except json.JSONDecodeError as e:
                    self.logger.error(f"JSON解析失败: {e}")
                    return None
                return self._extract_from_json_content(json_content)

            self.logger.warning(f"未知的结果类型: {result_type}")
            return None

        except Exception as e:
            self.logger.error(f"提取知识库内容异常: {e}")
            return None

    def _extract_from_json_content(self, json_content: Dict[str, Any]) -> Optional[str]:
        """Extract the answer text from a decoded JSON knowledge result.

        Only hits with score > 0.5 are accepted; within a hit, 'detail' is
        preferred, then 'summary', then 'content'.

        Args:
            json_content: decoded JSON content with a 'nested' hit list.

        Returns:
            Optional[str]: the extracted text, or None.
        """
        try:
            if 'nested' not in json_content:
                self.logger.warning("JSON内容中缺少nested字段")
                return None
            nested = json_content['nested']

            if not (isinstance(nested, list) and len(nested) > 0):
                self.logger.warning("知识库nested结果为空")
                return None

            # Take the first (most relevant) hit.
            first_result = nested[0]
            if not isinstance(first_result, dict):
                self.logger.warning("知识库结果格式不正确")
                return None

            score = first_result.get("score", 0.0)
            self.logger.info(f"知识库检索score: {score}")
            if score <= 0.5:
                self.logger.info(
                    f"知识库检索score({score:.3f}) <= 0.5,跳过知识库内容")
                return None

            knowledge_content = first_result.get("content", "")
            knowledge_detail = first_result.get("detail", "")
            knowledge_summary = first_result.get("summary", "")

            # Prefer detail, then summary (each only if reasonably long),
            # finally fall back to content.
            if knowledge_detail and len(knowledge_detail.strip()) > 10:
                return knowledge_detail
            if knowledge_summary and len(knowledge_summary.strip()) > 10:
                return knowledge_summary
            return knowledge_content

        except Exception as e:
            self.logger.error(f"从JSON内容提取文本异常: {e}")
            return None

+ 339 - 0
handlers/xunfei/nlp_handler.py

@@ -0,0 +1,339 @@
+"""
+NLP处理模块
+"""
+import json
+import threading
+import time
+
+from config.config.dify_config import difyconfig
+from handlers.dify.recognize_models import chat_with_dify
+from utils.logger import logger
+from strategies.vision.qwenv import qwen_nlp
+from utils.pc2_requests import _send_qa_task, _send_led_color_task
+
+
class NLPHandler:
    """NLP result handler.

    Accumulates streamed NLP text fragments from AIUI, optionally delegates
    the final answer to dify or Qwen, forwards it to the QA endpoint, and
    plays it via TTS.
    """

    def __init__(self):
        self.detected_intent = None  # intent extracted from NLP, if any
        self.tts_text = ""  # final merged answer to speak
        self.accumulated_text = ""  # streamed text fragments merged so far
        self.is_complete = False  # True once a final (status==2) result arrived
        self.intent_handler = None  # IntentHandler (cbm) reference, set externally
        self.iat_handler = None  # IAT handler reference, set externally

    def handle_nlp_result(self, data: dict) -> None:
        """Handle one streamed NLP result message.

        Args:
            data: raw AIUI message; text/status live at content.result.nlp.
                Observed statuses: 0 = streaming fragment, 2 = final fragment.
        """

        try:
            try:
                # LED feedback is best-effort and runs on its own daemon thread.
                led_thread = threading.Thread(
                    target=_send_led_color_task,
                    args=(self, "BREATH", "BLUE"),
                    daemon=True
                )
                led_thread.start()
            except Exception as e:
                logger.debug(f"[LED] LED控制失败,不影响NLP处理: {e}")

            # Extract the text fragment and its streaming status.
            text_value = data.get('content', {}).get(
                'result', {}).get('nlp', {}).get('text')

            status_value = data.get('content', {}).get(
                'result', {}).get('nlp', {}).get('status')

            logger.info(
                f"🔍 NLP数据解析: text='{text_value}', status={status_value}")

            if text_value is not None and status_value is not None:
                logger.info(f"讯飞大模型回答结果是: {text_value} {status_value}")

                # A fresh dialogue starts with status 0 and nothing accumulated.
                if status_value == 0 and not self.accumulated_text:
                    logger.info("🔄 新对话开始,重置状态")
                    self._reset_state()

                ###################################
                # Optionally delegate the final answer to the dify model.
                logger.info(
                    f"🔍 检查dify配置: switch={difyconfig.get_models_switch()}, current_mode={difyconfig.get_current_mode()}")
                if difyconfig.get_models_switch():
                    logger.info("✅ dify模型已启用")
                    # All NLP text received: hand the question over to dify.
                    if status_value == 2 and difyconfig.get_current_mode() != "xunfei":
                        logger.info("🚀 调用dify模型处理NLP")
                        # Use the recognized utterance (from the IAT handler)
                        # as the dify question, with a generic fallback.
                        if self.iat_handler and hasattr(self.iat_handler, 'iat_txt'):
                            question = self.iat_handler.iat_txt
                        else:
                            question = "请帮我分析一下"
                        self._handle_nlp_by_dify(question)
                        return
                    else:
                        logger.info(
                            f"📝 不调用dify模型: status={status_value}, current_mode={difyconfig.get_current_mode()}")
                else:
                    logger.info("❌ dify模型未启用")
                ##################################

                logger.info("🎯 准备调用_parse_nlp_result方法")
                # Merge this fragment into the accumulated answer.
                self._parse_nlp_result(text_value, status_value)

                if self.tts_text:
                    logger.info(f"成功提取回答: {self.tts_text}")
                else:
                    logger.info("未成功提取回答")

                if self.detected_intent:
                    logger.info(f"成功提取意图: {self.detected_intent}")
                    self._handle_detected_intent(self.detected_intent)
                else:
                    logger.info("未检测到预设动作指令意图")
            else:
                logger.warning("NLP数据不完整")

        except Exception as e:
            logger.error(f"NLP处理异常: {e}")
            import traceback
            logger.error(f"异常堆栈: {traceback.format_exc()}")

    def _parse_nlp_result(self, text_value: str, status_value: int) -> None:
        """Merge a streamed fragment into the accumulated answer.

        Args:
            text_value: NLP text fragment.
            status_value: stream status (0: continuing, 1: end, 2: final).
        """
        try:
            # status 2 marks the final fragment of the answer.
            if status_value == 2:
                logger.info(f"🎯 检测到结束状态,当前累积文本: '{self.accumulated_text}'")

                # Append the last fragment (punctuation included) if non-empty;
                # the end-of-stream logic below runs even when it is empty.
                if text_value and len(text_value.strip()) > 0:
                    self.accumulated_text += text_value.strip()
                    logger.debug(f"累积文本: {self.accumulated_text}")

                # Discard answers that are effectively empty.
                if len(self.accumulated_text.strip()) < 1:
                    logger.warning(f"累积文本过短,可能无效: '{self.accumulated_text}'")
                    self._reset_state()
                    return

                self.tts_text = self.accumulated_text.strip()
                self.is_complete = True
                logger.info(f"🎉 回答完成,合并结果: {self.tts_text}")

                # Phrases indicating Xunfei could not answer; Qwen answers instead.
                # NOTE(review): the list contains duplicates — harmless for the
                # membership check below.
                error_phrases = [
                    "对不起",
                    "没有明确的含义",
                    "上下文",
                    "无法理解你的问题",
                    "无法理解",
                    "无法提供",
                    "无法回答",
                    "没有明确",
                    "没有理解",
                    "没有理解你的问题",
                    "没有理解你的意图",
                    "没有理解你的需求",
                    "没有理解你的请求",
                    "没有理解你的问题",
                    "没有明确的解释",
                    "没有明确的含义"
                ]
                # Fall back to Qwen when any trigger phrase appears.
                if any(phrase in self.tts_text for phrase in error_phrases):
                    logger.info(f"🚨 检测到错误词组,调用千问模型")
                    # Use the recognized utterance as the Qwen question.
                    if self.iat_handler and hasattr(self.iat_handler, 'iat_txt'):
                        question = self.iat_handler.iat_txt
                    else:
                        question = "请帮我分析一下"
                    answer_text = qwen_nlp(question=question)
                    logger.info('千问大模型回答', answer_text)
                    self.tts_text = answer_text

                # Skip QA forwarding when the intent path already handled this turn.
                if not (self.intent_handler and self.intent_handler.is_cbm_semantic_processed()):
                    logger.info("📤 准备发送QA任务")
                    try:
                        qa_thread = threading.Thread(
                            target=_send_qa_task,
                            args=(self, {"result": self.tts_text}),
                            daemon=True
                        )
                        qa_thread.start()
                        logger.info("✅ QA任务已启动")
                    except Exception as e:
                        logger.debug(f"[QA] QA请求失败,不影响NLP处理: {e}")
                else:
                    logger.info("检测到已处理cbm_semantic,跳过QA发送")

                logger.info(f"🎵 准备调用播放方法,TTS文本: '{self.tts_text}'")
                # Play the merged text, then reset for the next dialogue.
                self._play_complete_text()
                self._reset_state()
                return

            # Streaming (non-final) fragment handling below.
            if not text_value or len(text_value.strip()) == 0:
                logger.debug("文本为空,跳过处理")
                return

            # Skip fragments that consist of a single punctuation mark.
            stripped_text = text_value.strip()
            if len(stripped_text) == 1 and stripped_text in '。,!?;:""''()【】':
                logger.debug(f"跳过纯标点符号: {stripped_text}")
                return

            # Accumulate the fragment and keep waiting for the final status.
            self.accumulated_text += stripped_text
            logger.debug(f"累积文本: {self.accumulated_text}")
            logger.debug(f"回答未完成,继续等待: {self.accumulated_text}")

        except Exception as e:
            logger.error(f"NLP结果解析异常: {e}")
            # Reset on failure so a broken turn cannot poison the next one.
            self._reset_state()

    def _play_complete_text(self) -> None:
        """Play the merged answer via TTS, unless the intent path already played one."""
        try:
            logger.info(f"🎯 开始播放完整文本,TTS文本: '{self.tts_text}'")

            if not self.tts_text:
                logger.warning("⚠️ TTS文本为空,无法播放")
                return

            # Skip when the intent path already played an answer this turn;
            # clear the flag so the next turn plays normally.
            if self.intent_handler and self.intent_handler.is_cbm_semantic_processed():
                logger.info("检测到已处理cbm_semantic,跳过NLP播放")
                self.intent_handler.reset_cbm_semantic_processed()
                return

            logger.info(f"🎵 准备播放文本: {self.tts_text}")

            # Play the full text in one request (no splitting).
            from utils.tts_client import play_text_async
            play_text_async(self.tts_text, use_cache=True)
            logger.info(f"✅ 已调用TTS播放: {self.tts_text}")

            # Reset immediately so the same text is not processed twice.
            self.tts_text = ""
            self.is_complete = False

        except Exception as e:
            logger.error(f"❌ 完整文本播放失败: {e}")
            # Degraded path: playback failed, only record the error.
            logger.error(f"TTS播放失败,无法播放文本: {self.tts_text}")
            # Reset state on failure as well.
            self.tts_text = ""
            self.is_complete = False

    def _handle_nlp_by_dify(self, question: str) -> None:
        """Answer the final utterance with the dify model instead of Xunfei NLP.

        Args:
            question: the recognized user utterance to send to dify.
        """
        try:
            self.is_complete = True
            # Ask dify for the answer.
            print("dify模型问题::::::::::::::::::::::", question)
            answer_text = chat_with_dify(question=question)
            print("dify模型回答::::::::::::::::::::::", answer_text)
            self.tts_text = answer_text

            # Skip QA forwarding when the intent path already handled this turn.
            # NOTE(review): unlike _parse_nlp_result, this assumes
            # self.intent_handler is set — confirm it is always wired up
            # before dify mode is enabled.
            if not self.intent_handler.is_cbm_semantic_processed():
                try:
                    qa_thread = threading.Thread(
                        target=_send_qa_task,
                        args=(self, {"result": self.tts_text}),
                        daemon=True
                    )
                    qa_thread.start()
                except Exception as e:
                    logger.debug(f"[QA] QA请求失败,不影响NLP处理: {e}")
            else:
                logger.info("检测到已处理cbm_semantic,跳过QA发送")

            # Play the answer, then reset the accumulation state.
            self._play_complete_text()
            self.accumulated_text = ""
            self.is_complete = False
        except Exception as e:
            logger.error(f"NLP结果解析异常: {e}")

    def _reset_state(self) -> None:
        """Reset per-dialogue state before handling a new conversation."""
        self.tts_text = ""
        self.accumulated_text = ""
        self.is_complete = False
        self.detected_intent = None

        # The speech handler's filler-phrase state should be reset too, but a
        # direct call would create a module cycle; only a reminder is logged.
        try:
            from handlers.speech_handler import SpeechHandler
            logger.debug("需要重置语音处理器语气助词状态")
        except Exception as e:
            logger.debug(f"重置语音处理器状态时出现异常: {e}")

        logger.debug("已重置NLP处理器状态,准备处理新对话")

    def _handle_detected_intent(self, intent: str) -> None:
        """Log the action matching a detected intent.

        Args:
            intent: intent name ("hi", "hand", "tour", "Bow", "Nod").
        """
        if intent == "hi":
            logger.info(f"检测到 [{intent}] 意图, 执行打招呼动作")
        elif intent == "hand":
            logger.info(f"检测到 [{intent}] 意图, 执行握手动作")
        elif intent == "tour":
            logger.info(f"检测到 [{intent}] 意图, 执行实验室游览动作")
        elif intent == "Bow":
            logger.info(f"检测到 [{intent}] 意图, 执行鞠躬欢送动作")
        elif intent == "Nod":
            logger.info(f"检测到 [{intent}] 意图, 执行点头动作")

    def get_detected_intent(self) -> str:
        """Return the detected intent name, or "" when none."""
        return self.detected_intent or ""

    def get_tts_text(self) -> str:
        """Return the pending TTS text, or "" when none."""
        return self.tts_text or ""

+ 141 - 0
handlers/xunfei/speech_handler.py

@@ -0,0 +1,141 @@
+"""
+讯飞AIUI语音识别处理模块
+"""
+import json5
+import requests
+
+from utils.logger import logger
+import json
+import random
+import threading
+from strategies.vision.qwenv import process_base64_image
+
+from utils.pc2_requests import _send_qa_task, _send_led_color_task
+from utils.tts_client import play_text_async, is_playing
+from utils.logger import logger
+from handlers.xunfei.intent_handler import IntentHandler
+
+
class XunfeiSpeechHandler:
    """iFlytek AIUI speech-recognition (IAT) result handler.

    Extracts recognized text from AIUI IAT payloads, forwards the question to
    the QA service, and plays a short filler ("thinking") phrase while the
    answer is being prepared.
    """

    def __init__(self):
        # Filler phrases bucketed by the length of the recognized text.
        self.short_thinking_phrases = [
            "嗯", "呃", "哦"
        ]

        self.medium_thinking_phrases = [
            "嗯…", "呃…", "哦…", "那个", "这个"
        ]

        self.long_thinking_phrases = [
            "嗯…", "呃…", "哦…", "那个", "这个"
        ]

        # Optional IntentHandler wired in by the owner of this instance.
        self.intent_handler = None
        # Last recognized IAT text.
        self.iat_txt = ""

    def play_thinking_phrase(self, text_length: int = 0, type="thinking"):
        """Play a cached filler phrase appropriate for *text_length* characters.

        Texts shorter than 3 characters get no filler at all.
        """
        if text_length < 3:
            return
        if text_length < 7:
            phrase = random.choice(self.short_thinking_phrases)
        elif text_length < 15:
            phrase = random.choice(self.medium_thinking_phrases)
        else:
            # BUG FIX: the original chain ended in an unreachable `else: return`;
            # every length >= 15 lands here.
            phrase = random.choice(self.long_thinking_phrases)

        logger.info(f"播放语气助词: {phrase}")
        play_text_async(phrase, use_cache=True)
        logger.info(f"{type}文本({text_length}字),播放语气助词: {phrase}")

    def handle_iat_result(self, data: dict) -> None:
        """Handle one AIUI IAT (speech-to-text) message.

        Args:
            data: raw AIUI IAT payload
        """
        try:
            # Best-effort LED feedback; failures must not break recognition.
            try:
                led_thread = threading.Thread(
                    target=_send_led_color_task,
                    args=(self, "DEFAULT", "GREEN"),
                    daemon=True
                )
                led_thread.start()
            except Exception as e:
                logger.debug(f"[LED] LED控制失败,不影响语音识别: {e}")

            result_string = self.get_recognition_text(data)
            self.iat_txt = result_string

            # Chunk flags: sn == 1 marks the first chunk, ls marks the last.
            text_info = data.get('content', {}).get(
                'result', {}).get('text', {})
            sn_value = text_info.get('sn')
            ls_value = text_info.get('ls')

            # status: 0 = first chunk, 2 = final chunk, 1 = intermediate.
            if sn_value == 1:
                status_value = 0
            elif ls_value is True:
                status_value = 2
            else:
                status_value = 1

            if result_string or status_value == 2:
                logger.info(f"讯飞识别到IAT结果是: {result_string}")

                # Forward the recognized question to the QA service async.
                try:
                    qa_thread = threading.Thread(
                        target=_send_qa_task,
                        args=(self, {"question": result_string}),
                        daemon=True
                    )
                    qa_thread.start()
                except Exception as e:
                    logger.debug(f"[QA] QA请求失败,不影响语音识别: {e}")

                logger.info(f"开始播放语气词")
                self.play_thinking_phrase(len(result_string), type="thinking")

        except Exception as e:
            logger.error(f"讯飞语音识别处理异常: {e}")

    def get_recognition_text(self, data: dict) -> str:
        """Concatenate the word candidates of an IAT payload into one string.

        Args:
            data: speech recognition payload

        Returns:
            str: the recognized text ("" on any parse problem)
        """
        try:
            ws_list = data.get('content', {}).get(
                'result', {}).get('text', {}).get('ws', [])
            return ''.join(
                cw.get('w', '')
                for item in ws_list
                for cw in item.get('cw', [])
            )
        except Exception as e:
            logger.error(f"获取讯飞识别文本异常: {e}")
            return ""

+ 159 - 0
main_robot.py

@@ -0,0 +1,159 @@
+"""
+机器人AI语音识别主程序入口
+Author: zhaoyong 77912776@qq.com
+Date: 2025-08-19
+LastEditTime: 2025-08-24
+"""
+
+import time
+from datetime import datetime
+import signal
+from typing import Optional
+from utils.load_config import load_config
+from utils.logger import setup_logger, logger
+from handlers.aiui.EventListener import EventListener
+from handlers.aiui.AIui_node import AIUINode
+from handlers.baidu.speech_handler import BaiduSpeechHandler
+from handlers.speech_handler import SpeechHandler
+from core.message_processor import MessageProcessor
+from core.socket_client import SocketClient
+from utils.init_system import initialize_robot_system
+import sys
+
+
class RobotAI:
    """Top-level orchestrator: selects, runs and tears down a speech backend."""

    def __init__(self):
        # Main-loop flag; cleared by the SIGINT handler.
        self.run = True
        self.socket_client: Optional[SocketClient] = None
        self.message_processor: Optional[MessageProcessor] = None
        self.speech_handler: Optional[SpeechHandler] = None
        self.baidu_handler: Optional[BaiduSpeechHandler] = None
        self.xunfei_linux_node: Optional[AIUINode] = None

        signal.signal(signal.SIGINT, self._stop_handler)

        setup_logger('robot_ai', 'logs')
        self.config = load_config()

        self.speech_config = self.config.get('speech_recognition', {})
        self.service_type = self.speech_config.get('service', 'xunfei_aiui')
        self.is_xunfei_linux = self.service_type == 'xunfei_linux'

        self._init_components()

    def _stop_handler(self, signum, frame):
        """SIGINT handler: request a clean shutdown of the main loop."""
        logger.info(f"收到信号 {signum},正在停止...")
        self.run = False

    def _dispatch(self, baidu, xunfei_linux, xunfei_aiui):
        """Return whichever callable matches the configured service type."""
        if self.service_type == 'baidu_realtime':
            return baidu
        if self.is_xunfei_linux:
            return xunfei_linux
        return xunfei_aiui

    def _init_components(self):
        """Initialize the speech-recognition backend chosen in the config."""
        logger.info("正在初始化机器人AI语音识别组件...")
        try:
            self._dispatch(self._init_baidu,
                           self._init_xunfei_linux,
                           self._init_xunfei_aiui)()
            logger.info("机器人AI语音识别初始化完成")
        except Exception as e:
            logger.error(f"系统初始化失败: {e}")
            raise

    def _init_baidu(self):
        """Bring up Baidu realtime recognition; raise when it fails to start."""
        logger.info("使用百度实时语音识别服务")
        self.baidu_handler = BaiduSpeechHandler()
        if not self.baidu_handler.start_recognition():
            raise RuntimeError("百度实时语音识别启动失败")
        logger.info("百度实时语音识别启动成功")

    def _init_xunfei_linux(self):
        """Bring up the iFlytek Linux SDK node."""
        logger.info("使用讯飞 Linux SDK 语音识别服务")
        listener = EventListener(skills_dict={}, debug=False)
        self.xunfei_linux_node = AIUINode(listener, debug=False)
        logger.info("讯飞 Linux SDK 初始化成功")

    def _init_xunfei_aiui(self):
        """Bring up the iFlytek AIUI socket pipeline."""
        logger.info("使用讯飞 AIUI 语音识别服务")
        self.speech_handler = SpeechHandler()
        self.socket_client = SocketClient()
        self.message_processor = MessageProcessor(self.socket_client)

    def start(self):
        """Run the selected backend until it exits, then always clean up."""
        logger.info("机器人AI语音识别启动")
        try:
            self._dispatch(self._run_baidu,
                           self._run_xunfei_linux,
                           self._run_xunfei_aiui)()
        except Exception as e:
            logger.error(f"系统运行异常: {e}")
        finally:
            self.stop()

    def _run_baidu(self):
        """Idle loop while the Baidu client works on its own threads."""
        logger.info("百度实时语音识别服务运行中...")
        while self.run:
            time.sleep(1)

    def _run_xunfei_linux(self):
        """Hand control to the SDK node's own spin loop."""
        logger.info("讯飞 Linux SDK 服务运行中...")
        if self.xunfei_linux_node:
            self.xunfei_linux_node.start(spin=True)

    def _run_xunfei_aiui(self):
        """Pump the AIUI message processor until shutdown is requested."""
        logger.info("讯飞 AIUI 服务运行中...")
        while self.run and self.message_processor:
            self.message_processor.process()

    def stop(self):
        """Release whichever backend resources were created; never raise."""
        logger.info("正在停止机器人AI语音识别...")
        try:
            if self.service_type == 'baidu_realtime' and self.baidu_handler:
                self.baidu_handler.stop_recognition()

            if self.is_xunfei_linux and self.xunfei_linux_node:
                self.xunfei_linux_node.shutdown()

            if self.socket_client:
                self.socket_client.close()

            logger.info("机器人AI语音识别已停止")
        except Exception as e:
            logger.error(f"停止过程中出现异常: {e}")
+
+
def main():
    """Program entry point.

    Returns:
        int: process exit code (0 on clean exit, 1 on startup failure).
    """
    logger.info("=== 机器人AI主程序启动 ===")
    logger.info(f"启动时间: {datetime.now()}")

    try:
        # System-level initialization (audio/components) must precede RobotAI.
        initialize_robot_system()
        RobotAI().start()
        logger.info("机器人AI主程序正常退出")
    except KeyboardInterrupt:
        logger.info("收到键盘中断信号,程序退出")
    except Exception as e:
        import traceback
        logger.error(f"程序启动失败: {e}")
        logger.error(f"错误详情: {traceback.format_exc()}")
        return 1
    return 0
+
+
# Entry point: run main() and propagate its exit code to the shell.
if __name__ == '__main__':
    sys.exit(main())

+ 109 - 0
requirements.txt

@@ -0,0 +1,109 @@
+annotated-types==0.7.0
+anyio==4.10.0
+baidu-aip==4.16.13
+bcrypt==3.2.0
+blinker==1.9.0
+Brlapi==0.8.3
+certifi==2020.6.20
+cffi==1.17.1
+chardet==4.0.0
+click==8.2.1
+colorama==0.4.4
+cryptography==3.4.8
+cupshelpers==1.0
+dbus-python==1.2.18
+defer==1.0.6
+distro==1.7.0
+distro-info==1.1+ubuntu0.2
+duplicity==0.8.21
+et_xmlfile==2.0.0
+exceptiongroup==1.3.0
+fastapi==0.116.1
+fasteners==0.14.1
+Flask==3.1.2
+flask-cors==6.0.1
+future
+h11==0.16.0
+httplib2==0.20.2
+idna==3.3
+importlib-metadata==4.6.4
+iotop==0.6
+itsdangerous==2.2.0
+jeepney==0.7.1
+Jinja2==3.1.6
+keyring==23.5.0
+language-selector==0.1
+launchpadlib==1.10.16
+lazr.restfulclient==0.14.4
+lazr.uri==1.0.6
+lockfile==0.12.2
+loguru==0.7.3
+louis==3.20.0
+macaroonbakery==1.3.1
+Mako==1.1.3
+MarkupSafe==3.0.2
+meson==0.61.2
+monotonic==1.6
+more-itertools==8.10.0
+netifaces==0.11.0
+numpy==2.2.6
+oauthlib==3.2.0
+olefile==0.46
+opencv-python==4.12.0.88
+openpyxl==3.1.5
+packaging==21.3
+paramiko==2.9.3
+pexpect==4.8.0
+Pillow==9.0.1
+protobuf==3.12.4
+psutil==7.0.0
+ptyprocess==0.7.0
+PyAudio==0.2.14
+pycairo==1.20.1
+pycparser==2.22
+pycups==2.0.1
+pydantic==2.11.7
+pydantic_core==2.33.2
+pydub==0.25.1
+PyGObject==3.42.1
+PyJWT==2.3.0
+pymacaroons==0.13.0
+PyNaCl==1.5.0
+pyOpenSSL==21.0.0
+pyparsing==2.4.7
+pyRFC3339==1.1
+python-apt==2.4.0+ubuntu4
+python-dateutil==2.8.1
+python-debian==0.1.43+ubuntu1.1
+python-magic==0.4.24
+python-multipart==0.0.20
+pytz==2022.1
+pyxdg==0.27
+PyYAML==5.4.1
+reportlab==3.6.8
+requests==2.25.1
+SecretStorage==3.3.1
+simpleaudio==1.0.4
+six==1.16.0
+sniffio==1.3.1
+sos==4.8.2
+ssh-import-id==5.11
+starlette==0.47.3
+systemd-python==234
+typing-inspection==0.4.1
+typing_extensions==4.15.0
+ubuntu-drivers-common==0.0.0
+ubuntu-pro-client==8001
+unattended-upgrades==0.1
+urllib3==1.26.5
+uvicorn==0.35.0
+wadllib==1.3.6
+watchdog==6.0.0
+websockets==15.0.1
+Werkzeug==3.1.3
+ws4py==0.6.0
+xdg==5
+xkit==0.0.0
+zipp==1.0.0
+PyMySQL==1.1.1
+nuitka==2.7.13

+ 1 - 0
strategies/__init__.py

@@ -0,0 +1 @@
+# 策略模式模块

+ 6 - 0
strategies/action/__init__.py

@@ -0,0 +1,6 @@
def main():
    """Package placeholder entry point; intentionally a no-op."""
    return None


if __name__ == "__main__":
    # Nothing to do when executed directly.
    pass

+ 151 - 0
strategies/action/action_strategies.py

@@ -0,0 +1,151 @@
+import re
+from utils.logger import logger
+import threading
+
+import requests
+
+from config.config.settings import config
+from .base_strategy import ActionStrategy
+
+from config.config.action_config import get_action_dict, get_point_dict
+
+"""
+文件:action_strategies.py
+功能: 定义具体的策略类
+"""
+__Author__ = "xueYang"
+
+
# Endpoints resolved once from the shared global config instance (no reload).
API_ACTION_URL = config.get_pc2_url('robot_action')
API_WAYPOINT_URL = config.get_pc2_url('robot_waypoint')
API_CMD_URL = config.get_pc2_url('robot_cmd_action')
API_TTS_URL = config.get_config().get('music_url')
+
class RealTimeActionStrategy(ActionStrategy):
    """Strategy that triggers real-time interaction actions via the PC2 HTTP API."""

    def execute(self, skill: str):
        """Run *skill* asynchronously in a daemon thread."""
        logger.info(f"执行动作:{skill}")
        thread = threading.Thread(
            target=self._execute_real_time_action,
            args=(skill,),
            daemon=True
        )
        thread.start()

    def _execute_real_time_action(self, skill: str):
        """Resolve *skill* to action parameters and POST them to the action API."""
        try:
            logger.info(f"执行动作:{skill}")
            action_dict = get_action_dict()
            logger.info(f"动作字典: {action_dict}")
            param = action_dict.get(skill)
            logger.info(f"获取到的动作参数: {param}")
            logger.info(
                "========================API接口===========================: %s", API_ACTION_URL)
            if param is not None:
                # Music control piggybacks on specific skill names.
                # BUG FIX: the original used re.fullmatch for a plain equality
                # test and a stray debug print(); both replaced.
                if skill == 'PLAY_MUSIC':
                    logger.info(f"检测到音乐播放指令: {skill}")
                    self.send_tts_request("play", "dodge.mp3")
                elif skill == 'STOP_MUSIC':
                    logger.info(f"检测到音乐停止指令: {skill}")
                    self.send_tts_request("stop", "dodge.mp3")

                try:
                    response = requests.post(API_ACTION_URL, json={"data": param}, timeout=5)
                    logger.info(f"API响应状态: {response.status_code}")
                except requests.exceptions.RequestException as e:
                    logger.error(f"请求执行动作失败: {str(e)}")
            else:
                logger.error(f"未找到动作 {skill} 对应的参数")
        except Exception as e:
            logger.error(f"执行动作时发生错误: {str(e)}")

    def send_tts_request(self, control_type: str, tts_file: str):
        """POST a music play/stop command to the TTS/music endpoint.

        Args:
            control_type: "play" or "stop"
            tts_file: audio file name understood by the endpoint
        """
        payload = {"control_type": control_type, "tts_file": tts_file}
        try:
            response = requests.post(API_TTS_URL, json=payload, timeout=5)
            if response.status_code == 200:
                logger.info(f"TTS请求成功: {control_type}, 文件: {tts_file}")
            else:
                logger.warning(f"TTS请求返回异常状态码: {response.status_code}, 文件: {tts_file}")
        except requests.exceptions.RequestException as e:
            logger.error(f"TTS请求失败: {str(e)}, 文件: {tts_file}")
+
+
class NaviWayPointStrategy(ActionStrategy):
    """Strategy that sends navigation waypoint commands to the PC2 HTTP API."""

    def execute(self, skill: str):
        """Run the waypoint navigation for *skill* asynchronously."""
        logger.info(f"执行点控:{skill}")
        thread = threading.Thread(
            target=self._execute_navigation,
            args=(skill,),
            daemon=True
        )
        thread.start()

    def _execute_navigation(self, skill: str):
        """Resolve *skill* to a waypoint parameter and POST it to the API."""
        try:
            logger.info(f"执行点控:{skill}")
            param = get_point_dict().get(skill)
            # BUG FIX: previously a {"data": None} payload was posted for
            # unknown skills; skip the request instead.
            if param is None:
                logger.error(f"未找到点位 {skill} 对应的参数")
                return
            # timeout added for consistency with RealTimeActionStrategy.
            response = requests.post(API_WAYPOINT_URL, json={"data": param}, timeout=5)
            logger.info(f"API响应状态: {response.status_code}")
        except requests.exceptions.RequestException as e:
            logger.error(f"请求API失败: {str(e)}")
+
+
class CMDActionStrategy(ActionStrategy):
    """Strategy that forwards CMD-style actions to the PC2 HTTP API."""

    # Skills whose wire parameter differs from the skill name itself.
    _SKILL_PARAMS = {"ZWJS": "17", "JSWRJZQ": "18"}

    def execute(self, skill: str):
        """Run the CMD action for *skill* asynchronously."""
        logger.info(f"执行CMD动作:{skill}")
        thread = threading.Thread(
            target=self._execute_cmd_action,
            args=(skill,),
            daemon=True
        )
        thread.start()

    def _execute_cmd_action(self, skill: str):
        """POST the CMD parameter for *skill*; unknown skills pass through as-is."""
        try:
            logger.info(f"执行CMD动作:{skill}")
            param = self._SKILL_PARAMS.get(skill, skill)
            # timeout added for consistency with RealTimeActionStrategy.
            requests.post(API_CMD_URL, json={"data": param}, timeout=5)
        except requests.exceptions.RequestException as e:
            logger.error(f"请求API失败: {str(e)}")
+
+# class AiotSceneStrategy(ActionStrategy):
+#     def execute(self, skill: str):
+#         """执行AIOT场景动作"""
+#         logger.info(f"执行AIOT场景动作:{skill}")
+#         thread = threading.Thread(
+#             target=aiot_controller.execute_scene,
+#             args=(skill,),
+#             daemon=True
+#         )
+#         thread.start()
+#
+# class AiotDeviceStrategy(ActionStrategy):
+#     def execute(self, skills: str):
+#         """执行AIOT设备控制动作"""
+#         state = skills.split("_", 1)[0]
+#         device_name = skills.split("_", 1)[1]
+#         logger.info(f"执行设备控制:{device_name} 状态:{state}")
+#
+#         thread = threading.Thread(
+#             target=aiot_controller.control_device_power,
+#             args=(device_name, state),
+#             daemon=True
+#         )
+#         thread.start()

+ 17 - 0
strategies/action/base_strategy.py

@@ -0,0 +1,17 @@
from abc import ABC, abstractmethod


"""
File: base_strategy.py
Purpose: defines the strategy interface for action execution
"""
__Author__ = "xueYang"


class ActionStrategy(ABC):
    """Common interface every concrete action strategy must implement."""

    def __init__(self):
        # No shared state; subclasses may extend.
        pass

    @abstractmethod
    def execute(self, skill: str):
        """Carry out the action identified by *skill*."""
        pass

+ 96 - 0
strategies/action/execute_actions.py

@@ -0,0 +1,96 @@
+'''
+Author: zhaoyong 77912776@qq.com
+Date: 2025-08-17 16:59:19
+LastEditTime: 2025-08-24 14:11:12
+LastEditors: zhaoyong 77912776@qq.com
+FilePath: \robot_ai\strategies\action\execute_actions.py
+Description: 头部注释配置模板
+'''
+from utils.control_aiot import aiot_controller
+from utils.logger import logger
+import threading
+
+
+from .action_strategies import (
+    RealTimeActionStrategy,
+    NaviWayPointStrategy,
+    # AiotSceneStrategy,
+    # AiotDeviceStrategy,
+    CMDActionStrategy
+)
+"""
+文件:action_executor.py
+功能: 执行动作的类
+"""
+__Author__ = "xueYang"
+
+
class ActionExecutor:
    """Dispatches recognized (category, skill) pairs to the matching strategy."""

    # AIUI skill-category identifiers (vendor app id + business suffix).
    # NOTE: the original wrapped these in pointless f-string literals with no
    # placeholders, and duplicated the list in execute_actions().
    CATEGORY_ACTIONS = "OS20015785810.ACTIONS"
    CATEGORY_ACTIONS_TZ = "OS20015785810.ACTIONS_TZ"
    CATEGORY_NAVIGATION = "OS20015785810.VoiceNavigation"
    CATEGORY_CMD = "OS20015785810.CMD_ACTIONS"

    def __init__(self):
        self._strategies = {
            self.CATEGORY_ACTIONS: RealTimeActionStrategy(),
            self.CATEGORY_ACTIONS_TZ: RealTimeActionStrategy(),
            self.CATEGORY_NAVIGATION: NaviWayPointStrategy(),
            # "scene_": AiotSceneStrategy(),
            # ("open_", "close_"): AiotDeviceStrategy(),
            self.CATEGORY_CMD: CMDActionStrategy()
        }

    def execute_actions(self, category, skill):
        """
        Execute a skill via the strategy registered for its category.
        :param category: action category reported by the NLP layer
        :param skill: concrete instruction
        """
        try:
            if not skill:
                return
            # Real-time interaction / voice navigation / CMD categories.
            if category in self._strategies:
                self._strategies[category].execute(skill)

            # Smart-home (AIoT) control actions.
            elif category.split('.')[-1] == "control_aiot":
                logger.info(f"AIOT分类是:{category}执行AIOT动作:{skill}")
                if skill.startswith("scene_"):
                    thread = threading.Thread(
                        target=aiot_controller.execute_scene,
                        args=(skill,),
                        daemon=True
                    )
                    thread.start()
                elif skill.startswith(("open_", "close_")):
                    state, device_name = skill.split("_", 1)
                    logger.info(f"执行设备控制:{device_name} 状态:{state}")
                    thread = threading.Thread(
                        target=aiot_controller.control_device_power,
                        args=(device_name, state),
                        daemon=True
                    )
                    thread.start()
                else:
                    raise ValueError(f"未知的AIOT指令: {skill}")
        except Exception as e:
            logger.error(f"执行动作时发生错误: {str(e)}")

    def execute_actions_async(self, category, skill):
        """
        Fire-and-forget wrapper around execute_actions.
        :param category: action category
        :param skill: concrete instruction
        """
        thread = threading.Thread(
            target=self.execute_actions,
            args=(category, skill),
            daemon=True
        )
        thread.start()

+ 33 - 0
strategies/aiui_process.py

@@ -0,0 +1,33 @@
+"""
+AIUI消息处理策略
+"""
+import zlib
+from utils.logger import logger
+from .base_strategy import ProcessStrategy
+
+
class AiuiMessageProcess(ProcessStrategy):
    """Decompresses gzip-wrapped AIUI payloads."""

    def process(self, client_socket, data: bytes) -> tuple[bool, bytes]:
        """
        Inflate one AIUI message.

        Args:
            client_socket: client socket (unused, kept for the interface)
            data: compressed bytes as received from the AIUI service

        Returns:
            tuple[bool, bytes]: (success flag, inflated payload)
        """
        if not data:
            return False, b''

        # 16 + MAX_WBITS tells zlib to expect a gzip header.
        inflater = zlib.decompressobj(16 + zlib.MAX_WBITS)
        try:
            payload = inflater.decompress(data) + inflater.flush()
        except zlib.error as e:
            logger.info(f"AIUI消息解压失败: {e}")
            return False, b''
        return True, payload

+ 67 - 0
strategies/base_strategy.py

@@ -0,0 +1,67 @@
+"""
+基础策略类
+"""
+from utils.logger import logger
+from abc import abstractmethod, ABC
+from typing import Union, Tuple
+
+
class ProcessStrategy(ABC):
    """Base class for message-processing strategies.

    Provides framing helpers shared by all concrete processors: packet
    construction (sync header, type, length, session id, checksum) and the
    two's-complement checksum itself.
    """

    def makepacket(self, sid: int, msg_type: int, content: Union[str, bytes]) -> bytearray:
        """
        Build a framed message packet.

        Args:
            sid: session id (little-endian, 2 bytes)
            msg_type: message type byte
            content: payload (str is UTF-8 encoded)

        Returns:
            bytearray: header + payload + trailing checksum byte
        """
        payload = content.encode('utf-8') if isinstance(content, str) else content
        length = len(payload)

        # 0xa5 sync byte, 0x01 user id, then type, length LE, session id LE.
        packet = bytearray((
            0xa5,
            0x01,
            msg_type,
            length & 0xff,
            (length >> 8) & 0xff,
            sid & 0xff,
            (sid >> 8) & 0xff,
        ))
        packet += payload
        packet.append(self.checkcode(packet))
        return packet

    def checkcode(self, data: bytearray) -> int:
        """
        Compute the two's-complement checksum byte for *data*.

        Args:
            data: bytes covered by the checksum

        Returns:
            int: value such that (sum(data) + checksum) % 256 == 0
        """
        return (-sum(data)) & 0xFF

    @abstractmethod
    def process(self, client_socket, data) -> Union[bool, Tuple[bool, bytes]]:
        """
        Process one message.

        Args:
            client_socket: client socket
            data: data to process

        Returns:
            Union[bool, Tuple[bool, bytes]]: processing result
        """
        pass

+ 34 - 0
strategies/confirm_process.py

@@ -0,0 +1,34 @@
+"""
+确认消息处理策略
+"""
+from utils.logger import logger
+from .base_strategy import ProcessStrategy
+
+
class ConfirmProcess(ProcessStrategy):
    """Builds and sends the acknowledgement frame for a received message."""

    def process(self, client_socket, msg_id: int) -> bool:
        """
        Send an ACK packet (type 0xff) for *msg_id*.

        Args:
            client_socket: client socket to send on
            msg_id: id of the message being acknowledged

        Returns:
            bool: True when the ACK was sent, False on any error
        """
        try:
            # Fixed 4-byte ACK body: sync byte followed by three zero bytes.
            body = bytearray((0xa5, 0x00, 0x00, 0x00))
            client_socket.send(self.makepacket(msg_id, 0xff, body))
            return True
        except Exception as e:
            logger.info(f"发送确认消息失败: {e}")
            return False

BIN
strategies/vision/demo_pic.png


+ 64 - 0
strategies/vision/dify_vision_demo.py

@@ -0,0 +1,64 @@
+import base64
+import requests
+import time
+
# Dify API key
# NOTE(review/security): credentials are hard-coded in source; they should be
# moved to configuration or environment variables.
api_key = "app-wzRAbuWYxm9WiyY6fpQzA7Cu"
user = "howsoGQ@qq.com"
upload_url = "http://robot.yun36.com:8066/v1/files/upload"
chat_url = "http://robot.yun36.com:8066/v1/chat-messages"
+
+
def upload_image_to_dify(image_path='demo_pic.png'):
    """Upload a local image file to Dify and return the uploaded file id.

    Args:
        image_path: path of the image to upload

    Returns:
        The file id assigned by Dify, or None when the response has none.

    BUG FIXES vs. the original: the *image_path* parameter was ignored
    (always uploading "demo_pic.png"), the file handle was never closed,
    and the MIME type was the literal junk string
    "image/[png|jpeg|jpg|webp|gif]".
    """
    import mimetypes
    import os
    mime_type = mimetypes.guess_type(image_path)[0] or "image/png"
    headers = {
        "Authorization": f"Bearer {api_key}"
    }
    data = {
        "user": user
    }
    with open(image_path, "rb") as fh:
        files = {
            "file": (os.path.basename(image_path), fh, mime_type)
        }
        resp = requests.post(upload_url, headers=headers, files=files, data=data)
    return resp.json().get("id")
+
+
def chat_with_dify(question='请用 30 字以内分析你看到的', image_path=''):
    """Send *question* (optionally with an uploaded image) to the Dify chat API.

    Prints the answer and timing information; blocking response mode.
    """
    t_start = time.time()
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    attached_files = []
    if image_path != '':
        image_id = upload_image_to_dify(image_path)
        t_uploaded = time.time()
        print(f"图片上传完成耗时: {t_uploaded - t_start:.2f}秒")
        attached_files = [
            {
                "type": "image",
                "transfer_method": "local_file",
                "upload_file_id": image_id
                # "url": "https://cloud.dify.ai/logo/logo-site.png"
            }
        ]
    payload = {
        "inputs": {},  # optional extra context; empty here
        "query": question,
        "response_mode": "blocking",
        "conversation_id": "",  # empty string starts a new conversation
        "user": user,
        "files": attached_files
    }
    # Fire the chat request and report timings.
    t_before = time.time()
    resp = requests.post(chat_url, headers=headers, json=payload)
    t_after = time.time()
    print(resp.json().get("answer", '请求出错'))
    print(f"请求完成耗时: {t_after - t_before:.2f}秒")
    print(f"总耗时: {t_after - t_start:.2f}秒")
+
if __name__ == '__main__':
    # chat_with_dify(question='请用 30 字以内分析你看到的',image_path='demo_pic.png')
    # Manual smoke test: text-only chat (no image upload).
    chat_with_dify(question='苏超最近的比赛', image_path='')

+ 98 - 0
strategies/vision/qwenv.py

@@ -0,0 +1,98 @@
+'''
+Author: zhaoyong 77912776@qq.com
+Date: 2025-08-16 16:55:13
+LastEditTime: 2025-08-16 18:31:31
+LastEditors: zhaoyong 77912776@qq.com
+FilePath: \robot_ai\strategies\vision\qwenv.py
+Description: 头部注释配置模板
+'''
+import os
+import dashscope
+from utils.logger import logger
+
+
+# 处理base64图片
+def process_base64_image(base64_pic: str = '', question: str = '描述下你看到了什么'):
+    # 示例参照
+    messages = [
+        {
+            "role": "system",
+            "content": [
+                {
+                    "text": "你是一个具身智能机器人,你具备详尽观察和描述环境的能力,能够准确捕捉场景中的人物、物体、颜色、动作,以及它们的空间位置和相互关系。描述内容必须从\"我面前的是\"开始,以第一人称视角表达,语句完整流畅,并保留必要的逗号和句号。禁止出现\"图中\"、\"图片中\"等类似表达,不使用markdown、列表、代码块、引号,并且不得出现多余的换行。回复字数在20~40个字符"}
+            ]
+        },
+        {
+            "role": "user",
+            "content": [
+                {
+                    "image": ""},
+                {"text": "描述下你看到了什么"}]
+        }]
+    if base64_pic:
+        pic_content = "data:image/jpeg;base64," + base64_pic
+        messages[1]["content"][0]["image"] = pic_content
+    messages[1]["content"][1]["text"] = question
+    try:
+        from config.config.settings import config
+        baolian_api_key = config._config_data.get(
+            'baolian_api_key', 'sk-1dcf9fc95ee54c3bbb2a3fdc9ebaa49f')
+        baolian_model = config._config_data.get(
+            'baolian_model', 'qwen-vl-max-latest')
+
+        dashscope.api_key = baolian_api_key
+
+        # 发送图片到模型
+        response = dashscope.MultiModalConversation.call(
+            model=baolian_model,
+            messages=messages
+        )
+        # 解析响应(描述内容)
+        result = response.output.choices[0].message.content[0]["text"]
+        return result
+    except Exception as e:
+        logger.info(e)
+
+
def qwen_nlp(question: str = '描述下你看到了什么'):
    """Answer *question* with the Qwen text model.

    Returns the model's answer, or a Chinese fallback sentence when the
    response is empty or the call raises.
    """
    system_prompt = "你是一个具身智能机器人,快速理解意图,并用口语化的方式作出回答,语气自然、语速适中,回答简洁明了,适合语音播放,不使用markdown、列表、代码块、引号,并且不得出现多余的换行。回复字数在20~40个字符"
    messages = [
        {"role": "system", "content": [{"text": system_prompt}]},
        {"role": "user", "content": question},
    ]
    try:
        from config.config.settings import config
        key = config._config_data.get(
            'baolian_api_key', 'sk-1dcf9fc95ee54c3bbb2a3fdc9ebaa49f')
        model_name = config._config_data.get(
            'baolian_nlp_model', 'qwen-plus-latest')

        dashscope.api_key = key

        # Plain text generation call.
        response = dashscope.Generation.call(
            model=model_name,
            messages=messages
        )
        if response and response.get("output") and response["output"].get("text"):
            return response["output"]["text"]
        return "抱歉,我当前无法处理这个问题,请再说一遍"

    except Exception as e:
        logger.info(f"千问NLP处理异常: {e}")
        return "抱歉,处理出现异常"
+
+
# No standalone behavior; module is imported for its functions.
if __name__ == '__main__':
    pass

+ 496 - 0
utils/control_aiot.py

@@ -0,0 +1,496 @@
+# -*- coding: utf-8 -*-
+import sys
+import os
+
+"""
+功能: 与物联中控平台交互服务
+"""
+__Author__ = "torjean(陶)"
+
+import base64
+import hashlib
+import hmac
+import os
+import random
+import time
+import requests
+import urllib.parse
+import traceback
+from utils.logger import logger
+from config.config.aiot_config import get_machine_id, get_aiot_host, get_aiot_app_id, get_aiot_app_secret, get_aiot_union_id, get_scenes, get_devices
+
+home_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
class ControlConf():
    """Static AIoT platform credentials, read once from config at import time."""
    # AIoT platform settings - loaded from the configuration file
    aiot_unionId = get_aiot_union_id()      # tenant/union identifier
    aiot_platform_host = get_aiot_host()    # base URL of the cloud API
    aiot_appid = get_aiot_app_id()          # application id issued by the platform
    aiot_appSecret = get_aiot_app_secret()  # app secret (base64) used for signing
+
+
+# 机器ID直接从配置文件读取,不再需要本地文件操作
+
+
# Client for the AIoT central-control platform (auth + device/scene control).
class ControlAIotPlatform:
    def __init__(self, comm_instance=None):
        """Load config, then authenticate against the platform.

        :param comm_instance: optional communication object kept for callers;
            not used directly in this class (TODO confirm intended use).
        """
        self.logger = logger
        self.comm_instance = comm_instance
        # Machine id doubles as the platform userId - read from the config file.
        self.machine_id = get_machine_id()
        # User secret key returned by /cloud-api/app/active.
        self.user_secretkey = None
        # OAuth access token for Bearer-authenticated endpoints.
        self.access_token = None
        # Refresh token used when a request returns code 401.
        self.refresh_token = None

        # Load scene and device mappings from the config file.
        self._load_config_from_file()

        self.connect()
+
+    def _load_config_from_file(self):
+        """从配置文件加载场景和设备信息"""
+        try:
+            # 加载场景配置
+            scenes_config = get_scenes()
+            self.scenes_id = scenes_config
+
+            # 加载设备配置并转换为兼容格式
+            devices_config = get_devices()
+            self.resource_info = {}
+
+            for device_name, device_config in devices_config.items():
+                self.resource_info[device_name] = {
+                    'devName': device_config.get('dev_name', ''),
+                    'productId': device_config.get('product_id', ''),
+                    'deviceId': device_config.get('device_id', ''),
+                    'resourceId': device_config.get('resource_id', ''),
+                    'open': device_config.get('open_value'),
+                    'close': device_config.get('close_value'),
+                }
+
+            logger.info(
+                f"从配置文件加载了 {len(self.scenes_id)} 个场景和 {len(self.resource_info)} 个设备")
+
+        except Exception as e:
+            logger.error(f"加载配置文件失败: {e}")
+            # 使用默认配置作为后备
+            self.scenes_id = {
+                "scene_001": '500',
+                "scene_002": '501'
+            }
+            self.resource_info = {}
+
+    # 一、连接物联平台中控系统,获取用户密钥
+    def _get_platform_user_secretkey(self):
+        current_timestamp = int(time.time())
+        try:
+            url = ControlConf.aiot_platform_host + '/cloud-api/app/active'
+            to_encode_str = (ControlConf.aiot_appid + str(
+                current_timestamp) + ControlConf.aiot_unionId + self.machine_id).encode('utf-8')
+            # Base64解码appSecretKey
+            decode_app_secret = base64.b64decode(ControlConf.aiot_appSecret)
+            # hmac_sha256签名
+            hmac_encode_sign = hmac.new(
+                decode_app_secret, to_encode_str, hashlib.sha256)
+            # Base64编码
+            base64_encode_sign = base64.b64encode(
+                hmac_encode_sign.digest()).decode('utf-8')
+
+            response = requests.post(url, json={
+                "appId": ControlConf.aiot_appid,
+                "timestamp": current_timestamp,
+                "unionId": ControlConf.aiot_unionId,
+                "userId": self.machine_id,
+                "sign": base64_encode_sign
+            }, timeout=3)
+            response_json = response.json()
+            # logger.info("1.1 获取用户密钥userSecretKey:", response_json)
+            if response_json['code'] == 0:
+                return True, response_json['data']['userSecret']
+            else:
+                logger.info("响应失败:", response_json['msg'])
+                return False, response_json['msg']
+        except Exception as e:
+            logger.info(e)
+            return False, e
+
+    # 二、连接物联平台中控系统,获取access-token
+    def _get_access_token(self):
+        url = ControlConf.aiot_platform_host + '/cloud-api/oauth2/token'
+
+        try:
+            authorization = self._generate_authorization()
+            request_headers = {
+                'Authorization': authorization,
+                "Content-Type": "application/x-www-form-urlencoded",
+            }
+            password = self._generate_password()
+            requests_data = {
+                "grant_type": 'hmac_sign',
+                "username": ControlConf.aiot_appid + '.' + self.machine_id,
+                "password": password,
+                # 权限
+                "scope": 'cloud_write'
+            }
+            try:
+                # logger.info(1111111,request_headers)
+                # logger.info(2222222,requests_data)
+                encoded_data = urllib.parse.urlencode(requests_data)
+                response = requests.post(
+                    url, headers=request_headers, data=encoded_data, timeout=3)
+                response_json = response.json()
+                # logger.info("1.2 获取access-token:", response_json)
+                if response_json['code'] == 0:
+                    return True, response_json['data']
+                else:
+                    logger.info("响应失败:", response_json['msg'])
+                    return False, response_json['msg']
+            except requests.exceptions.RequestException as e:
+                logger.info("请求失败:", e)
+        except Exception as e:
+            traceback.print_exc()
+            logger.info(e)
+
+    # 生成初始Authorization
+    def _generate_authorization(self):
+        try:
+            ########## 测试案例###########
+            # md5_hex_app_secret = hashlib.md5("/uUMzm79BK0RPzI8VBgomSRjngXb5/sH".encode('utf-8')).hexdigest()
+            # authorization = 'Basic ' + base64.b64encode(
+            #     ("NVOPaEwo8S61ytrZ" + ':' + md5_hex_app_secret).encode('utf-8')).decode('utf-8')
+            ############################
+            # md5 appSecret
+            md5_hex_app_secret = hashlib.md5(
+                ControlConf.aiot_appSecret.encode('utf-8')).hexdigest()
+            # 生成Authorization
+            authorization = 'Basic ' + base64.b64encode(
+                (ControlConf.aiot_appid + ':' + md5_hex_app_secret).encode('utf-8')).decode('utf-8')
+            return authorization
+        except Exception as e:
+            logger.info(e)
+            return None
+
+    # 生成password
+    def _generate_password(self):
+        try:
+            appid = ControlConf.aiot_appid
+            userid = self.machine_id
+            secure_mode = 'hmac_sign'
+            timestamp = int(time.time())
+            to_encode_str = (appid + secure_mode +
+                             str(timestamp) + userid).encode('utf-8')
+            # Base64解码userSecretKey
+            decoded_user_secret = base64.b64decode(self.user_secretkey)
+            # hmac_sha256签名
+            hmac_encode_sign = hmac.new(
+                decoded_user_secret, to_encode_str, hashlib.sha256)
+            # Base64编码
+            base64_encode_sign = base64.b64encode(
+                hmac_encode_sign.digest()).decode('utf-8')
+            # URL 编码
+            url_encoded_sign = urllib.parse.quote(base64_encode_sign, safe='')
+            password = f'appId={appid}&secureMode={secure_mode}&timestamp={timestamp}&userId={userid}&sign={url_encoded_sign}'
+            return password
+        except Exception as e:
+            logger.info(e)
+            traceback.print_exc()
+            return None
+
+    # 刷新access-token
+    def refresh_access_token(self):
+        """
+            如果后续接口返回401,则调用该接口刷新access-token
+        """
+        url = ControlConf.aiot_platform_host + '/cloud-api/oauth2/token'
+        try:
+            authorization = self._generate_authorization()
+            request_headers = {
+                'Authorization': authorization,
+                "Content-Type": "application/x-www-form-urlencoded",
+            }
+            requests_data = {
+                "grant_type": 'refresh_token',
+                "refresh_token": self.refresh_token
+            }
+            try:
+                # logger.info(1111111,request_headers)
+                # logger.info(2222222,requests_data)
+                encoded_data = urllib.parse.urlencode(requests_data)
+                response = requests.post(
+                    url, headers=request_headers, data=encoded_data, timeout=3)
+                response_json = response.json()
+                logger.info("1.3 刷新access-token:", response_json)
+                if response_json['code'] == 0:
+                    self.access_token = response_json['data']['access_token']
+                    self.refresh_token = response_json['data']['refresh_token']
+                    return True, ''
+                else:
+                    logger.info("响应失败:", response_json['msg'])
+                    return False, response_json['msg']
+            except requests.exceptions.RequestException as e:
+                logger.info("请求失败:", e)
+        except Exception as e:
+            traceback.print_exc()
+            logger.info(e)
+
+    # 连接物联平台中控系统,获取access-token
+    def connect(self):
+        # 一、获取用户密钥
+
+        connect_flag, user_secret = self._get_platform_user_secretkey()
+        if connect_flag is False:
+            self.logger.error("1.1、获取用户密钥userSecretKey失败: %s" % user_secret)
+            return
+        else:
+            # logger.info("1.1 成功获取用户密钥userSecretKey:%s" % str(user_secret))
+            self.logger.info("1.1、成功获取用户密钥userSecretKey: %s" %
+                             str(user_secret))
+            self.user_secretkey = user_secret
+            # self.logger.info("一、获取用户密钥userSecretKey:%s"%self.user_secretkey)
+        # 二、获取access-token和refresh-token
+        connect_flag, access_token_data = self._get_access_token()
+        if connect_flag is False:
+            # self.logger.error("二、获取access-token失败:%s"%access_token_data)
+            return
+        else:
+            # logger.info("1.2 成功获取access-token:%s" % str(access_token_data))
+            self.logger.info("1.2 成功获取access-token: %s" %
+                             str(access_token_data))
+            self.access_token = access_token_data['access_token']
+            self.refresh_token = access_token_data['refresh_token']
+            # self.logger.info("二、获取refresh-token:%s"%self.refresh_token)
+
+    # 获取设备列表
+    def get_device_list(self):
+        url = ControlConf.aiot_platform_host + '/cloud-api/device/list'
+        try:
+            authorization = "Bearer " + self.access_token
+            request_headers = {
+                'Authorization': authorization,
+            }
+            response = requests.get(url, headers=request_headers, timeout=3)
+            response_json = response.json()
+            logger.info("2.1 获取设备列表:", response_json)
+            if response_json['code'] == 0:
+                return True, response_json['data']
+            elif response_json['code'] == 401:
+                # 如果返回401,则刷新access-token
+                self.refresh_access_token()
+                return False, "token过期,已刷新,重新调用"
+            else:
+                logger.info("响应失败:", response_json['msg'])
+                return False, response_json['msg']
+        except requests.exceptions.RequestException as e:
+            logger.info("请求失败:", e)
+
+    # 获取产品资源
+    def get_product_resource(self, product_id=''):
+        url = ControlConf.aiot_platform_host + '/cloud-api/product/resource'
+        try:
+            authorization = "Bearer " + self.access_token
+            request_headers = {
+                'Authorization': authorization,
+            }
+            params = {
+                "productId": product_id
+            }
+            response = requests.get(
+                url, headers=request_headers, params=params, timeout=3)
+            response_json = response.json()
+            logger.info("2.2 获取产品资源:", response_json)
+            if response_json['code'] == 0:
+                return True, response_json['data']
+            elif response_json['code'] == 401:
+                # 如果返回401,则刷新access-token
+                self.refresh_access_token()
+                return False, "token过期,已刷新,重新调用"
+            else:
+                logger.info("响应失败:", response_json['msg'])
+                return False, response_json['msg']
+        except requests.exceptions.RequestException as e:
+            logger.info("请求失败:", e)
+
+    # 控制设备
+    def control_device(self, control_detailed_data, device_id='', product_id=''):
+        """
+            控制设备
+            :param device_id: 设备id
+            :param product_id: 产品id
+            :param control_detailed_data: 控制数据
+                示例:[{"resourceId":"property.power1","value":1},{"resourceId":"property.power2","value":1}]
+            :return: True,''
+        """
+        url = ControlConf.aiot_platform_host + '/cloud-api/device/control'
+        authorization = "Bearer " + self.access_token
+        headers = {
+            'Authorization': authorization,
+            'Content-Type': 'application/json',
+        }
+        requests_data = {
+            "productId": product_id,
+            "deviceId": device_id,
+            "data": control_detailed_data
+        }
+
+        def _send_request():
+            response = requests.post(
+                url, headers=headers, json=requests_data, timeout=3)
+            return response.json()
+
+        try:
+            response_json = _send_request()
+            logger.info("2.3 控制设备:", response_json)
+            if response_json['code'] == 401:
+                # 如果返回401,则刷新access-token
+                self.refresh_access_token()
+                logger.info("token过期,已刷新,重试中")
+                response_json = _send_request()
+            if response_json['code'] == 0:
+                return True, response_json
+            else:
+                logger.info("响应失败:", response_json['msg'])
+                return False, response_json
+        except requests.exceptions.RequestException as e:
+            logger.info("请求失败:", e)
+
+    # 场景列表
+    def get_scene_list(self):
+        """
+            :return: True,[{'acId': '338', 'name': '离家模式'}, {'acId': '389', 'name': '回家模式'}]
+        """
+        url = ControlConf.aiot_platform_host + '/cloud-api/scene/list'
+        try:
+            authorization = "Bearer " + self.access_token
+            requests_headers = {
+                'Authorization': authorization,
+            }
+            response = requests.get(url, headers=requests_headers, timeout=3)
+            response_json = response.json()
+            logger.info("2.4 场景列表为:", response_json)
+            if response_json['code'] == 0:
+                return True, response_json['data']
+            elif response_json['code'] == 401:
+                # 如果返回401,则刷新access-token
+                self.refresh_access_token()
+                return False, "token过期,已刷新,重新调用"
+            else:
+                logger.info("响应失败:", response_json['msg'])
+                return False, response_json['msg']
+        except Exception as e:
+            logger.info("请求失败:", e)
+            return False, e
+
+    # 对外接口,场景执行
+    def execute_scene(self, acid_name=''):
+        """
+            :param acid_name: 场景名字
+            :return: True,''
+        """
+        if self.scenes_id.get(acid_name, '') == '':
+            return False, "场景不存在"
+        acid = self.scenes_id.get(acid_name, '')
+        url = ControlConf.aiot_platform_host + '/cloud-api/scene/control'
+        authorization = "Bearer " + self.access_token
+        headers = {
+            'Authorization': authorization,
+            "Content-Type": "application/json",
+        }
+        data = {"acId": acid}
+
+        def send_request():
+            _response_json = requests.post(
+                url, headers=headers, json=data, timeout=3)
+            return _response_json.json()
+
+        try:
+            response_json = send_request()
+            # logger.info("2.5 执行场景:", response_json)
+            if response_json['code'] == 401:
+                # 如果返回401,则刷新access-token
+                self.refresh_access_token()
+                # return False, "token过期,已刷新,重新调用"
+                logger.info("token过期,已刷新,重试中...")
+                response = send_request()
+                response_json = response.json()
+            if response_json['code'] == 0:
+                return True, response_json['data']
+            else:
+                logger.info("响应失败:", response_json['msg'])
+                return False, response_json['msg']
+        except Exception as e:
+            logger.info("请求失败:", e)
+            return False, e
+
+    # 对外接口,控制设备开关
+    def control_device_power(self, device_name='', state=''):
+        """
+            对外接口,控制设备开关
+            :param device_name: 设备名称
+            :param state: 开关状态
+            :return: True,''
+        """
+        try:
+            dev_dict = self.resource_info.get(device_name, '')
+            logger.info("-----------%s-%s-----------" % (state, device_name))
+            if dev_dict == '' or state not in ['open', 'close']:
+                return False, "设备名称或状态不存在"
+            else:
+                # 控制数据
+                control_detailed_data = [
+                    {"resourceId": dev_dict['resourceId'],
+                        "value": dev_dict[state]}
+                ]
+                res, res_data = self.control_device(device_id=dev_dict['deviceId'], product_id=dev_dict['productId'],
+                                                    control_detailed_data=control_detailed_data)
+                return res, res_data
+        except Exception as e:
+            logger.info(e)
+            return False, e
+
    def main(self, stop_event):
        """Entry hook for the control loop.

        :param stop_event: threading.Event-like stop signal — currently unused;
            presumably reserved for a future polling loop (TODO confirm).
        """
        try:
            # Placeholder body: only announces that furniture control is ready.
            logger.info("!!!!!!!!!!!可以控制家具!!!!!!!!!!!")
        except Exception as e:
            self.logger.error(f"与物联平台中控系统交互出错: {e}")
+
+
# Module-level singleton. NOTE(review): instantiation performs network
# authentication at import time; importing this module blocks or degrades
# when the platform is unreachable — consider lazy initialization.
aiot_controller = ControlAIotPlatform()

if __name__ == "__main__":
    # Ad-hoc manual test area; the commented calls below are usage examples.
    print(home_path)
    # Fetch the device list
    # aiot_controller.get_device_list()
    # Fetch resources of a specific product
    # res, resdata = aiot_controller.get_product_resource("RSD00005")
    # if res is True:
    #     logger.info("获取资源RSD00003成功!",resdata)
    # else:
    #     logger.info("获取资源RSD00003失败!",resdata)
    # aiot_controller.get_scene_list()
    # aiot_controller.execute_scene('scene_001')
    # Hard-coded device-control example for quick testing
    # {
    #       'productId': 'KXYP79V2',
    #       'deviceId': '0001200d90395efffe80c4ee',
    #       'deviceName': '卧室灯带',
    #       'place': '卧室',
    #       'resourceId': 'power2',
    #       'connected': True
    #     }
    # control_data = [
    #     # {"resourceId": "power2", "value": False},
    #     # {"resourceId": "power1", "value": False},
    #     # 窗帘
    #     {"resourceId": "work1.work1", "value": "2"}
    # ]
    # res, resdata = aiot_controller.control_device(device_id='ATARWSA40001B8D61AA720B0', product_id='RSD00005',
    #                                               control_detailed_data=control_data)
    # if res is True:
    #     print("客厅筒灯!",resdata)
    # else:
    #     print("客厅筒灯!",resdata)

+ 1297 - 0
utils/echo_cancellation.py

@@ -0,0 +1,1297 @@
+# -*- coding: utf-8 -*-
+"""
+自我声音检测和回声消除模块
+防止系统在播放TTS时误触发语音识别
+"""
+import threading
+import time
+import numpy as np
+from typing import Optional, Callable, Dict, List, Tuple
+from utils.logger import logger
+import hashlib
+import json
+from collections import deque
+from dataclasses import dataclass
+import struct
+from config.config.echo_cancellation_conf import EchoCancellationConf
+
+
@dataclass
class AudioFingerprint:
    """Audio fingerprint record used to match captured audio against TTS output."""
    fingerprint: str                 # MD5 hash of a compact feature summary
    timestamp: float                 # capture time (epoch seconds)
    duration: float                  # clip length in seconds
    energy: float                    # mean squared sample amplitude
    spectral_features: List[float]   # log-mel band energies
+
+
class EchoCancellationEngine:
    """Echo-cancellation engine.

    Fingerprints outgoing TTS audio and compares captured microphone audio
    against it so the recognizer is not triggered by the system's own voice.
    """

    def __init__(self):
        """Initialize state, buffers, and tunables from EchoCancellationConf."""
        self.is_enabled = EchoCancellationConf.ENABLE_ECHO_CANCELLATION
        self.is_playing_tts = False
        self._current_playing_status = False  # initialize so later hasattr checks always pass
        # Recent TTS fingerprints / captured audio, kept in bounded ring buffers.
        self.tts_audio_fingerprints: deque = deque(
            maxlen=EchoCancellationConf.MAX_TTS_FINGERPRINTS)
        self.recording_buffer: deque = deque(
            maxlen=EchoCancellationConf.MAX_RECORDING_BUFFER)

        # Tunable parameters.
        self.sample_rate = EchoCancellationConf.SAMPLE_RATE
        self.frame_size = EchoCancellationConf.FRAME_SIZE
        self.overlap_ratio = EchoCancellationConf.OVERLAP_RATIO
        self.energy_threshold = EchoCancellationConf.ENERGY_THRESHOLD
        self.correlation_threshold = EchoCancellationConf.CORRELATION_THRESHOLD
        self.time_window = EchoCancellationConf.TIME_WINDOW

        # State management (re-entrant lock: public methods nest internally).
        self._lock = threading.RLock()
        self.last_tts_end_time = 0
        self.tts_fade_out_duration = EchoCancellationConf.TTS_FADE_OUT_DURATION

        # Spectral-analysis parameters.
        self.fft_size = EchoCancellationConf.FFT_SIZE
        self.mel_filters = EchoCancellationConf.MEL_FILTERS

        # User-voice detection configuration.
        self.voice_detection_config = EchoCancellationConf.get_voice_detection_config()

        # TTS filtering configuration (copied so local edits don't mutate conf).
        self.tts_filtering_config = EchoCancellationConf.TTS_FILTERING.copy()

        # Runtime statistics.
        self.stats = {
            'total_processed': 0,
            'echo_detected': 0,
            'false_positives': 0,
            'processing_time_avg': 0.0,
            'user_interrupts_detected': 0
        }

        logger.info("🔇 回声消除引擎已初始化")

        # Validate configuration up front and report any problems.
        config_errors = EchoCancellationConf.validate_config()
        if config_errors:
            logger.warning(f"⚠️ 配置验证发现问题: {config_errors}")
        else:
            logger.info("✅ 回声消除配置验证通过")
+
+    def set_tts_playing_status(self, is_playing: bool, audio_data: Optional[bytes] = None):
+        """设置TTS播放状态并记录音频指纹"""
+        with self._lock:
+            # 避免重复设置相同状态
+            if hasattr(self, '_current_playing_status') and self._current_playing_status == is_playing:
+                return
+
+            self._current_playing_status = is_playing
+            self.is_playing_tts = is_playing
+            current_time = time.time()
+
+            if is_playing:
+                # 记录TTS开始时间
+                self._tts_start_time = current_time
+
+                if audio_data:
+                    # 生成TTS音频指纹
+                    fingerprint = self._generate_audio_fingerprint(
+                        audio_data, current_time)
+                    if fingerprint:
+                        self.tts_audio_fingerprints.append(fingerprint)
+                        logger.debug(
+                            f"🎵 记录TTS音频指纹: {fingerprint.fingerprint[:16]}...")
+            else:
+                self.last_tts_end_time = current_time
+                # logger.debug("🔇 TTS播放结束,开始淡出期")
+
    def is_echo_audio(self, audio_data: bytes) -> bool:
        """Return True when *audio_data* is judged to be an echo of our own TTS.

        Filtering is tiered by playback state — strictest while TTS plays,
        looser during the post-TTS fade-out window, fingerprint comparison
        only when idle. On internal errors the method returns False so a
        detector failure never blocks normal recognition.
        """
        if not self.is_enabled or not audio_data:
            return False

        start_time = time.time()

        try:
            with self._lock:
                current_time = time.time()

                # While TTS is playing, apply the strictest filtering policy.
                if self.is_playing_tts:
                    # Right after TTS starts there may be no fingerprints yet.
                    tts_start_time = getattr(self, '_tts_start_time', 0)
                    if current_time - tts_start_time < 1.2:  # within 1.2s of TTS start: extended filter window
                        # Treat as echo outright to avoid false barge-ins
                        # while fingerprints are still being collected.
                        self.stats['echo_detected'] += 1
                        # logger.debug("🚨 TTS刚开始播放,预防性过滤音频")
                        return True

                    # Fingerprint the current audio for analysis.
                    current_fingerprint = self._generate_audio_fingerprint(
                        audio_data, current_time)
                    if not current_fingerprint:
                        # No fingerprint available: default to "echo".
                        self.stats['echo_detected'] += 1
                        # logger.debug("🚨 无法生成音频指纹,默认过滤")
                        return True

                    # During playback only unmistakable user speech may pass.
                    if self._is_very_strong_user_voice(current_fingerprint):
                        # It must also differ extremely from recent TTS.
                        if self._has_extreme_difference_from_tts(current_fingerprint):
                            # logger.debug("🎤 检测到非常强烈的用户语音特征且与TTS有极大差异,允许通过")
                            return False
                        else:
                            # Strong energy but too TTS-like: still filter.
                            self.stats['echo_detected'] += 1
                            logger.debug("🚨 强用户语音但与TTS差异不够,仍然过滤")
                            return True

                    # Everything else is filtered to prevent echo.
                    self.stats['echo_detected'] += 1
                    # logger.debug("🚨 TTS播放期间严格过滤音频,防止回声")
                    return True

                # Inside the fade-out window just after TTS ended?
                if current_time - self.last_tts_end_time < self.tts_fade_out_duration:
                    # Looser policy here: fingerprint and look for obvious
                    # user-voice characteristics.
                    current_fingerprint = self._generate_audio_fingerprint(
                        audio_data, current_time)
                    if not current_fingerprint:
                        # No fingerprint available: default to "echo".
                        self.stats['echo_detected'] += 1
                        # logger.debug("🚨 TTS淡出期内无法生成音频指纹,默认过滤")
                        return True

                    # Obvious user voice with a basic difference passes.
                    if self._has_obvious_voice_characteristics(current_fingerprint):
                        # Difference check uses the relaxed standard here.
                        if self._has_basic_difference_from_tts(current_fingerprint):
                            # logger.debug("🎤 TTS淡出期内检测到明显用户语音,允许通过")
                            return False

                    # Otherwise filter, logging how far into fade-out we are.
                    self.stats['echo_detected'] += 1
                    logger.debug(
                        f"🚨 TTS淡出期内过滤音频(距离TTS结束 {current_time - self.last_tts_end_time:.1f}s)")
                    return True

                # Idle state: fingerprint and compare with stored TTS prints.
                current_fingerprint = self._generate_audio_fingerprint(
                    audio_data, current_time)
                if not current_fingerprint:
                    return False

                # Compare against the recent TTS fingerprints.
                is_echo = self._compare_with_tts_fingerprints(
                    current_fingerprint)

                if is_echo:
                    self.stats['echo_detected'] += 1

                # Maintain running statistics (count + average latency).
                self.stats['total_processed'] += 1

                processing_time = time.time() - start_time
                self.stats['processing_time_avg'] = (
                    self.stats['processing_time_avg'] * (self.stats['total_processed'] - 1) +
                    processing_time
                ) / self.stats['total_processed']

                return is_echo

        except Exception as e:
            logger.error(f"❌ 回声检测失败: {e}")
            # Fail open: never let detector errors block normal recognition.
            return False
+
+    def _generate_audio_fingerprint(self, audio_data: bytes, timestamp: float) -> Optional[AudioFingerprint]:
+        """生成音频指纹"""
+        try:
+            # 转换为numpy数组
+            samples = np.frombuffer(
+                audio_data, dtype=np.int16).astype(np.float32)
+            if len(samples) == 0:
+                return None
+
+            # 计算能量
+            energy = float(np.mean(samples ** 2))
+            if energy < self.energy_threshold:
+                return None
+
+            # 计算频谱特征
+            spectral_features = self._extract_spectral_features(samples)
+
+            # 生成更详细的指纹哈希 - 包含更多特征
+            feature_str = f"{energy:.2f}_{len(samples)}_{np.mean(spectral_features):.4f}_{np.std(spectral_features):.4f}"
+            fingerprint = hashlib.md5(feature_str.encode()).hexdigest()
+
+            duration = len(samples) / self.sample_rate
+
+            # 记录更详细的调试信息
+            if EchoCancellationConf.should_log_audio_fingerprints():
+                logger.debug(
+                    f"🎵 生成音频指纹: 能量={energy:.1f}, 时长={duration:.3f}s, 特征均值={np.mean(spectral_features):.3f}")
+
+            return AudioFingerprint(
+                fingerprint=fingerprint,
+                timestamp=timestamp,
+                duration=duration,
+                energy=energy,
+                spectral_features=spectral_features
+            )
+
+        except Exception as e:
+            logger.error(f"❌ 生成音频指纹失败: {e}")
+            return None
+
+    def _extract_spectral_features(self, samples: np.ndarray) -> List[float]:
+        """提取频谱特征"""
+        try:
+            # 确保样本长度足够进行FFT
+            if len(samples) < self.fft_size:
+                # 零填充
+                padded_samples = np.zeros(self.fft_size)
+                padded_samples[:len(samples)] = samples
+                samples = padded_samples
+
+            # 应用窗函数
+            windowed = samples[:self.fft_size] * np.hanning(self.fft_size)
+
+            # FFT变换
+            fft_result = np.fft.fft(windowed)
+            magnitude_spectrum = np.abs(fft_result[:self.fft_size//2])
+
+            # 计算mel频率特征
+            mel_features = self._compute_mel_features(magnitude_spectrum)
+
+            return mel_features.tolist()
+
+        except Exception as e:
+            logger.error(f"❌ 提取频谱特征失败: {e}")
+            return [0.0] * self.mel_filters
+
+    def _compute_mel_features(self, magnitude_spectrum: np.ndarray) -> np.ndarray:
+        """计算Mel频率特征"""
+        try:
+            # 简化的Mel滤波器组
+            mel_filters = np.linspace(
+                0, len(magnitude_spectrum), self.mel_filters + 2)
+            mel_features = np.zeros(self.mel_filters)
+
+            for i in range(self.mel_filters):
+                start_idx = int(mel_filters[i])
+                end_idx = int(mel_filters[i + 2])
+                if end_idx > start_idx:
+                    mel_features[i] = np.mean(
+                        magnitude_spectrum[start_idx:end_idx])
+
+            # 对数变换
+            mel_features = np.log(mel_features + 1e-10)
+
+            return mel_features
+
+        except Exception as e:
+            logger.error(f"❌ 计算Mel特征失败: {e}")
+            return np.zeros(self.mel_filters)
+
+    def _compare_with_tts_fingerprints(self, current_fingerprint: AudioFingerprint) -> bool:
+        """与TTS指纹进行比较"""
+        try:
+            current_time = current_fingerprint.timestamp
+
+            for tts_fingerprint in self.tts_audio_fingerprints:
+                # 检查时间窗口
+                time_diff = current_time - tts_fingerprint.timestamp
+                if time_diff > self.time_window:
+                    continue
+
+                # 比较指纹哈希
+                if current_fingerprint.fingerprint == tts_fingerprint.fingerprint:
+                    return True
+
+                # 比较能量和频谱特征
+                if self._is_similar_audio(current_fingerprint, tts_fingerprint):
+                    return True
+
+            return False
+
+        except Exception as e:
+            logger.error(f"❌ 指纹比较失败: {e}")
+            return False
+
+    def _is_similar_audio(self, fp1: AudioFingerprint, fp2: AudioFingerprint) -> bool:
+        """判断两个音频指纹是否相似"""
+        try:
+            # 能量相似性检查
+            energy_ratio = min(fp1.energy, fp2.energy) / \
+                max(fp1.energy, fp2.energy)
+            if energy_ratio < 0.5:
+                return False
+
+            # 频谱特征相似性检查
+            if len(fp1.spectral_features) != len(fp2.spectral_features):
+                return False
+
+            # 计算余弦相似度
+            features1 = np.array(fp1.spectral_features)
+            features2 = np.array(fp2.spectral_features)
+
+            norm1 = np.linalg.norm(features1)
+            norm2 = np.linalg.norm(features2)
+
+            if norm1 == 0 or norm2 == 0:
+                return False
+
+            cosine_similarity = np.dot(features1, features2) / (norm1 * norm2)
+
+            return cosine_similarity > self.correlation_threshold
+
+        except Exception as e:
+            logger.error(f"❌ 音频相似性计算失败: {e}")
+            return False
+
+    def _is_likely_user_voice(self, fingerprint: AudioFingerprint) -> bool:
+        """判断是否可能是用户语音(用于打断检测)"""
+        try:
+            # 1. 能量阈值检查 - 用户语音通常有足够的能量
+            user_voice_threshold = self.energy_threshold * \
+                self.voice_detection_config['energy_multiplier']
+            if fingerprint.energy < user_voice_threshold:
+                return False
+
+            # 2. 频谱特征分析 - 人声有特定的频谱特征
+            features = np.array(fingerprint.spectral_features)
+
+            # 检查频谱分布是否符合人声特征
+            # 人声通常在中低频有较强的能量
+            if len(features) >= 8:
+                low_freq_energy = np.mean(features[:4])    # 低频部分
+                mid_freq_energy = np.mean(features[4:8])   # 中频部分
+                high_freq_energy = np.mean(features[8:]) if len(
+                    features) > 8 else 0  # 高频部分
+
+                # 人声特征:中低频能量较强,高频相对较弱
+                low_freq_weight = self.voice_detection_config['low_freq_weight']
+                if mid_freq_energy > low_freq_energy * low_freq_weight and mid_freq_energy > high_freq_energy:
+                    # 3. 与最近TTS音频的差异检查
+                    if self._has_significant_difference_from_tts(fingerprint):
+                        self.stats['user_interrupts_detected'] += 1
+                        return True
+
+            return False
+
+        except Exception as e:
+            logger.error(f"❌ 用户语音判断失败: {e}")
+            return False
+
+    def _has_significant_difference_from_tts(self, fingerprint: AudioFingerprint) -> bool:
+        """检查与TTS音频是否有显著差异"""
+        try:
+            if not self.tts_audio_fingerprints:
+                return True  # 没有TTS参考,认为是用户语音
+
+            current_time = fingerprint.timestamp
+            tts_reference_window = EchoCancellationConf.TTS_REFERENCE_WINDOW
+
+            # 找到最近的TTS指纹进行比较
+            recent_tts_fingerprints = [
+                fp for fp in self.tts_audio_fingerprints
+                if current_time - fp.timestamp < tts_reference_window
+            ]
+
+            if not recent_tts_fingerprints:
+                return True  # 没有最近的TTS参考
+
+            # 与最近的TTS指纹比较
+            check_count = self.voice_detection_config['recent_tts_check_count']
+            energy_diff_threshold = self.voice_detection_config['energy_diff_threshold']
+            spectral_diff_threshold = self.voice_detection_config['spectral_diff_threshold']
+
+            for tts_fp in recent_tts_fingerprints[-check_count:]:
+                # 能量差异检查
+                energy_diff = abs(fingerprint.energy - tts_fp.energy) / \
+                    max(fingerprint.energy, tts_fp.energy)
+                if energy_diff > energy_diff_threshold:
+                    continue
+
+                # 频谱特征差异检查
+                if len(fingerprint.spectral_features) == len(tts_fp.spectral_features):
+                    features1 = np.array(fingerprint.spectral_features)
+                    features2 = np.array(tts_fp.spectral_features)
+
+                    # 计算频谱差异
+                    spectral_diff = np.mean(np.abs(features1 - features2))
+                    if spectral_diff < spectral_diff_threshold:
+                        return False  # 频谱过于相似,可能是回声
+
+            return True  # 与TTS有显著差异,可能是用户语音
+
+        except Exception as e:
+            logger.error(f"❌ TTS差异检查失败: {e}")
+            return True  # 出错时倾向于认为是用户语音
+
+    def _update_processing_time(self, processing_time: float):
+        """更新处理时间统计"""
+        if self.stats['total_processed'] > 0:
+            alpha = 0.1  # 平滑因子
+            self.stats['processing_time_avg'] = (
+                alpha * processing_time +
+                (1 - alpha) * self.stats['processing_time_avg']
+            )
+        else:
+            self.stats['processing_time_avg'] = processing_time
+
+    def cleanup_old_fingerprints(self):
+        """清理过期的指纹"""
+        try:
+            with self._lock:
+                current_time = time.time()
+
+                # 清理过期的TTS指纹
+                while (self.tts_audio_fingerprints and
+                       current_time - self.tts_audio_fingerprints[0].timestamp > self.time_window):
+                    self.tts_audio_fingerprints.popleft()
+
+        except Exception as e:
+            logger.error(f"❌ 清理指纹失败: {e}")
+
+    def get_stats(self) -> Dict:
+        """获取统计信息"""
+        with self._lock:
+            total = self.stats['total_processed']
+            return {
+                'total_processed': total,
+                'echo_detected': self.stats['echo_detected'],
+                'user_interrupts_detected': self.stats['user_interrupts_detected'],
+                'echo_detection_rate': self.stats['echo_detected'] / max(total, 1),
+                'interrupt_success_rate': self.stats['user_interrupts_detected'] / max(total, 1),
+                'processing_time_avg_ms': self.stats['processing_time_avg'] * 1000,
+                'fingerprints_stored': len(self.tts_audio_fingerprints),
+                'config': {
+                    'is_enabled': self.is_enabled,
+                    'interrupt_during_playback': EchoCancellationConf.ENABLE_INTERRUPT_DURING_PLAYBACK,
+                    'energy_threshold': self.energy_threshold,
+                    'user_voice_threshold': self.energy_threshold * self.voice_detection_config['energy_multiplier']
+                }
+            }
+
    def enable(self):
        """Enable echo cancellation processing."""
        self.is_enabled = True
        logger.info("✅ 回声消除已启用")
+
    def disable(self):
        """Disable echo cancellation processing."""
        self.is_enabled = False
        logger.info("❌ 回声消除已禁用")
+
    def _is_tts_variant_audio(self, fingerprint: AudioFingerprint) -> bool:
        """Detect whether *fingerprint* is a transformed copy of recent TTS output
        (TTS audio that travelled speaker -> room -> microphone).

        Per recent TTS reference it checks: arrival within 1s combined with
        energy attenuation inside the configured range and high normalized
        spectral similarity; independently, a frequency-distribution
        correlation test. Returns False when no recent TTS reference exists
        or on any error.
        """
        try:
            if not self.tts_audio_fingerprints:
                return False

            current_time = fingerprint.timestamp
            detection_window = self.tts_filtering_config['variant_detection_window']

            # Restrict comparison to TTS fingerprints inside the window.
            recent_tts_fingerprints = [
                fp for fp in self.tts_audio_fingerprints
                if current_time - fp.timestamp < detection_window
            ]

            if not recent_tts_fingerprints:
                return False

            energy_range = self.tts_filtering_config['energy_attenuation_range']
            similarity_threshold = self.tts_filtering_config['variant_similarity_threshold']
            correlation_threshold = self.tts_filtering_config['frequency_correlation_threshold']

            # Check every recent TTS fingerprint, not only the last few.
            for tts_fp in recent_tts_fingerprints:
                # 1. Temporal proximity: audio arriving shortly after playback
                #    started is very likely an echo.
                time_diff = current_time - tts_fp.timestamp
                if time_diff < 1.0:  # within one second

                    # 2. Energy attenuation: speaker audio re-captured by the
                    #    microphone loses energy on the acoustic path.
                    if tts_fp.energy > 0:  # avoid division by zero
                        energy_ratio = fingerprint.energy / tts_fp.energy
                        if energy_range[0] <= energy_ratio <= energy_range[1]:

                            # 3. Spectral-shape similarity check.
                            if len(fingerprint.spectral_features) == len(tts_fp.spectral_features):
                                features1 = np.array(
                                    fingerprint.spectral_features)
                                features2 = np.array(tts_fp.spectral_features)

                                # Normalize both spectra so the comparison
                                # ignores the energy difference.
                                if np.linalg.norm(features1) > 0 and np.linalg.norm(features2) > 0:
                                    features1_norm = features1 / \
                                        np.linalg.norm(features1)
                                    features2_norm = features2 / \
                                        np.linalg.norm(features2)

                                    # Cosine similarity of normalized spectra.
                                    similarity = np.dot(
                                        features1_norm, features2_norm)

                                    # High shape similarity => TTS variant.
                                    if similarity > similarity_threshold:
                                        if EchoCancellationConf.should_log_detection_details():
                                            logger.debug(
                                                f"🚨 TTS变种检测: 时间差={time_diff:.3f}s, 能量比={energy_ratio:.3f}, 相似度={similarity:.3f}")
                                        return True

                # 4. Frequency-distribution correlation check (independent of
                #    the 1s window; reuses time_diff computed above).
                if self._has_similar_frequency_distribution(fingerprint, tts_fp, correlation_threshold):
                    if EchoCancellationConf.should_log_detection_details():
                        logger.debug(f"🚨 TTS变种检测: 频率分布相似,时间差={time_diff:.3f}s")
                    return True

            return False

        except Exception as e:
            logger.error(f"❌ TTS变种检测失败: {e}")
            return False
+
+    def _has_similar_frequency_distribution(self, fp1: AudioFingerprint, fp2: AudioFingerprint, threshold: float = 0.5) -> bool:
+        """检查两个音频指纹是否有相似的频率分布"""
+        try:
+            if len(fp1.spectral_features) != len(fp2.spectral_features):
+                return False
+
+            features1 = np.array(fp1.spectral_features)
+            features2 = np.array(fp2.spectral_features)
+
+            # 计算频率分布的相关系数
+            if len(features1) > 1:
+                correlation = np.corrcoef(features1, features2)[0, 1]
+                return not np.isnan(correlation) and correlation > threshold
+
+            return False
+
+        except Exception as e:
+            logger.error(f"❌ 频率分布比较失败: {e}")
+            return False
+
+    def _is_definitely_user_voice(self, fingerprint: AudioFingerprint) -> bool:
+        """严格判断是否为确定的用户语音(用于播放中的打断检测)"""
+        try:
+            # 1. 更高的能量阈值 - 用户打断时通常会更大声
+            energy_multiplier = self.tts_filtering_config['definite_voice_energy_multiplier']
+            high_energy_threshold = self.energy_threshold * \
+                self.voice_detection_config['energy_multiplier'] * \
+                energy_multiplier
+            if fingerprint.energy < high_energy_threshold:
+                return False
+
+            # 2. 严格的频谱特征分析
+            features = np.array(fingerprint.spectral_features)
+
+            if len(features) >= 8:
+                low_freq_energy = np.mean(features[:4])    # 低频部分
+                mid_freq_energy = np.mean(features[4:8])   # 中频部分
+                high_freq_energy = np.mean(features[8:]) if len(
+                    features) > 8 else 0  # 高频部分
+
+                # 人声特征检查 - 更严格的标准
+                # 中频能量应该明显高于低频和高频
+                if not (mid_freq_energy > low_freq_energy * 0.8 and
+                        mid_freq_energy > high_freq_energy * 1.2):
+                    return False
+
+                # 3. 检查频谱的动态范围 - 人声通常有较大的动态范围
+                min_spectral_range = self.tts_filtering_config['min_spectral_range']
+                spectral_range = np.max(features) - np.min(features)
+                if spectral_range < min_spectral_range:
+                    return False
+
+            # 4. 与所有TTS音频的差异检查 - 必须与所有TTS音频都有显著差异
+            if not self._has_significant_difference_from_all_tts(fingerprint):
+                return False
+
+            # 5. 检查音频的复杂度 - 人声通常比TTS更复杂
+            if not self._has_sufficient_complexity(fingerprint):
+                return False
+
+            # 所有检查都通过,认为是确定的用户语音
+            self.stats['user_interrupts_detected'] += 1
+            return True
+
+        except Exception as e:
+            logger.error(f"❌ 确定用户语音判断失败: {e}")
+            return False
+
+    def _has_significant_difference_from_all_tts(self, fingerprint: AudioFingerprint) -> bool:
+        """检查与所有TTS音频是否都有显著差异"""
+        try:
+            if not self.tts_audio_fingerprints:
+                return True
+
+            current_time = fingerprint.timestamp
+            max_similarity = self.tts_filtering_config['max_similarity_with_tts']
+
+            # 检查所有最近的TTS指纹
+            recent_tts_fingerprints = [
+                fp for fp in self.tts_audio_fingerprints
+                if current_time - fp.timestamp < 3.0  # 3秒内的所有TTS
+            ]
+
+            if not recent_tts_fingerprints:
+                return True
+
+            # 必须与所有TTS音频都有显著差异
+            for tts_fp in recent_tts_fingerprints:
+                # 能量差异检查 - 更严格
+                energy_ratio = min(fingerprint.energy, tts_fp.energy) / \
+                    max(fingerprint.energy, tts_fp.energy)
+                if energy_ratio > 0.7:  # 能量过于相似
+                    return False
+
+                # 频谱相似度检查 - 更严格
+                if len(fingerprint.spectral_features) == len(tts_fp.spectral_features):
+                    features1 = np.array(fingerprint.spectral_features)
+                    features2 = np.array(tts_fp.spectral_features)
+
+                    if np.linalg.norm(features1) > 0 and np.linalg.norm(features2) > 0:
+                        # 归一化比较
+                        features1_norm = features1 / np.linalg.norm(features1)
+                        features2_norm = features2 / np.linalg.norm(features2)
+                        similarity = np.dot(features1_norm, features2_norm)
+
+                        if similarity > max_similarity:  # 相似度过高
+                            return False
+
+            return True
+
+        except Exception as e:
+            logger.error(f"❌ 全TTS差异检查失败: {e}")
+            return False
+
+    def _has_sufficient_complexity(self, fingerprint: AudioFingerprint) -> bool:
+        """检查音频是否有足够的复杂度(人声特征)"""
+        try:
+            features = np.array(fingerprint.spectral_features)
+            min_variation = self.tts_filtering_config['min_spectral_variation']
+
+            # 1. 频谱变化检查 - 人声通常有更多的频谱变化
+            if len(features) > 1:
+                spectral_variation = np.std(features)
+                if spectral_variation < min_variation:
+                    return False
+
+            # 2. 频谱分布检查 - 人声应该有特定的频率分布
+            if len(features) >= 6:
+                # 检查是否有明显的共振峰特征
+                # 人声通常在某些频段有能量集中
+                max_energy_idx = np.argmax(features)
+                if max_energy_idx < 2 or max_energy_idx > len(features) - 2:
+                    # 能量峰值在边缘,可能不是人声
+                    return False
+
+            return True
+
+        except Exception as e:
+            logger.error(f"❌ 复杂度检查失败: {e}")
+            return False
+
+    def _is_likely_user_voice_relaxed(self, fingerprint: AudioFingerprint) -> bool:
+        """宽松判断是否为用户语音(用于非严格模式的打断检测)"""
+        try:
+            # 1. 极严格的能量阈值 - 避免嘈杂环境误判
+            user_voice_threshold = self.energy_threshold * 5.0  # 大幅提高阈值
+            if fingerprint.energy < user_voice_threshold:
+                return False
+
+            # 2. 如果音频能量足够,需要极严格的检查
+            moderate_energy_threshold = self.energy_threshold * 20.0  # 极大幅提高阈值
+            if fingerprint.energy > moderate_energy_threshold:
+                # 能量足够时,需要通过更严格的差异检查
+                if self._has_strict_difference_from_tts(fingerprint):
+                    logger.debug(
+                        f"🎤 高能量({fingerprint.energy:.1f}),通过严格差异检查,认为是用户语音")
+                    return True
+                else:
+                    logger.debug(
+                        f"🚫 高能量({fingerprint.energy:.1f}),但未通过严格差异检查,可能是噪音")
+                    return False
+
+            # 3. 基本的频谱特征检查(更宽松)
+            features = np.array(fingerprint.spectral_features)
+
+            if len(features) >= 4:  # 降低要求
+                # 检查是否有人声的基本特征
+                if len(features) >= 6:
+                    mid_freq_energy = np.mean(features[2:5])  # 中频部分
+                    total_energy = np.mean(features)
+
+                    # 中频能量占比检查(更宽松)
+                    if mid_freq_energy > total_energy * 0.3:  # 从0.5降低到0.3
+                        # 简单的与TTS差异检查
+                        if self._has_basic_difference_from_tts(fingerprint):
+                            return True
+
+            # 4. 如果音频足够大声,需要极严格的检查
+            high_energy_threshold = self.energy_threshold * 50.0  # 提高到50.0,极严格
+            if fingerprint.energy > high_energy_threshold:
+                # 即使能量很高,也要检查与TTS的差异,并且需要更严格的条件
+                if self._has_strict_difference_from_tts(fingerprint):
+                    logger.debug(
+                        f"🎤 极高能量({fingerprint.energy:.1f})且通过严格差异检查,认为是用户语音")
+                    return True
+                else:
+                    logger.debug(
+                        f"🚫 极高能量({fingerprint.energy:.1f}),但未通过严格差异检查,可能是强噪音")
+                    return False
+
+            # 5. 时间窗口检查 - 距离TTS较远时更容易认为是用户语音
+            if self.tts_audio_fingerprints:
+                last_tts_time = max(
+                    fp.timestamp for fp in self.tts_audio_fingerprints)
+                time_since_last_tts = fingerprint.timestamp - last_tts_time
+                if time_since_last_tts > 1.5:  # 从1.0提高到1.5秒
+                    logger.debug(
+                        f"🎤 距离最后TTS较远({time_since_last_tts:.1f}s),认为是用户语音")
+                    return True
+
+            return False
+
+        except Exception as e:
+            logger.error(f"❌ 宽松用户语音判断失败: {e}")
+            # 出错时倾向于认为是用户语音,允许打断
+            return True
+
    def _has_basic_difference_from_tts(self, fingerprint: AudioFingerprint) -> bool:
        """Lightweight echo test: does *fingerprint* differ at all from recent TTS?

        Two regimes, selected by ``self.is_playing_tts``:
        - while TTS plays: compare against the most recent TTS fingerprint
          within 0.3s using stricter thresholds (energy ratio > 0.6, spectral
          distance < 0.8, or correlation > 0.5 each mark an echo);
        - otherwise: compare against the most recent TTS fingerprint within
          0.5s with looser thresholds (echo only if energy ratio > 0.8 AND
          spectral distance < 0.5).
        Returns True (user speech) when no reference exists or on error.
        """
        try:
            if not self.tts_audio_fingerprints:
                return True  # no TTS reference -> assume user speech

            current_time = fingerprint.timestamp

            # Branch on whether TTS is currently playing.
            if self.is_playing_tts:
                # Stricter standard during TTS playback.
                # Only look at TTS from the last 0.3s (shortened window).
                recent_tts_fingerprints = [
                    fp for fp in self.tts_audio_fingerprints
                    if current_time - fp.timestamp < 0.3
                ]

                if not recent_tts_fingerprints:
                    return True  # no recent TTS reference

                # Compare against the most recent TTS fingerprint only.
                for tts_fp in recent_tts_fingerprints[-1:]:
                    # Energy-difference check (stricter during playback).
                    energy_ratio = min(fingerprint.energy, tts_fp.energy) / \
                        max(fingerprint.energy, tts_fp.energy)
                    if energy_ratio > 0.6:  # playback demands a larger energy gap
                        logger.debug(
                            f"🚨 TTS播放期间能量过于相似({energy_ratio:.2f}),可能是回声")
                        return False

                    # Spectral-difference check (stricter during playback).
                    if len(fingerprint.spectral_features) == len(tts_fp.spectral_features):
                        features1 = np.array(fingerprint.spectral_features)
                        features2 = np.array(tts_fp.spectral_features)

                        # Plain Euclidean distance between the spectra.
                        distance = np.linalg.norm(features1 - features2)
                        if distance < 0.8:  # playback demands a larger spectral gap
                            logger.debug(
                                f"🚨 TTS播放期间频谱距离过小({distance:.2f}),可能是回声")
                            return False

                        # Normalized-correlation check.
                        if np.linalg.norm(features1) > 0 and np.linalg.norm(features2) > 0:
                            features1_norm = features1 / \
                                np.linalg.norm(features1)
                            features2_norm = features2 / \
                                np.linalg.norm(features2)
                            correlation = np.dot(
                                features1_norm, features2_norm)
                            if correlation > 0.5:  # stricter correlation limit while playing
                                logger.debug(
                                    f"🚨 TTS播放期间频谱相关性过高({correlation:.2f}),可能是回声")
                                return False

                return True  # survived the strict during-playback checks
            else:
                # Looser original standard outside TTS playback.
                # Only look at TTS from the last 0.5s (shortened window).
                recent_tts_fingerprints = [
                    fp for fp in self.tts_audio_fingerprints
                    if current_time - fp.timestamp < 0.5
                ]

                if not recent_tts_fingerprints:
                    return True  # no recent TTS reference

                # Compare against the most recent TTS fingerprint only
                # (very loose standard).
                for tts_fp in recent_tts_fingerprints[-1:]:
                    # Energy-difference check (looser).
                    energy_ratio = min(fingerprint.energy, tts_fp.energy) / \
                        max(fingerprint.energy, tts_fp.energy)
                    if energy_ratio > 0.8:  # raised from 0.5: closer energies needed to be suspicious

                        # Spectral-difference check (looser).
                        if len(fingerprint.spectral_features) == len(tts_fp.spectral_features):
                            features1 = np.array(fingerprint.spectral_features)
                            features2 = np.array(tts_fp.spectral_features)

                            # Plain Euclidean distance between the spectra.
                            distance = np.linalg.norm(features1 - features2)
                            if distance < 0.5:  # lowered from 1.0: must be closer to count as echo
                                logger.debug(f"🚨 频谱距离过小({distance:.2f}),可能是回声")
                                return False

                return True  # basic checks passed -> treat as user speech

        except Exception as e:
            logger.error(f"❌ 基本TTS差异检查失败: {e}")
            return True  # fail open: prefer treating it as user speech
+
+    def _has_strict_difference_from_tts(self, fingerprint: AudioFingerprint) -> bool:
+        """严格的与TTS差异检查(用于高能量音频)"""
+        try:
+            if not self.tts_audio_fingerprints:
+                return True  # 没有TTS参考,认为是用户语音
+
+            current_time = fingerprint.timestamp
+
+            # 检查最近1秒内的TTS,时间窗口更严格
+            recent_tts_fingerprints = [
+                fp for fp in self.tts_audio_fingerprints
+                if current_time - fp.timestamp < 1.0
+            ]
+
+            if not recent_tts_fingerprints:
+                return True  # 没有最近的TTS参考
+
+            # 与最近的TTS指纹比较(非常严格的标准)
+            for tts_fp in recent_tts_fingerprints:
+                # 1. 时间相关性检查 - 如果在TTS播放后很短时间内出现,更可能是回声
+                time_diff = current_time - tts_fp.timestamp
+                if time_diff < 0.3:  # 300ms内
+                    logger.debug(f"🚫 时间过近({time_diff:.2f}s),可能是回声")
+                    return False
+
+                # 2. 能量差异检查(非常严格)
+                energy_ratio = min(fingerprint.energy, tts_fp.energy) / \
+                    max(fingerprint.energy, tts_fp.energy)
+                if energy_ratio > 0.9:  # 需要能量差异很大才认为不是回声
+                    logger.debug(f"🚫 能量过于相似({energy_ratio:.2f}),可能是回声")
+                    return False
+
+                # 3. 频谱特征差异检查(非常严格)
+                if len(fingerprint.spectral_features) == len(tts_fp.spectral_features):
+                    features1 = np.array(fingerprint.spectral_features)
+                    features2 = np.array(tts_fp.spectral_features)
+
+                    # 计算欧氏距离和相关性
+                    distance = np.linalg.norm(features1 - features2)
+                    if distance < 0.3:  # 需要频谱差异很大
+                        logger.debug(f"🚫 频谱过于相似({distance:.2f}),可能是回声")
+                        return False
+
+                    # 检查相关性
+                    if np.linalg.norm(features1) > 0 and np.linalg.norm(features2) > 0:
+                        features1_norm = features1 / np.linalg.norm(features1)
+                        features2_norm = features2 / np.linalg.norm(features2)
+                        correlation = np.dot(features1_norm, features2_norm)
+                        if correlation > 0.7:  # 相关性过高
+                            logger.debug(f"🚫 频谱相关性过高({correlation:.2f}),可能是回声")
+                            return False
+
+            return True  # 通过严格检查,认为是用户语音
+
+        except Exception as e:
+            logger.error(f"❌ 严格TTS差异检查失败: {e}")
+            return False  # 出错时倾向于认为是回声,减少误判
+
+    def _is_definitely_echo(self, fingerprint: AudioFingerprint) -> bool:
+        """确定判断是否为回声(用于非严格模式)"""
+        try:
+            if not self.tts_audio_fingerprints:
+                return False  # 没有TTS参考,不能确定是回声
+
+            current_time = fingerprint.timestamp
+
+            # 检查最近的TTS指纹
+            recent_tts_fingerprints = [
+                fp for fp in self.tts_audio_fingerprints
+                if current_time - fp.timestamp < 1.5  # 1.5秒内的TTS
+            ]
+
+            if not recent_tts_fingerprints:
+                return False  # 没有最近的TTS参考
+
+            for tts_fp in recent_tts_fingerprints:
+                # 1. 时间相关性检查 - 如果在TTS播放后很短时间内出现
+                time_diff = current_time - tts_fp.timestamp
+                if time_diff < 0.3:  # 300ms内
+
+                    # 2. 能量衰减检查 - 符合扬声器到麦克风的衰减特征
+                    energy_ratio = fingerprint.energy / tts_fp.energy
+                    if 0.1 <= energy_ratio <= 0.7:  # 能量衰减10%-70%
+
+                        # 3. 频谱相似性检查 - 高度相似
+                        if len(fingerprint.spectral_features) == len(tts_fp.spectral_features):
+                            features1 = np.array(fingerprint.spectral_features)
+                            features2 = np.array(tts_fp.spectral_features)
+
+                            if np.linalg.norm(features1) > 0 and np.linalg.norm(features2) > 0:
+                                # 归一化比较
+                                features1_norm = features1 / \
+                                    np.linalg.norm(features1)
+                                features2_norm = features2 / \
+                                    np.linalg.norm(features2)
+                                similarity = np.dot(
+                                    features1_norm, features2_norm)
+
+                                # 如果相似度很高,几乎确定是回声
+                                if similarity > 0.8:
+                                    return True
+
+                # 4. 完全匹配检查
+                if fingerprint.fingerprint == tts_fp.fingerprint:
+                    return True
+
+            return False  # 不能确定是回声
+
+        except Exception as e:
+            logger.error(f"❌ 确定回声判断失败: {e}")
+            return False  # 出错时不确定是回声
+
+    def _trigger_interrupt(self):
+        """已移除:不再触发打断信号,语音正常播放进行消音"""
+        try:
+            # 防止重复触发
+            current_time = time.time()
+            if hasattr(self, '_last_trigger_time') and current_time - self._last_trigger_time < 0.5:
+                logger.debug("🔇 打断信号防抖:跳过重复触发")
+                return
+
+            self._last_trigger_time = current_time
+
+            # 不再创建待验证的打断请求,只进行消音处理
+            logger.debug("🔇 回声消除检测到音频,进行消音处理但不打断播放")
+
+        except Exception as e:
+            logger.error(f"❌ 消音处理失败: {e}")
+            import traceback
+            logger.error(f"消音处理异常详情: {traceback.format_exc()}")
+
+    def _is_very_strong_user_voice(self, fingerprint: AudioFingerprint) -> bool:
+        """
+        检测是否为非常强的用户语音
+        用于在TTS播放期间让明显的用户语音通过,触发VAD和IAT
+        """
+        try:
+            # 1. 适中的能量阈值 - 让更多用户语音通过
+            high_energy_threshold = self.energy_threshold * 3.0  # 降低到3倍基础阈值
+            if fingerprint.energy < high_energy_threshold:
+                return False
+
+            # 2. 检查是否有明显的人声特征
+            if not self._has_obvious_voice_characteristics(fingerprint):
+                return False
+
+            # 3. 与TTS音频有足够差异
+            if not self._has_significant_difference_from_tts(fingerprint):
+                return False
+
+            # logger.debug(f"🔊 检测到强用户语音,能量: {fingerprint.energy:.1f}")
+            return True
+
+        except Exception as e:
+            logger.error(f"❌ 强用户语音检测失败: {e}")
+            return False
+
+    def _has_extreme_difference_from_tts(self, fingerprint: AudioFingerprint) -> bool:
+        """检查与TTS音频是否有极大差异"""
+        try:
+            if not self.tts_audio_fingerprints:
+                return True
+
+            current_time = fingerprint.timestamp
+
+            # 检查最近的TTS指纹
+            recent_tts_fingerprints = [
+                fp for fp in self.tts_audio_fingerprints
+                if current_time - fp.timestamp < 3.0  # 3秒内的TTS,延长检查时间
+            ]
+
+            if not recent_tts_fingerprints:
+                return True
+
+            # 必须与所有TTS音频都有极大的能量差异
+            for tts_fp in recent_tts_fingerprints:
+                energy_ratio = min(fingerprint.energy, tts_fp.energy) / \
+                    max(fingerprint.energy, tts_fp.energy)
+                if energy_ratio > 0.3:  # 能量差异阈值降低到0.3,更严格
+                    return False
+
+            # 进一步检查频谱特征差异
+            for tts_fp in recent_tts_fingerprints:
+                if self._has_similar_frequency_distribution(fingerprint, tts_fp, threshold=0.6):
+                    return False
+
+            return True
+
+        except Exception as e:
+            logger.error(f"❌ 极大TTS差异检查失败: {e}")
+            return False
+
+    def _has_obvious_voice_characteristics(self, fingerprint: AudioFingerprint) -> bool:
+        """检查是否有明显的人声特征"""
+        try:
+            features = np.array(fingerprint.spectral_features)
+
+            if len(features) < 6:
+                return False
+
+            # 简单但有效的人声检查
+            low_freq = np.mean(features[:2])    # 低频
+            mid_freq = np.mean(features[2:5])   # 中频
+            high_freq = np.mean(features[5:])   # 高频
+
+            # 放宽人声检测条件
+            # 1. 中频能量较强(主要条件)
+            if mid_freq > low_freq * 0.6 and mid_freq > high_freq * 0.6:
+                return True
+
+            # 2. 或者能量分布相对均匀(人声特征)
+            total_energy = low_freq + mid_freq + high_freq
+            if total_energy > 0:
+                mid_ratio = mid_freq / total_energy
+                if 0.25 < mid_ratio < 0.6:  # 中频占比合理范围
+                    return True
+
+            return False
+
+        except Exception as e:
+            logger.error(f"❌ 明显人声特征检测失败: {e}")
+            return False
+
    # Removed: _is_very_likely_user_voice_during_tts (interrupt detection during TTS playback is no longer needed)
+
+    def _has_strong_voice_characteristics(self, fingerprint: AudioFingerprint) -> bool:
+        """检查是否具有强烈的人声特征"""
+        try:
+            features = np.array(fingerprint.spectral_features)
+
+            if len(features) < 8:
+                return False
+
+            # 更严格的频谱分布检查
+            low_freq = np.mean(features[:2])    # 低频
+            mid_freq = np.mean(features[2:6])   # 中频
+            high_freq = np.mean(features[6:])   # 高频
+
+            # 中频能量必须明显强于低频和高频(人声特征)
+            if mid_freq < low_freq * 1.2 or mid_freq < high_freq * 1.5:
+                return False
+
+            # 检查频谱的平滑度 - 人声通常有特定的共振峰
+            spectral_variance = np.var(features)
+            if spectral_variance < 0.1:  # 频谱过于平滑可能是TTS
+                return False
+
+            return True
+
+        except Exception as e:
+            logger.error(f"❌ 强人声特征检测失败: {e}")
+            return False
+
+    def _has_high_spectral_complexity(self, fingerprint: AudioFingerprint) -> bool:
+        """检查是否具有高频谱复杂度"""
+        try:
+            features = np.array(fingerprint.spectral_features)
+
+            if len(features) < 4:
+                return False
+
+            # 计算频谱的熵(复杂度指标)
+            normalized_features = features / \
+                np.sum(features) if np.sum(features) > 0 else features
+            entropy = -np.sum(normalized_features *
+                              np.log(normalized_features + 1e-10))
+
+            # 人声通常有较高的熵值
+            min_entropy = 2.0  # 根据实际情况调整
+            if entropy < min_entropy:
+                return False
+
+            return True
+
+        except Exception as e:
+            logger.error(f"❌ 频谱复杂度检测失败: {e}")
+            return False
+
+    def _is_possibly_user_voice_during_tts(self, fingerprint: AudioFingerprint) -> bool:
+        """
+        在TTS播放期间判断是否可能是用户语音
+        使用更宽松的标准,主要用于创建待验证的打断请求
+        """
+        try:
+            # 1. 基本能量检查 - 需要足够的能量
+            min_energy_threshold = self.energy_threshold * 2.0  # 降低能量要求
+            if fingerprint.energy < min_energy_threshold:
+                return False
+
+            # 2. 与TTS音频的基本差异检查
+            if not self._has_basic_difference_from_tts(fingerprint):
+                return False
+
+            # 3. 检查是否有人声特征
+            if not self._has_voice_characteristics(fingerprint):
+                return False
+
+            return True
+
+        except Exception as e:
+            logger.error(f"❌ TTS期间用户语音检测失败: {e}")
+            return False
+
+    def _has_voice_characteristics(self, fingerprint: AudioFingerprint) -> bool:
+        """检查音频是否具有人声特征"""
+        try:
+            features = np.array(fingerprint.spectral_features)
+
+            if len(features) < 4:
+                return False
+
+            # 检查频谱分布 - 人声通常在中频有较强能量
+            if len(features) >= 8:
+                low_freq = np.mean(features[:3])
+                mid_freq = np.mean(features[3:7])
+                high_freq = np.mean(features[7:]) if len(features) > 7 else 0
+
+                # 中频能量应该相对较强
+                if mid_freq < low_freq * 0.5:
+                    return False
+
+            return True
+
+        except Exception as e:
+            logger.error(f"❌ 人声特征检测失败: {e}")
+            return False
+
+    # 已移除:_create_pending_interrupt_request 方法(不再需要创建待验证的打断请求)
+
+
class SelfVoiceDetector:
    """Self-voice detector.

    Wraps an EchoCancellationEngine and decides whether captured microphone
    audio is the robot's own TTS output (echo) so it can be ignored. Also
    fans TTS play-state changes out to registered callbacks and runs a
    background thread that periodically expires stale fingerprints.
    """

    def __init__(self):
        self.echo_engine = EchoCancellationEngine()
        # name -> callback(event: str, payload: dict)
        self.voice_callbacks: Dict[str, Callable] = {}
        self._lock = threading.RLock()

        # Background cleanup thread; signalled to stop via _stop_cleanup.
        self._cleanup_thread = threading.Thread(
            target=self._periodic_cleanup, daemon=True)
        self._stop_cleanup = threading.Event()
        self._cleanup_thread.start()

        logger.info("🎯 自我声音检测器已初始化")

    def register_voice_callback(self, name: str, callback: Callable):
        """Register a named voice-detection callback (replaces an existing one)."""
        with self._lock:
            self.voice_callbacks[name] = callback
            logger.info(f"📝 注册声音检测回调: {name}")

    def unregister_voice_callback(self, name: str):
        """Remove a previously registered callback; no-op if unknown."""
        with self._lock:
            if name in self.voice_callbacks:
                del self.voice_callbacks[name]
                logger.info(f"🗑️ 注销声音检测回调: {name}")

    def set_tts_playing(self, is_playing: bool, audio_data: Optional[bytes] = None):
        """Forward TTS play-state (and optional reference audio) to the engine,
        then notify all registered callbacks of the change."""
        self.echo_engine.set_tts_playing_status(is_playing, audio_data)

        # Notify callbacks outside the engine; a failing callback must not
        # prevent the others from running.
        with self._lock:
            for name, callback in self.voice_callbacks.items():
                try:
                    callback('tts_status_changed', {
                        'is_playing': is_playing,
                        'timestamp': time.time()
                    })
                except Exception as e:
                    logger.error(f"❌ 声音检测回调 {name} 执行失败: {e}")

    def should_ignore_audio(self, audio_data: bytes) -> bool:
        """Return True when the audio is judged to be our own echo."""
        return self.echo_engine.is_echo_audio(audio_data)

    def process_recording_audio(self, audio_data: bytes) -> bool:
        """Return True when the recorded audio should continue downstream
        processing (i.e. it is not our own echo)."""
        if self.should_ignore_audio(audio_data):
            # logger.debug("🚫 检测到自我声音,忽略音频数据")
            return False
        return True

    def _periodic_cleanup(self):
        """Expire stale fingerprints every 0.1 s until shutdown is requested."""
        while not self._stop_cleanup.is_set():
            try:
                self.echo_engine.cleanup_old_fingerprints()
                # Event.wait instead of time.sleep so shutdown() is not
                # delayed by a pending sleep.
                self._stop_cleanup.wait(0.1)
            except Exception as e:
                logger.error(f"❌ 定期清理失败: {e}")
                self._stop_cleanup.wait(1.0)  # back off after an error

    def get_detection_stats(self) -> Dict:
        """Return the engine's detection statistics."""
        return self.echo_engine.get_stats()

    def enable_echo_cancellation(self):
        """Enable echo cancellation in the engine."""
        self.echo_engine.enable()

    def disable_echo_cancellation(self):
        """Disable echo cancellation in the engine."""
        self.echo_engine.disable()

    def shutdown(self):
        """Stop the cleanup thread and release the detector."""
        logger.info("🔄 关闭自我声音检测器...")
        self._stop_cleanup.set()
        if self._cleanup_thread.is_alive():
            self._cleanup_thread.join(timeout=2.0)
        logger.info("✅ 自我声音检测器已关闭")
+
+
# Module-level singleton state.
_self_voice_detector: Optional[SelfVoiceDetector] = None
_detector_lock = threading.Lock()


def get_self_voice_detector() -> SelfVoiceDetector:
    """Return the process-wide SelfVoiceDetector, creating it on first use."""
    global _self_voice_detector

    with _detector_lock:
        if _self_voice_detector is None:
            _self_voice_detector = SelfVoiceDetector()
        return _self_voice_detector
+
+
def cleanup_self_voice_detector():
    """Shut down and drop the global detector instance, if one exists."""
    global _self_voice_detector

    with _detector_lock:
        detector = _self_voice_detector
        if detector is not None:
            detector.shutdown()
            _self_voice_detector = None

+ 173 - 0
utils/init_system.py

@@ -0,0 +1,173 @@
+"""
+系统初始化模块
+负责机器人AI系统的初始化工作
+"""
+import platform
+import subprocess
+from utils.logger import logger
+import time
+import asyncio
+from typing import Optional
+from core.socket_client import SocketClient
+from core.xunfei.message_processor import XunfeiMessageProcessor
+from core.baidu.message_processor import BaiduMessageProcessor
+from handlers.speech_handler import SpeechHandler
+from utils.logger import setup_logger
+from utils.time_sync import sync_system_time
+from utils.load_config import load_config
+
+
class SystemInitializer:
    """System initializer.

    Loads configuration, optionally resets the Linux audio stack, and
    builds the speech-recognition components selected by the
    ``speech_recognition.service`` config key.
    """

    def __init__(self):
        # Populated by _load_configuration().
        self.config = None
        self.speech_config = None
        self.service_type = None

    def initialize_system(self):
        """Run the full initialization sequence.

        Returns:
            dict: the component map produced by ``_init_components``.

        Raises:
            Exception: re-raised from any failed initialization step.
        """
        try:
            # 设置日志
            setup_logger()
            logger.info("开始系统初始化...")

            # 加载配置
            self._load_configuration()

            # Audio-stack reset is currently disabled by design.
            # self._init_audio_system()

            # 初始化组件
            components = self._init_components()

            logger.info("系统初始化完成")
            return components

        except Exception as e:
            logger.error(f"系统初始化失败: {e}")
            raise

    def _load_configuration(self):
        """Load config.yaml and cache the speech-recognition section.

        Defaults the service type to ``xunfei_aiui`` when unspecified.
        """
        try:
            self.config = load_config()
            self.speech_config = self.config.get('speech_recognition', {})
            self.service_type = self.speech_config.get(
                'service', 'xunfei_aiui')
            logger.info(f"配置加载完成,使用服务类型: {self.service_type}")
        except Exception as e:
            logger.error(f"配置加载失败: {e}")
            raise

    def _init_audio_system(self):
        """Restart PulseAudio on Linux so ALSA can take the sound devices.

        No-op on other platforms.
        """
        try:
            if platform.system() == "Linux":
                logger.info("检测到Linux系统,正在初始化音频系统...")

                try:
                    # Kill the running PulseAudio instance.
                    logger.info("正在关闭PulseAudio进程...")
                    subprocess.run(
                        ["pulseaudio", "--kill"],
                        capture_output=True,
                        text=True,
                        timeout=5
                    )
                    time.sleep(1)

                    # BUGFIX: subprocess with a list argument does NOT do
                    # shell globbing, so the former literal "/dev/snd/*"
                    # never matched any device — expand the glob in Python.
                    import glob
                    snd_devices = glob.glob("/dev/snd/*")
                    occupied = ""
                    if snd_devices:
                        result = subprocess.run(
                            ["fuser", "-v", *snd_devices],
                            capture_output=True,
                            text=True
                        )
                        # NOTE(review): fuser -v typically reports on stderr;
                        # the original code inspected stdout only — kept as-is,
                        # but worth confirming on the target distro.
                        occupied = result.stdout.strip()
                    if occupied:
                        logger.warning("检测到音频设备仍被占用:\n" + result.stdout)
                    else:
                        logger.info("音频设备已释放,可由ALSA独占使用")

                    # Restart PulseAudio.
                    logger.info("正在重新启动PulseAudio...")
                    subprocess.run(
                        ["pulseaudio", "--start"],
                        capture_output=True,
                        text=True,
                        timeout=5
                    )
                    logger.info("PulseAudio已重新启动")

                except subprocess.TimeoutExpired:
                    logger.warning("PulseAudio操作超时,继续执行")
                except FileNotFoundError:
                    logger.warning("未找到 pulseaudio 命令,请确认已安装 PulseAudio")
                except Exception as e:
                    logger.error(f"音频系统初始化异常: {e}")
                    raise
            else:
                logger.info(f"当前系统: {platform.system()},跳过音频系统初始化")

        except Exception as e:
            logger.error(f"音频系统初始化失败: {e}")
            raise

    def _init_components(self):
        """Build the component map for the configured speech service.

        Returns:
            dict: keys ``socket_client``, ``xunfei_processor``,
            ``speech_handler``, ``baidu_processor``; entries not used by the
            selected service stay None.

        Raises:
            Exception: when the Baidu recognizer fails to start.
        """
        try:
            logger.info("正在初始化机器人AI语音识别组件...")

            components = {
                'socket_client': None,
                'xunfei_processor': None,
                'speech_handler': None,
                'baidu_processor': None
            }

            if self.service_type == 'baidu_realtime':
                logger.info("使用百度实时语音识别服务")
                components['baidu_processor'] = BaiduMessageProcessor()
                if components['baidu_processor'].start():
                    logger.info("百度实时语音识别启动成功")
                else:
                    logger.error("百度实时语音识别启动失败")
                    raise Exception("百度实时语音识别启动失败")
            elif self.service_type == 'xunfei_aiui':
                logger.info("使用讯飞AIUI语音识别服务")
                # Speech handler (factory pattern) + socket client feeding
                # the Xunfei AIUI message processor.
                components['speech_handler'] = SpeechHandler()
                components['socket_client'] = SocketClient()
                components['xunfei_processor'] = XunfeiMessageProcessor(
                    components['socket_client'])
            else:
                # Previously an unknown service type silently returned an
                # all-None component map; surface the misconfiguration.
                logger.warning(f"未知的语音识别服务类型: {self.service_type}")

            logger.info("机器人AI语音识别组件初始化完成")
            return components

        except Exception as e:
            logger.error(f"组件初始化失败: {e}")
            raise

    async def sync_time(self):
        """Synchronize the system clock in a worker thread (non-blocking)."""
        try:
            logger.info("正在同步系统时间...")
            # get_running_loop() is the correct call inside a coroutine;
            # get_event_loop() is deprecated for this use since Python 3.10.
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(None, sync_system_time)
            if result:
                logger.info("系统时间同步成功")
            else:
                logger.warning("系统时间同步失败,继续运行")
        except Exception as e:
            logger.error(f"时间同步异常: {e}")
+
+
def initialize_robot_system():
    """Convenience wrapper: build a SystemInitializer and run it."""
    return SystemInitializer().initialize_system()

+ 36 - 0
utils/load_config.py

@@ -0,0 +1,36 @@
+from utils.logger import logger
+
+import yaml
+from typing import Dict, Any, Optional
+
+# 配置常量
+CONFIG_PATH = 'config/config.yaml'
+
+# 加载配置
+
+
def load_config() -> Dict[str, Any]:
    """Load the YAML config file.

    Returns:
        dict: parsed configuration; {} on read/parse failure or when the
        file is empty / not a mapping. Callers rely on ``.get`` chaining,
        so this never returns None (yaml.safe_load yields None for an
        empty file — previously leaked through).
    """
    try:
        with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
        return data if isinstance(data, dict) else {}
    except Exception as e:
        logger.error(f"加载配置文件失败: {e}")
        return {}
+
+
def get_pc2_url(endpoint: str) -> str:
    """Build the full PC2 API URL for a named endpoint.

    Args:
        endpoint: endpoint key (robot_action/robot_waypoint/robot_cmd_action).

    Returns:
        str: base_url + endpoint path ('' pieces when config is missing).
    """
    pc2_config = load_config().get('pc2', {})
    base = pc2_config.get('base_url', '')
    path = pc2_config.get('endpoints', {}).get(endpoint, '')
    return f"{base}{path}"
+
+
def get_pc2_timeout() -> int:
    """Return the PC2 API timeout in seconds (default 30)."""
    pc2_config = load_config().get('pc2', {})
    return pc2_config.get('timeout', 30)

+ 87 - 0
utils/logger.py

@@ -0,0 +1,87 @@
+"""
+统一日志工具模块
+使用loguru实现按天生成日志文件,统一项目日志格式
+"""
+import sys
+from pathlib import Path
+from loguru import logger
+from config.config.settings import config
+
+# 全局logger实例
+_global_logger = None
+
+
def setup_logger(name='robot_ai', log_dir='logs'):
    """Configure the shared loguru logger.

    Installs a colorized stdout sink and a daily-rotating, zip-compressed
    file sink under *log_dir*, then stores the configured logger in the
    module-global ``_global_logger``.

    Args:
        name: base name of the log file.
        log_dir: directory for log files (created if missing).

    Returns:
        The configured loguru logger.
    """
    global _global_logger

    # Ensure the log directory exists.
    target_dir = Path(log_dir)
    target_dir.mkdir(exist_ok=True)

    # Drop loguru's default handler before installing ours.
    logger.remove()

    # Console sink (colorized).
    logger.add(
        sys.stdout,
        format="<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
        level=config.LOG_LEVEL,
        colorize=True
    )

    # File sink: rotate at midnight, keep 30 days, compress old files,
    # write asynchronously.
    logger.add(
        target_dir / f"{name}.log",
        format="{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | {name}:{function}:{line} - {message}",
        level=config.LOG_LEVEL,
        rotation="00:00",
        retention="30 days",
        compression="zip",
        encoding="utf-8",
        enqueue=True
    )

    _global_logger = logger
    return logger
+
+
def get_logger(name=None):
    """Return a logger bound with a module name.

    Args:
        name: module name; when None, the caller's ``__name__`` is derived
            from the call stack.

    Returns:
        A loguru logger with ``name`` attached via ``bind``.

    NOTE(review): ``bind(name=...)`` stores the value in ``record["extra"]``,
    while the sink formats configured in ``setup_logger`` interpolate
    ``{name}`` from the record itself — so the bound name is likely not what
    appears in the output; confirm the intended format.
    """
    if name is None:
        # Derive the caller's module name from the call stack.
        import inspect
        frame = inspect.currentframe().f_back
        name = frame.f_globals.get('__name__', 'robot_ai')

    # Lazily configure logging on first use.
    if _global_logger is None:
        setup_logger()

    # Return a logger carrying the module name in its bound context.
    return _global_logger.bind(name=name)
+
+
# Configure the root logger at import time.
setup_logger('robot_ai', 'logs')

# Public interface: re-export the shared loguru logger directly.
__all__ = ['setup_logger', 'get_logger', 'logger']

+ 45 - 0
utils/network.py

@@ -0,0 +1,45 @@
+"""
+网络工具模块
+"""
+import subprocess
+from utils.logger import logger
+from typing import Optional
+
+
def ping_host(host: str) -> bool:
    """Check host reachability with a single ping.

    Args:
        host: target host/IP.

    Returns:
        bool: True when the host answered; False on timeout, missing ping
        binary, or any other error.
    """
    # BUGFIX: the count flag differs per platform — Windows ping uses -n,
    # Unix uses -c; the former hard-coded "-c" always failed on Windows.
    import platform
    count_flag = "-n" if platform.system() == "Windows" else "-c"
    try:
        response = subprocess.run(
            ["ping", count_flag, "1", host],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=5
        )
        return response.returncode == 0
    except Exception as e:
        # Exception already covers TimeoutExpired and FileNotFoundError;
        # the previous three-way tuple was redundant.
        logger.error(f"Ping检测失败: {e}")
        return False
+
+
def check_network_connectivity(host: str) -> Optional[str]:
    """Describe connectivity to *host*.

    Args:
        host: target host.

    Returns:
        str: a success description when reachable; None otherwise.
    """
    return f"网络连接正常: {host}" if ping_host(host) else None

+ 149 - 0
utils/pc2_requests.py

@@ -0,0 +1,149 @@
+"""
+Author: zhaoyong 77912776@qq.com
+Date: 2025-08-23 14:37:39
+LastEditTime: 2025-08-23 20:43:45
+LastEditors: zhaoyong 77912776@qq.com
+FilePath: robot_ai/utils/pc2_requests.py
+Description: PC2网络请求模块 - 异步版本
+"""
+import time
+import aiohttp
+import asyncio
+import threading
+from utils.logger import logger
+
+# 延迟加载配置,避免循环导入
+
+
def _get_config():
    """Import and return the project settings object lazily (avoids a
    circular import at module load)."""
    from config.config.settings import config as _config
    return _config
+
+
def _get_api_urls():
    """Return the (qa_callback, led) endpoint URLs from project settings."""
    cfg = _get_config()
    return cfg.get_pc2_url('qa_callback'), cfg.get_pc2_url('led')
+
+
# NOTE(review): calling _get_api_urls() here runs at import time, which
# defeats the stated goal of lazy config loading — the settings module is
# imported as soon as this one is; confirm the circular-import concern.
API_QA_CALLBACK_URL, API_LED_URL = _get_api_urls()

# De-duplication state for the fire-and-forget request wrappers below.
_last_qa_request = {"time": 0, "content": ""}
_last_led_request = {"time": 0, "mode": "", "color": ""}
_REQUEST_INTERVAL = 0.5  # minimum seconds between identical requests
+
+
async def _send_qa_task_async(result):
    """Asynchronously POST a recognition result to the QA callback endpoint.

    ``result["question"]`` takes precedence over ``result["result"]`` when
    building the payload; an empty payload is sent when neither is set.

    Returns:
        bool: True on HTTP 200, False otherwise.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer your_access_token"
    }

    logger.info(f"开始识别内容 result: {result}")

    # Build the payload from whichever field is present.
    if result.get("question"):
        payload = {"question": result["question"], "time": time.time()}
    elif result.get("result"):
        payload = {"answer": result["result"], "time": time.time()}
    else:
        payload = {}

    logger.info(f"准备发送识别结果到 QA callBack 接口: {payload}")

    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(API_QA_CALLBACK_URL, json=payload, headers=headers, timeout=5) as response:
                if response.status != 200:
                    text = await response.text()
                    logger.error(f"接口返回错误状态码: {response.status}, 响应内容: {text}")
                    return False
                logger.info("结果已成功发送到 QA callBack 接口")
                return True
    except Exception as e:
        logger.error(f"QA callBack 接口请求失败: {e}")
        return False
+
+
async def _send_led_color_task_async(mode, color):
    """Asynchronously POST a light-strip mode/color change to the LED endpoint.

    Returns:
        bool: True on HTTP 200, False otherwise.
    """
    payload = {"mode": mode, "color": color}
    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(API_LED_URL, json=payload, timeout=5) as response:
                if response.status != 200:
                    text = await response.text()
                    logger.error(
                        f"灯带颜色设置失败,状态码: {response.status}, 响应: {text}")
                    return False
                logger.info("灯带颜色设置成功")
                return True
    except aiohttp.ClientConnectionError:
        logger.error("LED 接口调用失败,连接错误")
        return False
    except Exception as e:
        logger.error(f"LED 接口调用失败: {e}")
        return False
+
+
def _send_qa_task(self, result):
    """Fire-and-forget sync wrapper around ``_send_qa_task_async``.

    Dedupes identical payloads inside a short cooldown window, then posts
    from a daemon thread so the caller never blocks. ``self`` is unused —
    presumably kept so the function can be attached as a method; signature
    preserved for caller compatibility.
    """
    global _last_qa_request

    now = time.time()
    fingerprint = str(result)

    # Skip when the same content was sent within the cooldown window.
    if (fingerprint == _last_qa_request["content"]
            and now - _last_qa_request["time"] < _REQUEST_INTERVAL):
        logger.debug(f"跳过重复的QA请求: {result}")
        return True

    # Record this request for the next dedupe check.
    _last_qa_request["time"] = now
    _last_qa_request["content"] = fingerprint

    # Run the coroutine on a throwaway event loop in a daemon thread.
    threading.Thread(
        target=lambda: asyncio.run(_send_qa_task_async(result)),
        daemon=True,
    ).start()
    return True
+
+
def _send_led_color_task(self, mode, color):
    """Fire-and-forget sync wrapper around ``_send_led_color_task_async``.

    Dedupes identical mode/color requests inside a short cooldown window,
    then posts from a daemon thread. ``self`` is unused — signature kept
    for caller compatibility.
    """
    global _last_led_request

    now = time.time()

    # Skip when the same mode/color was sent within the cooldown window.
    if (mode == _last_led_request["mode"]
            and color == _last_led_request["color"]
            and now - _last_led_request["time"] < _REQUEST_INTERVAL):
        logger.debug(f"跳过重复的LED请求: mode={mode}, color={color}")
        return True

    # Record this request for the next dedupe check.
    _last_led_request["time"] = now
    _last_led_request["mode"] = mode
    _last_led_request["color"] = color

    # Run the coroutine on a throwaway event loop in a daemon thread.
    threading.Thread(
        target=lambda: asyncio.run(_send_led_color_task_async(mode, color)),
        daemon=True,
    ).start()
    return True

+ 162 - 0
utils/time_sync.py

@@ -0,0 +1,162 @@
+"""
+时间同步工具模块
+"""
+import subprocess
+from utils.logger import logger
+import platform
+import time
+from typing import Optional
+
+
class TimeSync:
    """System clock synchronization (ntpdate on Linux, w32tm on Windows)."""

    def __init__(self):
        # NTP servers tried in order until one succeeds.
        self.ntp_servers = [
            "ntp.aliyun.com",
            "ntp1.aliyun.com",
            "ntp2.aliyun.com",
            "time.windows.com",
            "pool.ntp.org"
        ]

    def sync_time(self) -> bool:
        """Synchronize the system time for the current platform.

        Returns:
            bool: True when synchronization succeeded.
        """
        try:
            logger.info("开始同步系统时间...")

            system = platform.system()

            if system == "Linux":
                return self._sync_linux()
            elif system == "Windows":
                return self._sync_windows()
            else:
                logger.warning(f"不支持的操作系统: {system}")
                return False

        except Exception as e:
            logger.error(f"时间同步失败: {e}")
            return False

    def _sync_linux(self) -> bool:
        """Try each NTP server with ntpdate until one succeeds.

        Returns:
            bool: True on the first successful sync; False when all fail
            or ntpdate is not installed.
        """
        for server in self.ntp_servers:
            try:
                logger.info(f"尝试从 {server} 同步时间...")

                # SECURITY NOTE(review): the sudo password is hard-coded and
                # piped through a shell string. This should move to a
                # passwordless sudoers rule (or at least a secret outside
                # the source); left unchanged here because removing it would
                # break the current deployment.
                cmd = f'echo "12345678" | sudo -S ntpdate {server}'
                result = subprocess.run(
                    cmd,
                    shell=True,
                    capture_output=True,
                    text=True,
                    timeout=30
                )

                if result.returncode == 0:
                    logger.info(f"时间同步成功: {result.stdout.strip()}")
                    return True
                else:
                    logger.warning(
                        f"从 {server} 同步失败: {result.stderr.strip()}")

            except subprocess.TimeoutExpired:
                logger.warning(f"从 {server} 同步超时")
            except subprocess.CalledProcessError as e:
                logger.warning(f"从 {server} 同步失败: {e}")
            except FileNotFoundError:
                # No point trying further servers without the binary.
                logger.error("未找到ntpdate命令,请安装ntpdate工具")
                return False
            except Exception as e:
                logger.warning(f"从 {server} 同步异常: {e}")

        logger.error("所有NTP服务器同步失败")
        return False

    def _sync_windows(self) -> bool:
        """Resync the clock via ``w32tm /resync``.

        Returns:
            bool: True on success.
        """
        try:
            logger.info("Windows系统时间同步...")

            result = subprocess.run(
                ["w32tm", "/resync"],
                capture_output=True,
                text=True,
                timeout=30
            )

            if result.returncode == 0:
                logger.info("Windows时间同步成功")
                return True
            else:
                logger.warning(f"Windows时间同步失败: {result.stderr.strip()}")
                return False

        except subprocess.TimeoutExpired:
            logger.warning("Windows时间同步超时")
            return False
        except subprocess.CalledProcessError as e:
            logger.warning(f"Windows时间同步失败: {e}")
            return False
        except Exception as e:
            logger.error(f"Windows时间同步异常: {e}")
            return False

    def get_current_time(self) -> str:
        """Return the local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
        return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    def check_time_drift(self) -> Optional[float]:
        """Measure drift between network time and the local clock.

        Requires network access (worldtimeapi.org).

        Returns:
            Optional[float]: absolute drift in seconds, or None on failure.
        """
        try:
            import requests
            import datetime

            response = requests.get(
                "http://worldtimeapi.org/api/timezone/Asia/Shanghai", timeout=5)
            if response.status_code == 200:
                data = response.json()
                network_time = datetime.datetime.fromisoformat(
                    data['datetime'].replace('Z', '+00:00'))

                # BUGFIX: network_time is timezone-aware while the former
                # naive datetime.now() cannot be subtracted from it — the
                # TypeError was swallowed below and this method always
                # returned None. Compare two aware datetimes instead.
                local_time = datetime.datetime.now(network_time.tzinfo)

                return abs((network_time - local_time).total_seconds())
            return None

        except Exception as e:
            logger.debug(f"检查时间偏差失败: {e}")
            return None
+
+
def sync_system_time() -> bool:
    """Module-level convenience wrapper around ``TimeSync.sync_time``.

    Returns:
        bool: True when the clock was synchronized.
    """
    return TimeSync().sync_time()

+ 176 - 0
utils/tts_cache.py

@@ -0,0 +1,176 @@
+"""
+TTS音频缓存管理器模块
+提供音频数据的本地缓存功能,避免重复调用TTS API
+"""
+import threading
+import time
+import hashlib
+import pickle
+from pathlib import Path
+from utils.logger import logger
+
+
class TTSCache:
    """Pickle-backed cache of synthesized TTS audio keyed by (text, voice).

    Persists raw audio bytes to a single pickle file so repeated TTS API
    calls for the same text/voice can be avoided. All public methods take
    one instance-wide lock, so a shared instance is thread-safe.

    NOTE(review): the cache file is deserialized with ``pickle`` — safe
    only while this process is the sole writer of that file.
    """

    def __init__(self, cache_file: str = "tts_cache/tts_cache.pkl"):
        # Ensure the cache directory exists before any load/save.
        self.cache_file = Path(cache_file)
        self.cache_file.parent.mkdir(parents=True, exist_ok=True)
        # cache_key -> {'text', 'voice', 'audio_data', 'size', 'created_time'}
        self.cache_data = self._load_cache_data()
        self._lock = threading.Lock()

    def _load_cache_data(self) -> dict:
        """Load the pickled cache from disk; return {} when missing/corrupt."""
        if self.cache_file.exists():
            try:
                with open(self.cache_file, 'rb') as f:
                    return pickle.load(f)
            except Exception as e:
                logger.info(f"[缓存] 加载缓存数据失败: {e}")
        return {}

    def _save_cache_data(self):
        """Persist the in-memory cache to disk; failures are logged, not raised."""
        try:
            with open(self.cache_file, 'wb') as f:
                pickle.dump(self.cache_data, f)
        except Exception as e:
            logger.info(f"[缓存] 保存缓存数据失败: {e}")

    def _get_cache_key(self, text: str, voice: "str | None" = None) -> str:
        """Derive the cache key: MD5 of stripped text plus voice name."""
        if voice is None:
            # Fall back to the project-wide default voice.
            from config.config.settings import config
            voice = config.TTS_VOICE
        # Hash text content together with the voice parameter.
        cache_data = f"{text.strip()}_{voice}"
        return hashlib.md5(cache_data.encode('utf-8')).hexdigest()

    def get_cached_audio(self, text: str, voice: "str | None" = None) -> "bytes | None":
        """Return cached audio bytes for (text, voice), or None on a miss.

        A hit with empty/missing audio is treated as corrupt and evicted.
        """
        with self._lock:
            cache_key = self._get_cache_key(text, voice)
            if cache_key in self.cache_data:
                cache_info = self.cache_data[cache_key]
                audio_data = cache_info.get('audio_data')
                if audio_data:
                    logger.info(f"[缓存] 命中缓存: {text[:20]}...")
                    return audio_data
                else:
                    # Evict the corrupt entry so it is regenerated next time.
                    del self.cache_data[cache_key]
                    self._save_cache_data()

        return None

    def save_audio_cache(self, text: str, audio_data: bytes, voice: "str | None" = None):
        """Store audio bytes for (text, voice) and persist the cache to disk."""
        with self._lock:
            cache_key = self._get_cache_key(text, voice)

            try:
                # Resolve the default voice for the stored metadata.
                if voice is None:
                    from config.config.settings import config
                    voice = config.TTS_VOICE
                self.cache_data[cache_key] = {
                    'text': text,
                    'voice': voice,
                    'audio_data': audio_data,
                    'size': len(audio_data),
                    'created_time': time.time()
                }

                self._save_cache_data()
                logger.info(
                    f"[缓存] 已缓存: {text[:20]}... (大小: {len(audio_data)} bytes)")

            except Exception as e:
                logger.info(f"[缓存] 保存缓存失败: {e}")

    def clear_cache(self, max_age_days: int = 30):
        """Remove entries older than *max_age_days* days and persist if any."""
        with self._lock:
            current_time = time.time()
            expired_keys = []

            for cache_key, cache_info in self.cache_data.items():
                age_days = (current_time -
                            cache_info['created_time']) / (24 * 3600)
                if age_days > max_age_days:
                    expired_keys.append(cache_key)

            for cache_key in expired_keys:
                del self.cache_data[cache_key]

            if expired_keys:
                self._save_cache_data()
                logger.info(f"[缓存] 清理了 {len(expired_keys)} 个过期缓存")

    def get_cache_stats(self) -> dict:
        """Return entry count and total cached audio size (bytes and MB)."""
        with self._lock:
            total_size = sum(info['size']
                             for info in self.cache_data.values())
            return {
                'total_entries': len(self.cache_data),
                'total_size_bytes': total_size,
                'total_size_mb': total_size / (1024 * 1024)
            }

    def clear_all_cache(self):
        """Drop every cached entry and persist the now-empty cache."""
        with self._lock:
            self.cache_data.clear()
            self._save_cache_data()
            logger.info("[缓存] 已清空所有缓存")

    def get_cache_info(self, text: str, voice: "str | None" = None) -> "dict | None":
        """Return the metadata for a cached entry (without the audio bytes),
        or None when the entry does not exist."""
        with self._lock:
            cache_key = self._get_cache_key(text, voice)
            if cache_key in self.cache_data:
                cache_info = self.cache_data[cache_key].copy()
                # Strip the payload; callers only need the metadata here.
                cache_info.pop('audio_data', None)
                return cache_info
        return None
+
+
# Process-wide cache singleton, created at import time.
_tts_cache = TTSCache()


def get_cache_instance():
    """Return the module-level TTSCache singleton."""
    return _tts_cache


def get_cached_audio(text: str, voice: str = None) -> bytes:
    """Look up cached audio for *text* (and optional *voice*)."""
    return _tts_cache.get_cached_audio(text, voice)


def save_audio_cache(text: str, audio_data: bytes, voice: str = None):
    """Cache *audio_data* under (*text*, *voice*)."""
    _tts_cache.save_audio_cache(text, audio_data, voice)


def clear_cache(max_age_days: int = 30):
    """Drop cache entries older than *max_age_days* days."""
    _tts_cache.clear_cache(max_age_days)


def get_cache_stats():
    """Return entry-count/size statistics for the cache."""
    return _tts_cache.get_cache_stats()


def clear_all_cache():
    """Remove every cached entry."""
    _tts_cache.clear_all_cache()


def get_cache_info(text: str, voice: str = None):
    """Return metadata (no audio payload) for a cached entry, if present."""
    return _tts_cache.get_cache_info(text, voice)

Some files were not shown because too many files changed in this diff