knowledge_handler.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. """
  2. 知识库处理器模块
  3. """
  4. from utils.logger import logger
  5. import json
  6. from typing import Dict, Any, Optional
  7. from utils.logger import logger
  8. class KnowledgeHandler:
  9. """知识库处理器类"""
  10. def __init__(self):
  11. """初始化知识库处理器"""
  12. self.logger = logger.bind(name=__name__)
  13. def handle_knowledge_result(self, data: Dict[str, Any]) -> bool:
  14. """
  15. 处理知识库检索结果
  16. Args:
  17. data: AIUI知识库数据
  18. Returns:
  19. bool: 处理结果
  20. """
  21. try:
  22. self.logger.info("开始处理知识库检索结果")
  23. # 提取知识库内容
  24. knowledge_content = self._extract_knowledge_content(data)
  25. if knowledge_content:
  26. self.logger.info(f"知识库检索到内容: {knowledge_content[:100]}...")
  27. # 这里可以添加TTS播放逻辑
  28. # 暂时只记录日志,后续可以集成TTS播放
  29. self.logger.info(f"知识库内容: {knowledge_content}")
  30. return True
  31. else:
  32. self.logger.warning("知识库检索结果为空")
  33. return False
  34. except Exception as e:
  35. self.logger.error(f"处理知识库结果异常: {e}")
  36. return False
  37. def _extract_knowledge_content(self, data: Dict[str, Any]) -> Optional[str]:
  38. """
  39. 从AIUI数据中提取知识库内容
  40. Args:
  41. data: AIUI数据
  42. Returns:
  43. Optional[str]: 提取的知识库内容
  44. """
  45. try:
  46. # 解析AIUI数据结构
  47. if 'content' not in data:
  48. self.logger.warning("数据中缺少content字段")
  49. return None
  50. content = data['content']
  51. # 检查是否有知识库结果
  52. if 'info' not in content:
  53. self.logger.warning("content中缺少info字段")
  54. return None
  55. info = content['info']
  56. # 检查是否有数据
  57. if 'data' not in info or not isinstance(info['data'], list):
  58. self.logger.warning("info中缺少data字段或格式不正确")
  59. return None
  60. data_list = info['data']
  61. if not data_list:
  62. self.logger.warning("data列表为空")
  63. return None
  64. # 获取第一个结果
  65. first_result = data_list[0]
  66. # 检查结果类型
  67. if 'type' not in first_result:
  68. self.logger.warning("结果中缺少type字段")
  69. return None
  70. result_type = first_result['type']
  71. if result_type == 'text':
  72. # 文本类型结果
  73. if 'content' in first_result:
  74. return first_result['content']
  75. else:
  76. self.logger.warning("文本结果中缺少content字段")
  77. return None
  78. elif result_type == 'json':
  79. # JSON类型结果
  80. if 'content' in first_result:
  81. try:
  82. json_content = json.loads(first_result['content'])
  83. return self._extract_from_json_content(json_content)
  84. except json.JSONDecodeError as e:
  85. self.logger.error(f"JSON解析失败: {e}")
  86. return None
  87. else:
  88. self.logger.warning("JSON结果中缺少content字段")
  89. return None
  90. else:
  91. self.logger.warning(f"未知的结果类型: {result_type}")
  92. return None
  93. except Exception as e:
  94. self.logger.error(f"提取知识库内容异常: {e}")
  95. return None
  96. def _extract_from_json_content(self, json_content: Dict[str, Any]) -> Optional[str]:
  97. """
  98. 从JSON内容中提取知识库文本
  99. Args:
  100. json_content: JSON内容
  101. Returns:
  102. Optional[str]: 提取的文本内容
  103. """
  104. try:
  105. # 检查是否有nested字段(知识库结果通常在这里)
  106. if 'nested' in json_content:
  107. nested = json_content['nested']
  108. if isinstance(nested, list) and len(nested) > 0:
  109. # 取第一个最相关的结果
  110. first_result = nested[0]
  111. if isinstance(first_result, dict):
  112. # 检查score阈值
  113. score = first_result.get("score", 0.0)
  114. self.logger.info(f"知识库检索score: {score}")
  115. if score > 0.5:
  116. # 提取内容
  117. knowledge_content = first_result.get("content", "")
  118. knowledge_detail = first_result.get("detail", "")
  119. knowledge_summary = first_result.get("summary", "")
  120. knowledge_title = first_result.get("title", "")
  121. # 优先使用detail,其次summary,最后content
  122. if knowledge_detail and len(knowledge_detail.strip()) > 10:
  123. return knowledge_detail
  124. elif knowledge_summary and len(knowledge_summary.strip()) > 10:
  125. return knowledge_summary
  126. else:
  127. return knowledge_content
  128. else:
  129. self.logger.info(
  130. f"知识库检索score({score:.3f}) <= 0.5,跳过知识库内容")
  131. return None
  132. else:
  133. self.logger.warning("知识库结果格式不正确")
  134. return None
  135. else:
  136. self.logger.warning("知识库nested结果为空")
  137. return None
  138. else:
  139. self.logger.warning("JSON内容中缺少nested字段")
  140. return None
  141. except Exception as e:
  142. self.logger.error(f"从JSON内容提取文本异常: {e}")
  143. return None