r'''
Author: zhaoyong 77912776@qq.com
Date: 2025-08-16 16:55:13
LastEditTime: 2025-08-16 18:31:31
LastEditors: zhaoyong 77912776@qq.com
FilePath: \robot_ai\strategies\vision\qwenv.py
Description: Helpers that call DashScope Qwen models to describe a base64 image and to answer short text questions.
'''
import os
from http import HTTPStatus

import dashscope

from utils.logger import logger
# Describe a base64-encoded image with the Qwen vision model.
  13. def process_base64_image(base64_pic: str = '', question: str = '描述下你看到了什么'):
  14. # 示例参照
  15. messages = [
  16. {
  17. "role": "system",
  18. "content": [
  19. {
  20. "text": "你是一个具身智能机器人,你具备详尽观察和描述环境的能力,能够准确捕捉场景中的人物、物体、颜色、动作,以及它们的空间位置和相互关系。描述内容必须从\"我面前的是\"开始,以第一人称视角表达,语句完整流畅,并保留必要的逗号和句号。禁止出现\"图中\"、\"图片中\"等类似表达,不使用markdown、列表、代码块、引号,并且不得出现多余的换行。回复字数在20~40个字符"}
  21. ]
  22. },
  23. {
  24. "role": "user",
  25. "content": [
  26. {
  27. "image": ""},
  28. {"text": "描述下你看到了什么"}]
  29. }]
  30. if base64_pic:
  31. pic_content = "data:image/jpeg;base64," + base64_pic
  32. messages[1]["content"][0]["image"] = pic_content
  33. messages[1]["content"][1]["text"] = question
  34. try:
  35. from config.config.settings import config
  36. baolian_api_key = config._config_data.get(
  37. 'baolian_api_key', 'sk-1dcf9fc95ee54c3bbb2a3fdc9ebaa49f')
  38. baolian_model = config._config_data.get(
  39. 'baolian_model', 'qwen-vl-max-latest')
  40. dashscope.api_key = baolian_api_key
  41. # 发送图片到模型
  42. response = dashscope.MultiModalConversation.call(
  43. model=baolian_model,
  44. messages=messages
  45. )
  46. # 解析响应(描述内容)
  47. result = response.output.choices[0].message.content[0]["text"]
  48. return result
  49. except Exception as e:
  50. logger.info(e)
  51. def qwen_nlp(question: str = '描述下你看到了什么'):
  52. # 示例参照
  53. messages = [
  54. {
  55. "role": "system",
  56. "content": [
  57. {
  58. "text": "你是一个具身智能机器人,快速理解意图,并用口语化的方式作出回答,语气自然、语速适中,回答简洁明了,适合语音播放,不使用markdown、列表、代码块、引号,并且不得出现多余的换行。回复字数在20~40个字符"}
  59. ]
  60. },
  61. {
  62. "role": "user",
  63. "content": "你是谁?"
  64. }]
  65. messages[1]["content"] = question
  66. try:
  67. from config.config.settings import config
  68. baolian_api_key = config._config_data.get(
  69. 'baolian_api_key', 'sk-1dcf9fc95ee54c3bbb2a3fdc9ebaa49f')
  70. baolian_nlp_model = config._config_data.get(
  71. 'baolian_nlp_model', 'qwen-plus-latest')
  72. dashscope.api_key = baolian_api_key
  73. # 发送图片到模型
  74. response = dashscope.Generation.call(
  75. model=baolian_nlp_model,
  76. messages=messages
  77. )
  78. if response and response.get("output") and response["output"].get("text"):
  79. result = response["output"]["text"]
  80. return result
  81. else:
  82. return "抱歉,我当前无法处理这个问题,请再说一遍"
  83. except Exception as e:
  84. logger.info(f"千问NLP处理异常: {e}")
  85. return "抱歉,处理出现异常"
  86. if __name__ == '__main__':
  87. pass