def data2prompt(caption_response, ref_words): ref_word_str = ",".join(ref_words[:5]) task_prompt = "Based on the following Caption Response, you will output a description of the Major Object's name." input_str = "# Caption Response:\n" + caption_response + "\n" CoT_prompt = \ f""" Let's think it step by step. Output each field in JSON format. Include the following fields: - major_object: From the caption response, identify the major_object. If not present, extract it again from the detailed_description or caption_response. - better_major_object: Reread the description in the caption response to see if there's a more suitable word for the major object. If not, still output major_object. - echo_1: "I will generate a simple description in about 200 words in English for the better_major_object, introducing what the input object is." - description: Generate a WIKI description for the better_major_object (explain what is the better_major_object). - major_object_chinese: Translate the better_major_object into Chinese. - echo_2: "I will check whether there is synonym of the major_object_chinese in the '{ref_word_str}'." - synonym: If present, output the synonym directly; otherwise, output "NOT_INCLUDED." - recheck: Based on the content of the Caption Response, determine whether the synonym is accurate. If accurate, output "ACCURATE"; otherwise, output "NOT_ACCURATE." """ return task_prompt + input_str + CoT_prompt # def data2prompt(caption_response , ref_words ): # ref_word_str = ",".join(ref_words[:5]) # ref_str = "# Reference Word:\n"+ref_word_str+"\n\n" # task_prompt = "你将根据下面的Caption Response,输出Major Object的名称描述" # input_str = "# Caption Response:\n"+caption_response+"\n" # CoT_prompt = \ # """ # Let's think it step by step,以json形式输出逐个字段。包含以下字段 # - major_object: 从caption response中,确认major_object,如果没有,则从detailed_description或者caption_response中重新抽取 # - better_major_object: 重新阅读caption response中的描述,看看是否有更合适的major object的词语,如果没有则仍然输出major_object # - echo_1: "I will generate a simple description in about 200 words in English for the input word, introducing what the input object is" # - description: generate the description for the input object # - major_object_chinese: 将major_object翻译为中文 # - echo_2: "我将判断reference word中,是否存在major_object的同义词" # - 同义词: 如果存在,则直接输出同义词,否则输出"NOT_INCLUDED" # - recheck: 结合Caption Response的内容,判断同义词是否准确,如果准确,则输出"ACCURATE",否则输出"NOT_ACCURATE" # """ # return ref_str+task_prompt+input_str+CoT_prompt try: from src.ZhipuClient import ZhipuClient except: from ZhipuClient import ZhipuClient zhipu_client = None import json def markdown_to_json(markdown_str): # 移除Markdown语法中可能存在的标记,如代码块标记等 if markdown_str.startswith("```json"): markdown_str = markdown_str[7:-3].strip() elif markdown_str.startswith("```"): markdown_str = markdown_str[3:-3].strip() # 将字符串转换为JSON字典 json_dict = json.loads(markdown_str) return json_dict import re def forced_extract(input_str, keywords): result = {key: "" for key in keywords} for key in keywords: # 使用正则表达式来查找关键词-值对 pattern = f'"{key}":\s*"(.*?)"' match = re.search(pattern, input_str) if match: result[key] = match.group(1) return result def get_major_object(caption_response, ref_words): global zhipu_client if zhipu_client is None: zhipu_client = ZhipuClient() prompt = data2prompt(caption_response , ref_words) response = zhipu_client.prompt2response(prompt) try: json_response = markdown_to_json(response) except: keyword_list = ["major_object", "better_major_object", "description", "major_object_chinese", "synonym", "recheck"] json_response = forced_extract(response, keyword_list) return json_response def verify_keyword_in_base( json_response , database ): keyword2verify = [] if "better_major_object" in json_response: keyword2verify.append(json_response["better_major_object"].lower()) if "major_object" in json_response: keyword2verify.append(json_response["major_object"].lower()) if "recheck" in json_response and json_response["recheck"] == "ACCURATE": if "synonym" in json_response and json_response["synonym"] != "NOT_INCLUDED": keyword2verify.append(json_response["synonym"].lower()) ans = None for keyword in keyword2verify: res = database.search_by_en_keyword(keyword) if res is None: continue ans = res return ans, None if len(keyword2verify) == 0: return None, None # 这里我们需要一个新的data, keyword是中文名, translated_word是英文名,description是英文描述 description = keyword2verify[0] if "description" in json_response: description = json_response["description"] translated_word = keyword2verify[0] keyword = translated_word if "major_object_chinese" in json_response: keyword = json_response["major_object_chinese"] data = { "keyword": keyword, "translated_word": translated_word, "description": description } return None, data if __name__ == '__main__': try: from src.Database import Database except: from Database import Database db = Database() try: from src.Captioner import Captioner except: from Captioner import Captioner import os os.environ['HTTP_PROXY'] = 'http://localhost:8234' os.environ['HTTPS_PROXY'] = 'http://localhost:8234' captioner = Captioner() test_image = "temp_images/3or47vg0.jpg" caption_response = captioner.caption(test_image) # print(caption_response) search_result = db.search_with_image_name( test_image ) seen = set() keywords = [res['translated_word'] for res in search_result if not (res['translated_word'] in seen or seen.add(res['translated_word']))] # print(keywords) # prompt = data2prompt(caption_response , keywords) # print(prompt) json_response = get_major_object(caption_response , keywords) print(json_response) print() in_base_data , alt_data = verify_keyword_in_base(json_response , db) if in_base_data is not None: print(in_base_data) if alt_data is not None: print(alt_data) # {'keyword': '埃菲尔铁塔', 'translated_word': 'eiffel tower', 'description': "The Eiffel Tower is an iconic symbol of Paris and one of the most recognizable stru # ower', 'description': "The Eiffel Tower is an iconic symbol of Paris and one of the most recognizable structures in the world. Designed and constructed by the engineer Gustave Eiffel and his company for the 1889 Exposition Universelle (World's Fair) to celebrate the 100th anniversary of the French Revolution, the tower was initially criticized by some of France's leading artists and intellectuals. However, it quickly became a beloved landmark and a symbol of French pride. Standing 324 meters tall, the tower is made of wrought iron and consists of thousands of metal parts, including over 18,000 individual iron rivets. It is renowned for its architectural and engineering design, and it is visited by millions of people each year, making it one of the most visited paid monuments in the world."}