joaomorossini
committed on
Commit
•
23fee25
1
Parent(s):
0a08480
refactoring: create separate file for the prompt template
Browse files
- app.py +5 -39
- prompt_template.py +36 -0
app.py
CHANGED
@@ -9,6 +9,7 @@ from pandas import DataFrame as PandasDataFrame
|
|
9 |
from llm import MessageChatCompletion
|
10 |
from customization import css, js
|
11 |
from examples import example_1, example_2, example_3, example_4
|
|
|
12 |
|
13 |
load_dotenv()
|
14 |
|
@@ -33,46 +34,10 @@ def build_context(row):
|
|
33 |
|
34 |
def click_button(model, api_key, abstract):
|
35 |
labels = df['Subsector'].tolist()
|
36 |
-
|
37 |
language_model = MessageChatCompletion(model=model, api_key=api_key)
|
38 |
-
system_message = (
|
39 |
-
|
40 |
-
Each subsector is defined by a unique set of characteristics:
|
41 |
-
Name: The name of the subsector.
|
42 |
-
Definition: A brief description of the subsector.
|
43 |
-
Keywords: Important words associated with the subsector.
|
44 |
-
Does include: Elements typically found within the subsector.
|
45 |
-
Does not include: Elements typically not found within the subsector.
|
46 |
-
Consider 'nan' values as 'not available' or 'not applicable'.
|
47 |
-
When classifying an abstract, provide the following:
|
48 |
-
## 1. Subsector(s): Name(s) of the subsector(s) you believe the abstract belongs to.
|
49 |
-
## 2. Reasoning:
|
50 |
-
### Conclusion: Explain why the abstract was classified in this subsector(s), based on its alignment with the subsector's definition, keywords, and includes/excludes criteria.
|
51 |
-
### Keywords found: Specify any 'Keywords' from the subsector that are present in the abstract.
|
52 |
-
### Does include found: Specify any 'Includes' criteria from the subsector that are present in the abstract.
|
53 |
-
### If no specific 'Keywords' or 'Includes' are found, state that none were directly identified, but the classification was made based on the overall relevance to the subsector.
|
54 |
-
## 3. Non-selected Subsectors:
|
55 |
-
- If a subsector had a high probability of being a match but was ultimately not chosen because the abstract contained terms from the 'Does not include' list, provide a brief explanation. Highlight the specific 'Does not include' terms found and why this led to the subsector's exclusion.
|
56 |
-
## 4. Other Subsectors: You MUST ALWAYS SUGGEST NEW SUBSECTOR LABELS, different from the ones provided by the user. They can be new subsectors or subsets the given subsectors. REMEMBER: This is mandatory
|
57 |
-
## 5. Match Score: Inside a markdown code block, provide a PYTHON DICTIONARY containing the match scores for all existing subsector labels and for any new labels suggested in item 4. Each probability should be formatted to show two decimal places.
|
58 |
-
<context>
|
59 |
-
{contexts}
|
60 |
-
</context>
|
61 |
-
""")
|
62 |
-
|
63 |
-
|
64 |
-
user_message = f"""
|
65 |
-
Classify this patent abstract into one or more labels, then format your response as markdown:
|
66 |
-
|
67 |
-
<labels>
|
68 |
-
{labels}
|
69 |
-
</labels>
|
70 |
-
|
71 |
-
<abstract>
|
72 |
-
{abstract}
|
73 |
-
</abstract>
|
74 |
-
"""
|
75 |
-
|
76 |
language_model.new_system_message(content=system_message)
|
77 |
language_model.new_user_message(content=user_message)
|
78 |
language_model.send_message()
|
@@ -94,6 +59,7 @@ def click_button(model, api_key, abstract):
|
|
94 |
|
95 |
return match_score_dict, response_reasoning, logs_df
|
96 |
|
|
|
97 |
def on_select(evt: gr.SelectData): # SelectData is a subclass of EventData
|
98 |
selected = df.iloc[[evt.index[0]]].iloc[0]
|
99 |
name, definition, keywords, does_include, does_not_include = selected['Subsector'], selected['Definition'], selected['Keywords'], selected['Does include'], selected['Does not include']
|
|
|
9 |
from llm import MessageChatCompletion
|
10 |
from customization import css, js
|
11 |
from examples import example_1, example_2, example_3, example_4
|
12 |
+
from prompt_template import system_message_template, user_message_template
|
13 |
|
14 |
load_dotenv()
|
15 |
|
|
|
34 |
|
35 |
def click_button(model, api_key, abstract):
|
36 |
labels = df['Subsector'].tolist()
|
37 |
+
prompt_context = [build_context(row) for _, row in df.iterrows()]
|
38 |
language_model = MessageChatCompletion(model=model, api_key=api_key)
|
39 |
+
system_message = system_message_template.format(prompt_context=prompt_context)
|
40 |
+
user_message = user_message_template.format(labels=labels, abstract=abstract)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
language_model.new_system_message(content=system_message)
|
42 |
language_model.new_user_message(content=user_message)
|
43 |
language_model.send_message()
|
|
|
59 |
|
60 |
return match_score_dict, response_reasoning, logs_df
|
61 |
|
62 |
+
|
63 |
def on_select(evt: gr.SelectData): # SelectData is a subclass of EventData
|
64 |
selected = df.iloc[[evt.index[0]]].iloc[0]
|
65 |
name, definition, keywords, does_include, does_not_include = selected['Subsector'], selected['Definition'], selected['Keywords'], selected['Does include'], selected['Does not include']
|
prompt_template.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
system_message_template = """
|
2 |
+
You are a system designed to classify patent abstracts into one or more subsectors based on their content.
|
3 |
+
Each subsector is defined by a unique set of characteristics:
|
4 |
+
Name: The name of the subsector.
|
5 |
+
Definition: A brief description of the subsector.
|
6 |
+
Keywords: Important words associated with the subsector.
|
7 |
+
Does include: Elements typically found within the subsector.
|
8 |
+
Does not include: Elements typically not found within the subsector.
|
9 |
+
Consider 'nan' values as 'not available' or 'not applicable'.
|
10 |
+
When classifying an abstract, provide the following:
|
11 |
+
## 1. Subsector(s): Name(s) of the subsector(s) you believe the abstract belongs to.
|
12 |
+
## 2. Reasoning:
|
13 |
+
### Conclusion: Explain why the abstract was classified in this subsector(s), based on its alignment with the subsector's definition, keywords, and includes/excludes criteria.
|
14 |
+
### Keywords found: Specify any 'Keywords' from the subsector that are present in the abstract.
|
15 |
+
### Does include found: Specify any 'Includes' criteria from the subsector that are present in the abstract.
|
16 |
+
### If no specific 'Keywords' or 'Includes' are found, state that none were directly identified, but the classification was made based on the overall relevance to the subsector.
|
17 |
+
## 3. Non-selected Subsectors:
|
18 |
+
- If a subsector had a high probability of being a match but was ultimately not chosen because the abstract contained terms from the 'Does not include' list, provide a brief explanation. Highlight the specific 'Does not include' terms found and why this led to the subsector's exclusion.
|
19 |
+
## 4. Other Subsectors: You MUST ALWAYS SUGGEST NEW SUBSECTOR LABELS, different from the ones provided by the user. They can be new subsectors or subsets the given subsectors. REMEMBER: This is mandatory
|
20 |
+
## 5. Match Score: Inside a markdown code block, provide a PYTHON DICTIONARY containing the match scores for all existing subsector labels and for any new labels suggested in item 4. Each probability should be formatted to show two decimal places.
|
21 |
+
<context>
|
22 |
+
{prompt_context}
|
23 |
+
</context>
|
24 |
+
"""
|
25 |
+
|
26 |
+
user_message_template = """
|
27 |
+
Classify this patent abstract into one or more labels, then format your response as markdown:
|
28 |
+
|
29 |
+
<labels>
|
30 |
+
{labels}
|
31 |
+
</labels>
|
32 |
+
|
33 |
+
<abstract>
|
34 |
+
{abstract}
|
35 |
+
</abstract>
|
36 |
+
"""
|