tseronni committed on
Commit
6b0dc83
·
1 Parent(s): 09e191e

first commit

Browse files
Files changed (3) hide show
  1. app.py +41 -29
  2. chatgpt.py +42 -25
  3. logo_genome.png +0 -0
app.py CHANGED
@@ -1,7 +1,7 @@
 
 
1
  import gradio as gr
2
  import pandas as pd
3
- from transformers import pipeline
4
- from openai import OpenAI
5
 
6
  from chatgpt import MessageChatCompletion
7
 
@@ -32,29 +32,34 @@ def build_context(row):
32
  return context
33
 
34
 
35
- def click_button(model, abstract):
36
 
37
- classifier = pipeline("zero-shot-classification", model="sileod/deberta-v3-base-tasksource-nli")
38
  labels = df['Subsector'].tolist()
39
- result = classifier(abstract, labels, multi_label=True)
40
- # best_x_labels = [label for label in result["labels"]][0:5]
41
- # df_best = df[df.Subsector.isin(best_x_labels)]
42
- # contexts = [build_context(row) for _, row in df_best.iterrows()]
43
-
44
  contexts = [build_context(row) for _, row in df.iterrows()]
45
-
46
- my_chatgpt = MessageChatCompletion(model='gpt-4')
47
-
48
- system_message = ('You are a system that will receive an patent abstract and needs to classify in one or more patent subsectors.'
49
- 'You need to consider that each subsector has an name, definition, keywords, Does include and Does not included.'
50
- 'Definition describe the subsector. '
51
- 'The Keywords are important words for that subsector. '
52
- 'Does include are words that can be included.'
53
- 'Does not include are words that can not be in the patent abstract that is been classifying.'
54
- 'nan will be consider as empty.'
55
- 'Your answer will be subsector: the subsector result name and reasoning: The conclusion why you classify in that subsector specifying if has keywords and does include.'
56
- 'Folow the subsectors:'
57
- f'{contexts}')
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  user_message = f'Classify this patent abstract: {abstract}'
60
 
@@ -64,8 +69,15 @@ def click_button(model, abstract):
64
 
65
  reasoning = my_chatgpt.get_last_message()
66
 
67
- return {label: round(prob, 4) for label, prob in zip(result["labels"], result["scores"])}, reasoning
 
 
 
 
 
 
68
 
 
69
 
70
 
71
  def on_select(evt: gr.SelectData): # SelectData is a subclass of EventData
@@ -82,7 +94,10 @@ with gr.Blocks() as startup_genome_demo:
82
 
83
  with gr.Tab("Patent Discovery"):
84
  with gr.Row():
85
- dropdown_model = gr.Dropdown(label="Model", choices=['model_1', 'model_2', 'model_3'], interactive=True)
 
 
 
86
  with gr.Row(equal_height=True):
87
  abstract_description = gr.Textbox(label="Abstract description", lines=10, max_lines=10000, interactive=True, value="A holographic optical accessing system includes a light source for emitting a light beam; an optical assembly module for receiving the light beam and generating a signal beam and a reference beam that are parallel to each other rather than overlap with each other, and have the same first polarization state; a lens module for focusing the signal beam and the reference beam on a focal point at the same time; and a storage medium for recording the focal point. The optical assembly module includes at least a data plane for displaying image information so that the signal beam contains the image information.")
88
  with gr.Row():
@@ -91,7 +106,7 @@ with gr.Blocks() as startup_genome_demo:
91
  with gr.Column(scale=4):
92
  label_result = gr.Label(num_top_classes=None)
93
  with gr.Column(scale=6):
94
- reasoning = gr.Textbox(label="Reasoning", lines=5)
95
  with gr.Tab("Sector definitions"):
96
  with gr.Row():
97
  with gr.Column(scale=4):
@@ -104,10 +119,7 @@ with gr.Blocks() as startup_genome_demo:
104
  does_include = gr.Textbox(label="Does include", lines=4)
105
  does_not_include = gr.Textbox(label="Does not include", lines=3)
106
 
107
- with gr.Tab("Logs"):
108
- pass
109
-
110
- btn_get_result.click(fn=click_button, inputs=[dropdown_model, abstract_description], outputs=[label_result, reasoning])
111
  df_subsectors.select(fn=on_select, outputs=[subsector_name, s1_definition, s1_keywords, does_include, does_not_include])
112
 
113
  if __name__ == "__main__":
 
1
+ import re
2
+
3
  import gradio as gr
4
  import pandas as pd
 
 
5
 
6
  from chatgpt import MessageChatCompletion
7
 
 
32
  return context
33
 
34
 
35
+ def click_button(model, api_key, abstract):
36
 
 
37
  labels = df['Subsector'].tolist()
 
 
 
 
 
38
  contexts = [build_context(row) for _, row in df.iterrows()]
39
+ my_chatgpt = MessageChatCompletion(model=model, api_key=api_key)
40
+
41
+ system_message = (
42
+ "You are a system designed to classify patent abstracts into one or more subsectors based on their content. "
43
+ "Each subsector is defined by a unique set of characteristics: "
44
+ "\n- Name: The name of the subsector."
45
+ "\n- Definition: A brief description of the subsector."
46
+ "\n- Keywords: Important words associated with the subsector."
47
+ "\n- Does include: Elements typically found within the subsector."
48
+ "\n- Does not include: Elements typically not found within the subsector."
49
+ "\nConsider 'nan' values as 'not available' or 'not applicable'. "
50
+ "When classifying an abstract, provide the following: "
51
+ "\n1. Subsector(s): Name(s) of the subsector(s) you believe the abstract belongs to."
52
+ "\n2. Reasoning: "
53
+ "\n\t- Conclusion: Explain why the abstract was classified in this subsector(s), based on its alignment with the subsector's definition, keywords, and includes/excludes criteria."
54
+ "\n\t- Keywords found: Specify any 'Keywords' from the subsector that are present in the abstract."
55
+ "\n\t- Does include found: Specify any 'Includes' criteria from the subsector that are present in the abstract."
56
+ "\n\t- If no specific 'Keywords' or 'Includes' are found, state that none were directly identified, but the classification was made based on the overall relevance to the subsector."
57
+ "\n3. Non-selected Subsectors: "
58
+ "\n\t- If a subsector had a high probability of being a match but was ultimately not chosen because the abstract contained terms from the 'Does not include' list, provide a brief explanation. Highlight the specific 'Does not include' terms found and why this led to the subsector's exclusion."
59
+ f"\n4. Probability: Provide a dictionary containing the subsectors ({labels}) and their corresponding probabilities of being a match. Each probability should be formatted to show two decimal places."
60
+ "\nYour task is to classify the following patent abstract into the appropriate subsector(s), taking into account the details of each subsector as described above. Here are the subsectors and their definitions for your reference:\n"
61
+ f"{contexts}"
62
+ )
63
 
64
  user_message = f'Classify this patent abstract: {abstract}'
65
 
 
69
 
70
  reasoning = my_chatgpt.get_last_message()
71
 
72
+ dict_pattern = r'\{.*?\}'
73
+ probabilities_match = re.search(dict_pattern, reasoning, re.DOTALL)
74
+
75
+ if probabilities_match and my_chatgpt.error is False:
76
+ probabilities_dict = eval(probabilities_match.group(0))
77
+ else:
78
+ probabilities_dict = {}
79
 
80
+ return probabilities_dict, reasoning
81
 
82
 
83
  def on_select(evt: gr.SelectData): # SelectData is a subclass of EventData
 
94
 
95
  with gr.Tab("Patent Discovery"):
96
  with gr.Row():
97
+ with gr.Column(scale=5):
98
+ dropdown_model = gr.Dropdown(label="Model", choices=["gpt-4", "gpt-4-turbo-preview", "gpt-3.5-turbo", "gpt-3.5-turbo-0125"], value="gpt-3.5-turbo", multiselect=False, interactive=True)
99
+ with gr.Column(scale=5):
100
+ api_key = gr.Textbox(label="API KEY", interactive=True, lines=1, max_lines=1)
101
  with gr.Row(equal_height=True):
102
  abstract_description = gr.Textbox(label="Abstract description", lines=10, max_lines=10000, interactive=True, value="A holographic optical accessing system includes a light source for emitting a light beam; an optical assembly module for receiving the light beam and generating a signal beam and a reference beam that are parallel to each other rather than overlap with each other, and have the same first polarization state; a lens module for focusing the signal beam and the reference beam on a focal point at the same time; and a storage medium for recording the focal point. The optical assembly module includes at least a data plane for displaying image information so that the signal beam contains the image information.")
103
  with gr.Row():
 
106
  with gr.Column(scale=4):
107
  label_result = gr.Label(num_top_classes=None)
108
  with gr.Column(scale=6):
109
+ reasoning = gr.Textbox(label="Reasoning", lines=34)
110
  with gr.Tab("Sector definitions"):
111
  with gr.Row():
112
  with gr.Column(scale=4):
 
119
  does_include = gr.Textbox(label="Does include", lines=4)
120
  does_not_include = gr.Textbox(label="Does not include", lines=3)
121
 
122
+ btn_get_result.click(fn=click_button, inputs=[dropdown_model, api_key, abstract_description], outputs=[label_result, reasoning])
 
 
 
123
  df_subsectors.select(fn=on_select, outputs=[subsector_name, s1_definition, s1_keywords, does_include, does_not_include])
124
 
125
  if __name__ == "__main__":
chatgpt.py CHANGED
@@ -11,23 +11,23 @@ class MessageChatCompletion:
11
  n: int = 1,
12
  stream: bool = False,
13
  stop: str = "\n",
14
- max_tokens: int = 256,
15
  presence_penalty: float = 0.0,
16
  frequency_penalty: float = 0.0,
17
  logit_bias: int = None,
18
  user: str = ''):
19
 
20
- openai.api_key = ''
21
- openai.organization = ""
22
 
23
- if model in ["gpt-4", "gpt-4-turbo-preview", "gpt-3.5-turbo"]:
24
  self.endpoint = "https://api.openai.com/v1/chat/completions"
25
  else:
26
  self.endpoint = "https://api.openai.com/v1/completions"
27
 
28
  self.headers = {
29
  "Content-Type": "application/json",
30
- "Authorization": f"Bearer {api_key}",
31
  }
32
 
33
  self.prompt = {
@@ -56,6 +56,8 @@ class MessageChatCompletion:
56
 
57
  self.response = ''
58
 
 
 
59
  def new_message(self, role: str = 'user', content: str = '', name: str = ''):
60
  new_message = {"role": role, "content": f"{content}"}
61
  if name != '':
@@ -77,23 +79,38 @@ class MessageChatCompletion:
77
 
78
  def send_message(self):
79
 
80
- response = openai.chat.completions.create(
81
- model=self.prompt['model'],
82
- messages=self.prompt['messages'],
83
- frequency_penalty=self.prompt['frequency_penalty'],
84
- temperature=self.prompt['temperature'],
85
- max_tokens=self.prompt['max_tokens'],
86
- top_p=self.prompt['top_p'],
87
- presence_penalty=self.prompt['presence_penalty'],
88
- stream=True
89
- )
90
-
91
- full_response = ""
92
- for chunk in response:
93
- chunk_message = chunk.choices[0].delta.content
94
- if chunk_message is not None:
95
- full_response += chunk_message
96
-
97
- self.new_system_message(content=full_response)
98
-
99
- return self.response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  n: int = 1,
12
  stream: bool = False,
13
  stop: str = "\n",
14
+ max_tokens: int = 4096,
15
  presence_penalty: float = 0.0,
16
  frequency_penalty: float = 0.0,
17
  logit_bias: int = None,
18
  user: str = ''):
19
 
20
+ self.api_key = api_key
21
+ openai.api_key = self.api_key
22
 
23
+ if model in ["gpt-4", "gpt-4-turbo-preview", "gpt-3.5-turbo", "gpt-3.5-turbo-0125"]:
24
  self.endpoint = "https://api.openai.com/v1/chat/completions"
25
  else:
26
  self.endpoint = "https://api.openai.com/v1/completions"
27
 
28
  self.headers = {
29
  "Content-Type": "application/json",
30
+ "Authorization": f"Bearer {self.api_key}",
31
  }
32
 
33
  self.prompt = {
 
56
 
57
  self.response = ''
58
 
59
+ self.error = False
60
+
61
  def new_message(self, role: str = 'user', content: str = '', name: str = ''):
62
  new_message = {"role": role, "content": f"{content}"}
63
  if name != '':
 
79
 
80
  def send_message(self):
81
 
82
+ try:
83
+ self.error = False
84
+
85
+ response = openai.chat.completions.create(
86
+ model=self.prompt['model'],
87
+ messages=self.prompt['messages'],
88
+ frequency_penalty=self.prompt['frequency_penalty'],
89
+ temperature=self.prompt['temperature'],
90
+ max_tokens=self.prompt['max_tokens'],
91
+ top_p=self.prompt['top_p'],
92
+ presence_penalty=self.prompt['presence_penalty'],
93
+ stream=self.prompt['stream']
94
+ )
95
+
96
+ full_response = response.choices[0].message.content
97
+
98
+ # if stream = True
99
+ # full_response = ""
100
+ # for chunk in response:
101
+ # chunk_message = chunk.choices[0].delta.content
102
+ # if chunk_message != '':
103
+ # full_response += chunk_message
104
+
105
+ self.new_system_message(content=full_response)
106
+
107
+ return self.response
108
+
109
+ except Exception as e:
110
+ self.error = True
111
+
112
+ if self.api_key == '' or self.api_key is None:
113
+ self.new_system_message(content="API key is missing")
114
+ else:
115
+ self.new_system_message(content=f"Unable to generate ChatCompletion response\nException: {e}")
116
+ return e
logo_genome.png ADDED