fadliaulawi committed on
Commit
bd28dd7
1 Parent(s): 28b6169

Differentiate validator LLM

Browse files
Files changed (2) hide show
  1. app.py +15 -3
  2. process.py +9 -2
app.py CHANGED
@@ -21,7 +21,7 @@ st.markdown("<div style='text-align: left; color: white; font-size: 16px'>In its
21
 
22
  uploaded_files = st.file_uploader("Upload Paper(s) here :", type="pdf", accept_multiple_files=True)
23
 
24
- col1, col2 = st.columns(2)
25
 
26
  with col1:
27
  models = (
@@ -45,6 +45,18 @@ with col2:
45
  )
46
  chunk_overlap = 0
47
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  if uploaded_files:
49
  journals = []
50
  parseButtonHV = st.button("Get Result", key='table_HV')
@@ -79,7 +91,7 @@ if uploaded_files:
79
  chunks = text_splitter.split_documents(docs)
80
 
81
  # Start extraction process in parallel
82
- process = Process(model)
83
  with ThreadPoolExecutor() as executor:
84
  result_gsd = executor.submit(process.get_entity, (chunks, 'gsd'))
85
  result_summ = executor.submit(process.get_entity, (chunks, 'summ'))
@@ -131,8 +143,8 @@ if uploaded_files:
131
 
132
  st.dataframe(cleaned_df)
133
  with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
134
- # cleaned_llm_df.to_excel(writer, sheet_name='Result with LLM')
135
  cleaned_df.to_excel(writer, sheet_name='Result')
 
136
  dataframe.to_excel(writer, sheet_name='Original')
137
  writer.close()
138
 
 
21
 
22
  uploaded_files = st.file_uploader("Upload Paper(s) here :", type="pdf", accept_multiple_files=True)
23
 
24
+ col1, col2, col3 = st.columns(3)
25
 
26
  with col1:
27
  models = (
 
45
  )
46
  chunk_overlap = 0
47
 
48
+ with col3:
49
+ models_val = (
50
+ 'gpt-4-turbo',
51
+ 'gemini-1.5-pro-latest'
52
+ # 'llama-3-sonar-large-32k-chat',
53
+ # 'mixtral-8x7b-instruct',
54
+ )
55
+ model_val = st.selectbox(
56
+ 'Model validator selection:', models_val, key='model_val'
57
+ )
58
+
59
+
60
  if uploaded_files:
61
  journals = []
62
  parseButtonHV = st.button("Get Result", key='table_HV')
 
91
  chunks = text_splitter.split_documents(docs)
92
 
93
  # Start extraction process in parallel
94
+ process = Process(model, model_val)
95
  with ThreadPoolExecutor() as executor:
96
  result_gsd = executor.submit(process.get_entity, (chunks, 'gsd'))
97
  result_summ = executor.submit(process.get_entity, (chunks, 'summ'))
 
143
 
144
  st.dataframe(cleaned_df)
145
  with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
 
146
  cleaned_df.to_excel(writer, sheet_name='Result')
147
+ cleaned_llm_df.to_excel(writer, sheet_name='Validate with LLM')
148
  dataframe.to_excel(writer, sheet_name='Original')
149
  writer.close()
150
 
process.py CHANGED
@@ -29,7 +29,7 @@ prompts = {
29
 
30
  class Process():
31
 
32
- def __init__(self, llm):
33
 
34
  if llm.startswith('gpt'):
35
  self.llm = ChatOpenAI(temperature=0, model_name=llm)
@@ -38,6 +38,13 @@ class Process():
38
  else:
39
  self.llm = ChatOpenAI(temperature=0, model_name=llm, api_key=os.environ['PERPLEXITY_API_KEY'], base_url="https://api.perplexity.ai")
40
 
 
 
 
 
 
 
 
41
  def get_entity(self, data):
42
 
43
  chunks, types = data
@@ -229,7 +236,7 @@ class Process():
229
  json_table = df[['Genes', 'SNPs', 'Diseases']].to_json(orient='records')
230
  str_json_table = json.dumps(json.loads(json_table), indent=2)
231
 
232
- result = self.llm.invoke(input=prompt_validation.format(str_json_table)).content
233
  print('val')
234
  print(result)
235
 
 
29
 
30
  class Process():
31
 
32
+ def __init__(self, llm, llm_val):
33
 
34
  if llm.startswith('gpt'):
35
  self.llm = ChatOpenAI(temperature=0, model_name=llm)
 
38
  else:
39
  self.llm = ChatOpenAI(temperature=0, model_name=llm, api_key=os.environ['PERPLEXITY_API_KEY'], base_url="https://api.perplexity.ai")
40
 
41
+ if llm_val.startswith('gpt'):
42
+ self.llm_val = ChatOpenAI(temperature=0, model_name=llm_val)
43
+ elif llm_val.startswith('gemini'):
44
+ self.llm_val = ChatGoogleGenerativeAI(temperature=0, model=llm_val)
45
+ else:
46
+ self.llm_val = ChatOpenAI(temperature=0, model_name=llm_val, api_key=os.environ['PERPLEXITY_API_KEY'], base_url="https://api.perplexity.ai")
47
+
48
  def get_entity(self, data):
49
 
50
  chunks, types = data
 
236
  json_table = df[['Genes', 'SNPs', 'Diseases']].to_json(orient='records')
237
  str_json_table = json.dumps(json.loads(json_table), indent=2)
238
 
239
+ result = self.llm_val.invoke(input=prompt_validation.format(str_json_table)).content
240
  print('val')
241
  print(result)
242