Spaces:

KalbeDigitalLab
/

nutrigenme-paper-extractor

Running

fadliaulawi committed on May 13

Commit

bd28dd7

•

1 Parent(s): 28b6169

Differentiate validator LLM

Files changed (2) hide show

app.py CHANGED Viewed

@@ -21,7 +21,7 @@ st.markdown("<div style='text-align: left; color: white; font-size: 16px'>In its
 uploaded_files = st.file_uploader("Upload Paper(s) here :", type="pdf", accept_multiple_files=True)
-col1, col2 = st.columns(2)
 with col1:
     models = (
@@ -45,6 +45,18 @@ with col2:
     )
     chunk_overlap = 0
 if uploaded_files:
     journals = []
     parseButtonHV = st.button("Get Result", key='table_HV')
@@ -79,7 +91,7 @@ if uploaded_files:
                     chunks = text_splitter.split_documents(docs)
                     # Start extraction process in parallel
-                    process = Process(model)
                     with ThreadPoolExecutor() as executor:
                         result_gsd = executor.submit(process.get_entity, (chunks, 'gsd'))
                         result_summ = executor.submit(process.get_entity, (chunks, 'summ'))
@@ -131,8 +143,8 @@ if uploaded_files:
                     st.dataframe(cleaned_df)
                     with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
-                        # cleaned_llm_df.to_excel(writer, sheet_name='Result with LLM')
                         cleaned_df.to_excel(writer, sheet_name='Result')
                         dataframe.to_excel(writer, sheet_name='Original')
                         writer.close()

 uploaded_files = st.file_uploader("Upload Paper(s) here :", type="pdf", accept_multiple_files=True)
+col1, col2, col3 = st.columns(3)
 with col1:
     models = (
     )
     chunk_overlap = 0
+# Third column: pick the LLM used to validate the extracted results.
+with col3:
+    # Validator candidates are kept separate from the extraction `models`
+    # tuple. The trailing comma keeps entries from being merged by implicit
+    # string concatenation if a commented-out model is re-enabled.
+    models_val = (
+        'gpt-4-turbo',
+        'gemini-1.5-pro-latest',
+        # 'llama-3-sonar-large-32k-chat',
+        # 'mixtral-8x7b-instruct',
+    )
+    # Offer the validator list here, not the extraction `models` list.
+    model_val = st.selectbox(
+        'Model validator selection:', models_val, key='model_val'
+    )
 if uploaded_files:
     journals = []
     parseButtonHV = st.button("Get Result", key='table_HV')
                     chunks = text_splitter.split_documents(docs)
                     # Start extraction process in parallel
+                    process = Process(model, model_val)
                     with ThreadPoolExecutor() as executor:
                         result_gsd = executor.submit(process.get_entity, (chunks, 'gsd'))
                         result_summ = executor.submit(process.get_entity, (chunks, 'summ'))
                     st.dataframe(cleaned_df)
                     with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
                         cleaned_df.to_excel(writer, sheet_name='Result')
+                        cleaned_llm_df.to_excel(writer, sheet_name='Validate with LLM')
                         dataframe.to_excel(writer, sheet_name='Original')
                         writer.close()

process.py CHANGED Viewed

@@ -29,7 +29,7 @@ prompts = {
 class Process():
-    def __init__(self, llm):
         if llm.startswith('gpt'):
             self.llm = ChatOpenAI(temperature=0, model_name=llm)
@@ -38,6 +38,13 @@ class Process():
         else:
             self.llm = ChatOpenAI(temperature=0, model_name=llm, api_key=os.environ['PERPLEXITY_API_KEY'], base_url="https://api.perplexity.ai")
     def get_entity(self, data):
         chunks, types = data
@@ -229,7 +236,7 @@ class Process():
         json_table = df[['Genes', 'SNPs', 'Diseases']].to_json(orient='records')
         str_json_table = json.dumps(json.loads(json_table), indent=2)
-        result = self.llm.invoke(input=prompt_validation.format(str_json_table)).content
         print('val')
         print(result)

 class Process():
+    def __init__(self, llm, llm_val):
         if llm.startswith('gpt'):
             self.llm = ChatOpenAI(temperature=0, model_name=llm)
         else:
             self.llm = ChatOpenAI(temperature=0, model_name=llm, api_key=os.environ['PERPLEXITY_API_KEY'], base_url="https://api.perplexity.ai")
+        # Build the validator client from `llm_val` alone. Every branch must
+        # test the validator's own name: testing `llm` would select the client
+        # by the extraction model whenever the two selections differ.
+        if llm_val.startswith('gpt'):
+            self.llm_val = ChatOpenAI(temperature=0, model_name=llm_val)
+        elif llm_val.startswith('gemini'):
+            self.llm_val = ChatGoogleGenerativeAI(temperature=0, model=llm_val)
+        else:
+            # Any other model name is sent to the Perplexity endpoint.
+            self.llm_val = ChatOpenAI(temperature=0, model_name=llm_val, api_key=os.environ['PERPLEXITY_API_KEY'], base_url="https://api.perplexity.ai")
     def get_entity(self, data):
         chunks, types = data
         json_table = df[['Genes', 'SNPs', 'Diseases']].to_json(orient='records')
         str_json_table = json.dumps(json.loads(json_table), indent=2)
+        result = self.llm_val.invoke(input=prompt_validation.format(str_json_table)).content
         print('val')
         print(result)