fadliaulawi
committed on
Commit
•
bd28dd7
1
Parent(s):
28b6169
Differentiate validator LLM
Browse files
- app.py +15 -3
- process.py +9 -2
app.py
CHANGED
@@ -21,7 +21,7 @@ st.markdown("<div style='text-align: left; color: white; font-size: 16px'>In its
|
|
21 |
|
22 |
uploaded_files = st.file_uploader("Upload Paper(s) here :", type="pdf", accept_multiple_files=True)
|
23 |
|
24 |
-
col1, col2 = st.columns(
|
25 |
|
26 |
with col1:
|
27 |
models = (
|
@@ -45,6 +45,18 @@ with col2:
|
|
45 |
)
|
46 |
chunk_overlap = 0
|
47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
if uploaded_files:
|
49 |
journals = []
|
50 |
parseButtonHV = st.button("Get Result", key='table_HV')
|
@@ -79,7 +91,7 @@ if uploaded_files:
|
|
79 |
chunks = text_splitter.split_documents(docs)
|
80 |
|
81 |
# Start extraction process in parallel
|
82 |
-
process = Process(model)
|
83 |
with ThreadPoolExecutor() as executor:
|
84 |
result_gsd = executor.submit(process.get_entity, (chunks, 'gsd'))
|
85 |
result_summ = executor.submit(process.get_entity, (chunks, 'summ'))
|
@@ -131,8 +143,8 @@ if uploaded_files:
|
|
131 |
|
132 |
st.dataframe(cleaned_df)
|
133 |
with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
|
134 |
-
# cleaned_llm_df.to_excel(writer, sheet_name='Result with LLM')
|
135 |
cleaned_df.to_excel(writer, sheet_name='Result')
|
|
|
136 |
dataframe.to_excel(writer, sheet_name='Original')
|
137 |
writer.close()
|
138 |
|
|
|
21 |
|
22 |
uploaded_files = st.file_uploader("Upload Paper(s) here :", type="pdf", accept_multiple_files=True)
|
23 |
|
24 |
+
col1, col2, col3 = st.columns(3)
|
25 |
|
26 |
with col1:
|
27 |
models = (
|
|
|
45 |
)
|
46 |
chunk_overlap = 0
|
47 |
|
48 |
+
with col3:
|
49 |
+
models_val = (
|
50 |
+
'gpt-4-turbo',
|
51 |
+
'gemini-1.5-pro-latest'
|
52 |
+
# 'llama-3-sonar-large-32k-chat',
|
53 |
+
# 'mixtral-8x7b-instruct',
|
54 |
+
)
|
55 |
+
model_val = st.selectbox(
|
56 |
+
'Model validator selection:', models, key='model_val'
|
57 |
+
)
|
58 |
+
|
59 |
+
|
60 |
if uploaded_files:
|
61 |
journals = []
|
62 |
parseButtonHV = st.button("Get Result", key='table_HV')
|
|
|
91 |
chunks = text_splitter.split_documents(docs)
|
92 |
|
93 |
# Start extraction process in parallel
|
94 |
+
process = Process(model, model_val)
|
95 |
with ThreadPoolExecutor() as executor:
|
96 |
result_gsd = executor.submit(process.get_entity, (chunks, 'gsd'))
|
97 |
result_summ = executor.submit(process.get_entity, (chunks, 'summ'))
|
|
|
143 |
|
144 |
st.dataframe(cleaned_df)
|
145 |
with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
|
|
|
146 |
cleaned_df.to_excel(writer, sheet_name='Result')
|
147 |
+
cleaned_llm_df.to_excel(writer, sheet_name='Validate with LLM')
|
148 |
dataframe.to_excel(writer, sheet_name='Original')
|
149 |
writer.close()
|
150 |
|
process.py
CHANGED
@@ -29,7 +29,7 @@ prompts = {
|
|
29 |
|
30 |
class Process():
|
31 |
|
32 |
-
def __init__(self, llm):
|
33 |
|
34 |
if llm.startswith('gpt'):
|
35 |
self.llm = ChatOpenAI(temperature=0, model_name=llm)
|
@@ -38,6 +38,13 @@ class Process():
|
|
38 |
else:
|
39 |
self.llm = ChatOpenAI(temperature=0, model_name=llm, api_key=os.environ['PERPLEXITY_API_KEY'], base_url="https://api.perplexity.ai")
|
40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
def get_entity(self, data):
|
42 |
|
43 |
chunks, types = data
|
@@ -229,7 +236,7 @@ class Process():
|
|
229 |
json_table = df[['Genes', 'SNPs', 'Diseases']].to_json(orient='records')
|
230 |
str_json_table = json.dumps(json.loads(json_table), indent=2)
|
231 |
|
232 |
-
result = self.
|
233 |
print('val')
|
234 |
print(result)
|
235 |
|
|
|
29 |
|
30 |
class Process():
|
31 |
|
32 |
+
def __init__(self, llm, llm_val):
|
33 |
|
34 |
if llm.startswith('gpt'):
|
35 |
self.llm = ChatOpenAI(temperature=0, model_name=llm)
|
|
|
38 |
else:
|
39 |
self.llm = ChatOpenAI(temperature=0, model_name=llm, api_key=os.environ['PERPLEXITY_API_KEY'], base_url="https://api.perplexity.ai")
|
40 |
|
41 |
+
if llm_val.startswith('gpt'):
|
42 |
+
self.llm_val = ChatOpenAI(temperature=0, model_name=llm_val)
|
43 |
+
elif llm.startswith('gemini'):
|
44 |
+
self.llm_val = ChatGoogleGenerativeAI(temperature=0, model=llm_val)
|
45 |
+
else:
|
46 |
+
self.llm_val = ChatOpenAI(temperature=0, model_name=llm_val, api_key=os.environ['PERPLEXITY_API_KEY'], base_url="https://api.perplexity.ai")
|
47 |
+
|
48 |
def get_entity(self, data):
|
49 |
|
50 |
chunks, types = data
|
|
|
236 |
json_table = df[['Genes', 'SNPs', 'Diseases']].to_json(orient='records')
|
237 |
str_json_table = json.dumps(json.loads(json_table), indent=2)
|
238 |
|
239 |
+
result = self.llm_val.invoke(input=prompt_validation.format(str_json_table)).content
|
240 |
print('val')
|
241 |
print(result)
|
242 |
|