regraded01 committed on
Commit
d3ce4e0
·
1 Parent(s): 5aaa965

feat: include a pdf reader

Browse files
Files changed (1) hide show
  1. app_langchain.py +18 -6
app_langchain.py CHANGED
@@ -2,10 +2,19 @@ from langchain_core.prompts import ChatPromptTemplate
2
  from langchain_core.runnables import RunnablePassthrough
3
 
4
  import streamlit as st
 
5
 
6
  from src.utils import load_config_values
7
  from src.dev_llm import FakeLLM
8
 
 
 
 
 
 
 
 
 
9
  # TODO: Change this to reflect prod model rather than dev models
10
  # Initialise fake values and a fake LLM to test out the full pipeline
11
  tmp_llm = FakeLLM()
@@ -23,20 +32,23 @@ system_message, context_message, model_id = load_config_values(
23
  prompt = ChatPromptTemplate.from_template(
24
  template=context_message,
25
  )
 
 
 
 
 
 
26
 
27
  chain = (
28
  {
29
  "system_message": lambda x: system_message,
30
- "pdf_text": lambda x: tmp_pdf_text,
31
  "data_to_extract": RunnablePassthrough()
32
  }
33
  |prompt
34
  |tmp_llm
35
  )
36
 
37
- st.file_uploader(
38
- label="Upload PDF:\n",
39
- type=".pdf"
40
- )
41
 
42
- st.text(chain.invoke("{\"appointment_date\"}"))
 
2
  from langchain_core.runnables import RunnablePassthrough
3
 
4
  import streamlit as st
5
+ import fitz
6
 
7
  from src.utils import load_config_values
8
  from src.dev_llm import FakeLLM
9
 
10
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page of an uploaded PDF.

    Parameters
    ----------
    pdf_file : file-like object
        An open binary stream containing PDF data (e.g. a Streamlit
        ``UploadedFile``); its entire contents are read into memory.

    Returns
    -------
    str
        The concatenated text of all pages, in page order.
    """
    document = fitz.open(stream=pdf_file.read(), filetype="pdf")
    try:
        # A fitz Document is iterable over its pages; join once instead
        # of quadratic += string concatenation.
        return "".join(page.get_text() for page in document)
    finally:
        # fitz documents wrap native (MuPDF) resources — close explicitly
        # rather than relying on garbage collection.
        document.close()
17
+
18
  # TODO: Change this to reflect prod model rather than dev models
19
  # Initialise fake values and a fake LLM to test out the full pipeline
20
  tmp_llm = FakeLLM()
 
32
  prompt = ChatPromptTemplate.from_template(
33
  template=context_message,
34
  )
35
uploaded_doc = st.file_uploader(
    label="Upload PDF:\n",
    type=".pdf",
)

user_input_1 = st.text_input(label="input")

# Build and run the chain only once a PDF has actually been uploaded.
# In the original flow `uploaded_text` was assigned inside the
# `is not None` guard but referenced by the chain unconditionally, so
# the first render (before any upload) raised a NameError.
if uploaded_doc is not None:
    uploaded_text = extract_text_from_pdf(uploaded_doc)

    chain = (
        {
            # Static system prompt; lambdas ignore the runnable input.
            "system_message": lambda x: system_message,
            "pdf_text": lambda x: uploaded_text,
            # Pass the user's query through unchanged.
            "data_to_extract": RunnablePassthrough(),
        }
        | prompt
        | tmp_llm
    )

    st.text(chain.invoke(user_input_1))