rushi29 committed on
Commit
6e53c46
·
1 Parent(s): e669c16

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -47
app.py CHANGED
@@ -22,7 +22,7 @@ st.image(url)
22
 
23
  st.markdown('_Welecome to Question Answering System 🧠 🤖_')
24
 
25
- a = st.sidebar.radio("SELECT -", ['PDF', 'Website'])
26
 
27
  ## webscrap function
28
  def my_web():
@@ -53,9 +53,7 @@ def my_web():
53
  st.write(total_lines[j])
54
 
55
 
56
-
57
-
58
- if a == 'PDF' :
59
  uploaded_files = st.file_uploader("Upload files - ", accept_multiple_files=True ,
60
  type = ['pdf', 'docx' , 'txt'] )
61
 
@@ -64,53 +62,28 @@ if a == 'PDF' :
64
  quer = st.text_input('ask me anything!', placeholder = 'ex - what is AI?')
65
  st.write('Your query is - ', quer)
66
 
67
- if st.button("Process"):
68
-
69
- for uploaded_file in uploaded_files:
70
- if uploaded_file is not None:
71
- file_details = {"Filename":uploaded_file.name,"FileType":uploaded_file.type,"FileSize":uploaded_file.size}
72
- #st.write(file_details)
73
-
74
- if uploaded_file.type == "text/plain":
75
- raw_text = str(uploaded_file.read(),"utf-8")
76
- st.write(raw_text)
77
-
78
- elif uploaded_file.type == "application/pdf" :
79
- reader = PdfReader(uploaded_file)
80
- text = ""
81
- for page in reader.pages:
82
- text += page.extract_text() + "\n"
83
- #st.write(text)
84
-
85
- data_lines = tokenize.sent_tokenize(text)
86
- #st.write(data_lines)
87
-
88
- seq = embeddings.similarity(quer, data_lines)
89
- three_most = seq[0:3]
90
- indexes = []
91
- for i in three_most:
92
- indexes.append(i[0])
93
- for j in indexes:
94
- st.write(data_lines[j])
95
-
96
 
97
- #total_lines = []
98
- #for i in data_lines:
99
- #total_lines += i
100
 
101
- #st.write(data_lines)
 
 
 
 
 
 
102
 
103
- #try:
104
- #with pdfplumber.open(uploaded_file) as pdf:
105
- #pages = pdf.pages[0]
106
- #st.write(pages.extract_text())
107
- #except:
108
- #st.write("None")
109
 
110
- elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" :
111
- raw_text = docx2txt.process(uploaded_file)
112
- st.write(raw_text)
113
-
114
  ## web
115
  else:
116
  number = st.number_input('Insert a number of Links -',value =1, step =1)
 
22
 
23
  st.markdown('_Welecome to Question Answering System 🧠 🤖_')
24
 
25
+ a = st.sidebar.radio("SELECT -", ['File Upload', 'Website'])
26
 
27
  ## webscrap function
28
  def my_web():
 
53
  st.write(total_lines[j])
54
 
55
 
56
+ if a == 'File Upload' :
 
 
57
  uploaded_files = st.file_uploader("Upload files - ", accept_multiple_files=True ,
58
  type = ['pdf', 'docx' , 'txt'] )
59
 
 
62
  quer = st.text_input('ask me anything!', placeholder = 'ex - what is AI?')
63
  st.write('Your query is - ', quer)
64
 
65
+ if st.button("Confirm!"):
66
+ text_raw = ""
67
+ for i in uploaded_files:
68
+ if i.type == "application/pdf" :
69
+ reader = PdfReader(i)
70
+ # print(reader.numPages)
71
+ pageObj = reader.getPage(0)
72
+ # print(pageObj.extractText())
73
+ text_raw += pageObj.extract_text() + "\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
+ all_tokens = tokenize.sent_tokenize(text_raw)
76
+ seq = embeddings.similarity(quer, all_tokens)
 
77
 
78
+ three_most = seq[0:3]
79
+ indexes = []
80
+ for i in three_most:
81
+ indexes.append(i[0])
82
+ # print(indexes)
83
+ for j in indexes:
84
+ st.write(all_tokens[j])
85
 
 
 
 
 
 
 
86
 
 
 
 
 
87
  ## web
88
  else:
89
  number = st.number_input('Insert a number of Links -',value =1, step =1)