Update app.py
Browse files
app.py
CHANGED
@@ -22,7 +22,7 @@ st.image(url)
|
|
22 |
|
23 |
st.markdown('_Welecome to Question Answering System 🧠 🤖_')
|
24 |
|
25 |
-
a = st.sidebar.radio("SELECT -", ['
|
26 |
|
27 |
## web-scraping function
|
28 |
def my_web():
|
@@ -53,9 +53,7 @@ def my_web():
|
|
53 |
st.write(total_lines[j])
|
54 |
|
55 |
|
56 |
-
|
57 |
-
|
58 |
-
if a == 'PDF' :
|
59 |
uploaded_files = st.file_uploader("Upload files - ", accept_multiple_files=True ,
|
60 |
type = ['pdf', 'docx' , 'txt'] )
|
61 |
|
@@ -64,53 +62,28 @@ if a == 'PDF' :
|
|
64 |
quer = st.text_input('ask me anything!', placeholder = 'ex - what is AI?')
|
65 |
st.write('Your query is - ', quer)
|
66 |
|
67 |
-
if st.button("
|
68 |
-
|
69 |
-
for
|
70 |
-
if
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
st.write(raw_text)
|
77 |
-
|
78 |
-
elif uploaded_file.type == "application/pdf" :
|
79 |
-
reader = PdfReader(uploaded_file)
|
80 |
-
text = ""
|
81 |
-
for page in reader.pages:
|
82 |
-
text += page.extract_text() + "\n"
|
83 |
-
#st.write(text)
|
84 |
-
|
85 |
-
data_lines = tokenize.sent_tokenize(text)
|
86 |
-
#st.write(data_lines)
|
87 |
-
|
88 |
-
seq = embeddings.similarity(quer, data_lines)
|
89 |
-
three_most = seq[0:3]
|
90 |
-
indexes = []
|
91 |
-
for i in three_most:
|
92 |
-
indexes.append(i[0])
|
93 |
-
for j in indexes:
|
94 |
-
st.write(data_lines[j])
|
95 |
-
|
96 |
|
97 |
-
|
98 |
-
|
99 |
-
#total_lines += i
|
100 |
|
101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
|
103 |
-
#try:
|
104 |
-
#with pdfplumber.open(uploaded_file) as pdf:
|
105 |
-
#pages = pdf.pages[0]
|
106 |
-
#st.write(pages.extract_text())
|
107 |
-
#except:
|
108 |
-
#st.write("None")
|
109 |
|
110 |
-
elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" :
|
111 |
-
raw_text = docx2txt.process(uploaded_file)
|
112 |
-
st.write(raw_text)
|
113 |
-
|
114 |
## web: branch that handles website links
|
115 |
else:
|
116 |
number = st.number_input('Insert a number of Links -',value =1, step =1)
|
|
|
22 |
|
23 |
st.markdown('_Welecome to Question Answering System 🧠 🤖_')
|
24 |
|
25 |
+
a = st.sidebar.radio("SELECT -", ['File Upload', 'Website'])
|
26 |
|
27 |
## web-scraping function
|
28 |
def my_web():
|
|
|
53 |
st.write(total_lines[j])
|
54 |
|
55 |
|
56 |
+
if a == 'File Upload' :
|
|
|
|
|
57 |
uploaded_files = st.file_uploader("Upload files - ", accept_multiple_files=True ,
|
58 |
type = ['pdf', 'docx' , 'txt'] )
|
59 |
|
|
|
62 |
quer = st.text_input('ask me anything!', placeholder = 'ex - what is AI?')
|
63 |
st.write('Your query is - ', quer)
|
64 |
|
65 |
+
if st.button("Confirm!"):
|
66 |
+
text_raw = ""
|
67 |
+
for i in uploaded_files:
|
68 |
+
if i.type == "application/pdf" :
|
69 |
+
reader = PdfReader(i)
|
70 |
+
# print(reader.numPages)
|
71 |
+
pageObj = reader.getPage(0)
|
72 |
+
# print(pageObj.extractText())
|
73 |
+
text_raw += pageObj.extract_text() + "\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
+
all_tokens = tokenize.sent_tokenize(text_raw)
|
76 |
+
seq = embeddings.similarity(quer, all_tokens)
|
|
|
77 |
|
78 |
+
three_most = seq[0:3]
|
79 |
+
indexes = []
|
80 |
+
for i in three_most:
|
81 |
+
indexes.append(i[0])
|
82 |
+
# print(indexes)
|
83 |
+
for j in indexes:
|
84 |
+
st.write(all_tokens[j])
|
85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
|
|
|
|
|
|
|
|
|
87 |
## web: branch that handles website links
|
88 |
else:
|
89 |
number = st.number_input('Insert a number of Links -',value =1, step =1)
|