Update app.py
app.py CHANGED
@@ -33,8 +33,6 @@ import line_cor
 import altair as alt
 #pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
 from PIL import Image
-API_URL = "https://api-inference.huggingface.co/models/gpt2-large"
-headers = {"Authorization": "Bearer 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"}
 API_URL0 = "https://api-inference.huggingface.co/models/csebuetnlp/mT5_multilingual_XLSum"
 headers0 = {"Authorization": "Bearer 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"}
 
@@ -65,11 +63,29 @@ def read_pdf(file):
 # # Display the extracted text
 # #st.text(extracted_text)
 # return extracted_text
-
+def engsum(output):
+    API_URL1 = "https://api-inference.huggingface.co/models/Michael-Vptn/text-summarization-t5-base"
+    headers1 = {"Authorization": "Bearer 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"}
+
+    def query(payload):
+        response = requests.post(API_URL1, headers=headers1, json=payload)
+        return response.json()
+
+    output = query({
+        "inputs": output,
+    })
+    st.success(output)
+def bansum(text):
+    def query(payload):
+        response = requests.post(API_URL0, headers=headers0, json=payload)
+        return response.json()
+    output = query({"inputs": text})
+    st.success(output)
 
 st.title("NLP APPLICATION")
 #@st.cache_resource(experimental_allow_widgets=True)
 def main():
+    b=0
     #global tokenizer, model
     #tokenizer = AutoTokenizer.from_pretrained('t5-base')
     #model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
@@ -114,13 +130,24 @@ def main():
             #ret,thresh1 = cv2.threshold(imge,120,255,cv2.THRESH_BINARY)
             # pytesseract image to string to get results
             #text = str(pytesseract.image_to_string(img, config='--psm 6',lang="ben")) if st.checkbox("Bangla") else str(pytesseract.image_to_string(thresh1, config='--psm 6'))
-
+            if st.checkbox("Bangla"):
+                b=1
+                text = pytesseract.image_to_string(img, lang="ben")
+            else:
+                b=0
+                text=pytesseract.image_to_string(img)
             #st.success(text)
         elif camera_photo:
             img = Image.open(camera_photo)
             img = img.save("img.png")
             img = cv2.imread("img.png")
-            text = pytesseract.image_to_string(img) if st.checkbox("Bangla") else pytesseract.image_to_string(img, lang="ben")
+            #text = pytesseract.image_to_string(img) if st.checkbox("Bangla") else pytesseract.image_to_string(img, lang="ben")
+            if st.checkbox("Bangla"):
+                b=1
+                text = pytesseract.image_to_string(img, lang="ben")
+            else:
+                b=0
+                text=pytesseract.image_to_string(img)
             #st.success(text)
         elif uploaded_photo==None and camera_photo==None:
             #our_image=load_image("image.jpg")
@@ -128,6 +155,9 @@ def main():
         text = message
 
     if st.checkbox("English Text Generation"):
+        API_URL = "https://api-inference.huggingface.co/models/gpt2"
+        headers = {"Authorization": "Bearer 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"}
+
        def query(payload):
            response = requests.post(API_URL, headers=headers, json=payload)
            return response.json()
@@ -136,12 +166,15 @@ def main():
            "inputs": text,
        })
        st.success(output)
-
-
-
-
-
-
-
+        if st.checkbox("Summarize generated text"):
+            engsum(output)
+
+
+    if st.checkbox("Mark for Text Summarization"):
+        if b==1:
+            bansum(text)
+        else:
+            engsum(text)
+#END
 if __name__ == '__main__':
     main()
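For reference outside Streamlit, the Inference API call pattern that the new engsum()/bansum() helpers and the gpt2 generation block rely on can be exercised with a minimal sketch like the one below. The model URLs match the diff; the token is a placeholder, and unpacking "summary_text"/"generated_text" from the JSON is an assumption about the hosted endpoints' usual response shape, not something the committed code does (app.py passes the raw JSON straight to st.success()).

import requests

# Placeholder token -- substitute a real Hugging Face token; the committed app
# hard-codes its own tokens in headers/headers0/headers1.
HF_TOKEN = "hf_xxx"
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}

# Model URLs as used in the diff above.
SUMMARY_URL = "https://api-inference.huggingface.co/models/csebuetnlp/mT5_multilingual_XLSum"
GENERATE_URL = "https://api-inference.huggingface.co/models/gpt2"

def query(url: str, text: str):
    """POST {"inputs": ...} to a hosted model and return the decoded JSON."""
    response = requests.post(url, headers=HEADERS, json={"inputs": text})
    response.raise_for_status()
    return response.json()

def summarize(text: str) -> str:
    data = query(SUMMARY_URL, text)
    # Assumed summarization shape: [{"summary_text": "..."}]; fall back to raw JSON.
    if isinstance(data, list) and data and "summary_text" in data[0]:
        return data[0]["summary_text"]
    return str(data)

def generate(prompt: str) -> str:
    data = query(GENERATE_URL, prompt)
    # Assumed text-generation shape: [{"generated_text": "..."}].
    if isinstance(data, list) and data and "generated_text" in data[0]:
        return data[0]["generated_text"]
    return str(data)

if __name__ == "__main__":
    print(generate("Streamlit is"))
    print(summarize("Streamlit is an open-source Python framework for building data apps."))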