Spaces:

Soumen
/

Text-Summarization-and-NLP-tasks

Running

App Files Community

Soumen committed on Sep 14, 2023

Commit

f1ebc19

1 Parent(s): 0a75d54

Update app.py

Browse files

Files changed (1) hide show

app.py +103 -65

app.py CHANGED Viewed

@@ -85,7 +85,8 @@ def bansum(text):
 if "photo" not in st.session_state:
     st.session_state["photo"]="not done"
-c2, c3 = st.columns([2,1])
 def change_photo_state():
     st.session_state["photo"]="done"
 @st.cache
@@ -93,71 +94,108 @@ def save(l):
     return l
 #@st.cache
 def main():
-    message = st.text_input("Type your text here!")
-    camera_photo = c2.camera_input("Capture a photo to summarize: ", on_change=change_photo_state)
-    uploaded_photo = save(c3.file_uploader("Upload your Images/PDF",type=['jpg','png','jpeg','pdf'], on_change=change_photo_state))
-    if st.session_state["photo"]=="done" or message:
-        if uploaded_photo and uploaded_photo.type=='application/pdf':
-            tet = read_pdf(uploaded_photo)
-            # with tempfile.NamedTemporaryFile(delete=False) as temp_file:
-            #     temp_file.write(uploaded_photo.read())
-            #     temp_file_path = temp_file.name
-            # loader = PyPDFLoader(temp_file_path)
-            # if loader:
-            #     text.extend(loader.load())
-            #     os.remove(temp_file_path)
-            # text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=100, length_function=len)
-            # text_chunks = text_splitter.split_documents(text)
-            values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
-            text = tet[values[0]*7*10:values[1]*10*100] if values[0]!=len(tet)//(10*100) else tet[len(tet)//(10*100):]
-            #st.success(type(text_chunks))
-            if st.button("English Pdf Summarize"):
-                st.subheader("Selected text for summarize: ")
-                st.success(text)
-                st.subheader("Summarized Text: ")
-                engsum(text)
-        elif uploaded_photo and uploaded_photo.type !='application/pdf':
-            text=None
-            img = Image.open(uploaded_photo)
-            img = img.save("img.png")
-            img = cv2.imread("img.png")
-            st.text("Select the summarization type:")
-            c4, c5 = st.columns([1,1])
-            if c4.button("BENGALI"):
-                text =  pytesseract.image_to_string(img, lang="ben")
-                st.subheader("সারাংশ/সারমর্ম")
-                bansum(text)
-            if c5.button("ENGLISH"):
-                text=pytesseract.image_to_string(img)
-                st.subheader("Summarized Text")
-                engsum(text)
-            #st.success(text)
-        elif camera_photo:
-            text=None
-            img = Image.open(camera_photo)
-            img = img.save("img.png")
-            img = cv2.imread("img.png")
-            #text = pytesseract.image_to_string(img) if st.checkbox("Bangla") else pytesseract.image_to_string(img, lang="ben")
-            st.text("Select the summarization type:")
-            c6, c7 = st.columns([1,1])
-            if c6.button("Bangla"):
-                text =  pytesseract.image_to_string(img, lang="ben")
-                st.subheader("সারাংশ/সারমর্ম")
-                bansum(text)
-            if c7.button("English"):
-                text=pytesseract.image_to_string(img)
-                st.subheader("Summarized Text")
-                engsum(text)
-        else:
-            text=None
-            text = message
-            c8, c9 = st.columns([1,1])
-            if c8.button("Bangla"):
-                bansum(text)
-            if c9.button("English"):
-                engsum(text)
         # if st.button("English Text Generation"):
         #     def query(payload):
         #     	response = requests.post(API_URL2, headers=headers2, json=payload)

 if "photo" not in st.session_state:
     st.session_state["photo"]="not done"
+c2, c3 = st.columns([1,1])
+a, b = st.columns([1, 1])
 def change_photo_state():
     st.session_state["photo"]="done"
 @st.cache
     return l
 #@st.cache
 def main():
+    with st.container():
+        with a:
+            #import torch
+            import streamlit as st
+            from streamlit_option_menu import option_menu
+            from streamlit_chat import message as st_message
+            from transformers import BlenderbotTokenizer
+            from transformers import BlenderbotForConditionalGeneration
+            st.title("Simple Chatbot for fun!")
+            @st.experimental_singleton
+            def get_models():
+                # it may be necessary for other frameworks to cache the model
+                # seems pytorch keeps an internal state of the conversation
+                model_name = "facebook/blenderbot-400M-distill"
+                tokenizer = BlenderbotTokenizer.from_pretrained(model_name)
+                model = BlenderbotForConditionalGeneration.from_pretrained(model_name)
+                return tokenizer, model
+            if "history" not in st.session_state:
+                st.session_state.history = []
+            st.title("Hello Chatbot")
+            def main():
+                st.text_input("Talk to the bot", key="input_text", on_change=generate_answer)
+                def generate_answer():
+                    tokenizer, model = get_models()
+                    user_message = st.session_state.input_text
+                    inputs = tokenizer(st.session_state.input_text, return_tensors="pt")
+                    result = model.generate(**inputs)
+                    message_bot = tokenizer.decode(
+                        result[0], skip_special_tokens=True
+                    )  # .replace("<s>", "").replace("</s>", "")
+                    st.session_state.history.append({"message": user_message, "is_user": True})
+                    st.session_state.history.append({"message": message_bot, "is_user": False})
+                from copyreg import clear_extension_cache
+                for chat in st.session_state.history:
+                    st_message(**chat)
+        with b:
+            message = st.text_input("Type your text here!")
+            camera_photo = c2.camera_input("Capture a photo to summarize: ", on_change=change_photo_state)
+            uploaded_photo = save(c3.file_uploader("Upload your Images/PDF",type=['jpg','png','jpeg','pdf'], on_change=change_photo_state))
+            if st.session_state["photo"]=="done" or message:
+                if uploaded_photo and uploaded_photo.type=='application/pdf':
+                    tet = read_pdf(uploaded_photo)
+                    # with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+                    #     temp_file.write(uploaded_photo.read())
+                    #     temp_file_path = temp_file.name
+                    # loader = PyPDFLoader(temp_file_path)
+                    # if loader:
+                    #     text.extend(loader.load())
+                    #     os.remove(temp_file_path)
+                    # text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=100, length_function=len)
+                    # text_chunks = text_splitter.split_documents(text)
+                    values = st.slider('Select a approximate number of lines to see and summarize',value=[0, len(tet)//(7*100)])
+                    text = tet[values[0]*7*10:values[1]*10*100] if values[0]!=len(tet)//(10*100) else tet[len(tet)//(10*100):]
+                    #st.success(type(text_chunks))
+                    if st.button("English Pdf Summarize"):
+                        st.subheader("Selected text for summarize: ")
+                        st.success(text)
+                        st.subheader("Summarized Text: ")
+                        engsum(text)
+                elif uploaded_photo and uploaded_photo.type !='application/pdf':
+                    text=None
+                    img = Image.open(uploaded_photo)
+                    img = img.save("img.png")
+                    img = cv2.imread("img.png")
+                    st.text("Select the summarization type:")
+                    c4, c5 = st.columns([1,1])
+                    if c4.button("BENGALI"):
+                        text =  pytesseract.image_to_string(img, lang="ben")
+                        st.subheader("সারাংশ/সারমর্ম")
+                        bansum(text)
+                    if c5.button("ENGLISH"):
+                        text=pytesseract.image_to_string(img)
+                        st.subheader("Summarized Text")
+                        engsum(text)
+                    #st.success(text)
+                elif camera_photo:
+                    text=None
+                    img = Image.open(camera_photo)
+                    img = img.save("img.png")
+                    img = cv2.imread("img.png")
+                    #text = pytesseract.image_to_string(img) if st.checkbox("Bangla") else pytesseract.image_to_string(img, lang="ben")
+                    st.text("Select the summarization type:")
+                    c6, c7 = st.columns([1,1])
+                    if c6.button("Bangla"):
+                        text =  pytesseract.image_to_string(img, lang="ben")
+                        st.subheader("সারাংশ/সারমর্ম")
+                        bansum(text)
+                    if c7.button("English"):
+                        text=pytesseract.image_to_string(img)
+                        st.subheader("Summarized Text")
+                        engsum(text)
+                else:
+                    text=None
+                    text = message
+                    c8, c9 = st.columns([1,1])
+                    if c8.button("Bangla"):
+                        bansum(text)
+                    if c9.button("English"):
+                        engsum(text)
         # if st.button("English Text Generation"):
         #     def query(payload):
         #     	response = requests.post(API_URL2, headers=headers2, json=payload)