"""
#App: NLP App with Streamlit
Description
This is a Natural Language Processing (NLP) based application that is useful for
document/text summarization from Bangla images and English images/PDF files.
"""
# Core Pkgs
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
#os.system('sudo apt-get install tesseract-ocr-eng')
#os.system('sudo apt-get install tesseract-ocr-ben')

#os.system('wget https://github.com/tesseract-ocr/tessdata/raw/main/ben.traineddata')
#os.system('gunzip ben.traineddata.gz ')
#os.system('sudo mv -v ben.traineddata /usr/local/share/tessdata/')
#os.system('pip install -q pytesseract')
#os.system('conda install -c conda-forge poppler')
import streamlit as st
st.set_page_config(page_title="Summarization Tool", layout="wide", initial_sidebar_state="expanded")
import torch
import docx2txt
from PIL import Image 
from PyPDF2 import PdfFileReader
from pdf2image import convert_from_bytes
import pdfplumber
#from line_cor import mark_region
import pdf2image
import requests
import cv2
import numpy as np
import pytesseract
import line_cor
import altair as alt
#pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
from PIL import Image
# Hugging Face Inference API endpoint and request headers used by bansum().
# NOTE(review): the bearer token is hard-coded in source control; consider
# loading it from an environment variable or Streamlit secrets and rotating it.
API_URL0 = "https://api-inference.huggingface.co/models/csebuetnlp/mT5_multilingual_XLSum"
headers0 = {"Authorization": "Bearer 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"}

	
def read_pdf(file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: PDF source passed directly to ``PdfFileReader``.

    Returns:
        A string that begins with a single space, followed by each page's
        extracted text with one space appended after it.
    """
    reader = PdfFileReader(file)
    page_total = reader.numPages
    # The space appended to each page keeps neighbouring pages from fusing.
    chunks = [
        reader.getPage(index).extractText() + " "
        for index in range(page_total)
    ]
    return " " + "".join(chunks)
# def read_pdf_with_pdfplumber(file):
#     # Open the uploaded PDF file with pdfplumber
#     with pdfplumber.open(file) as pdf:
#         extracted_text = ''
#         for page in pdf.pages:
#             extracted_text += page.extract_text()

#     # Display the extracted text
#     #st.text(extracted_text)
#     return extracted_text
def engsum(output):
    """Send text to the English summarization model and display the reply.

    Args:
        output: Text to summarize; sent as the ``inputs`` field of the JSON
            request body.

    The decoded JSON reply is shown unmodified via ``st.success``. Nothing is
    returned.
    """
    endpoint = "https://api-inference.huggingface.co/models/Michael-Vptn/text-summarization-t5-base"
    # NOTE(review): hard-coded credential; consider moving it to configuration.
    auth_header = {"Authorization": "Bearer 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"}

    reply = requests.post(endpoint, headers=auth_header, json={"inputs": output})
    st.success(reply.json())
def bansum(text):
    """Send text to the multilingual summarization model and display the reply.

    Args:
        text: Text to summarize; sent as the ``inputs`` field of the JSON
            request body.

    Uses the module-level ``API_URL0`` endpoint and ``headers0`` headers. The
    decoded JSON reply is shown unmodified via ``st.success``. Nothing is
    returned.
    """
    reply = requests.post(API_URL0, headers=headers0, json={"inputs": text})
    st.success(reply.json())

# Page heading; runs at module level, before main() is invoked at the bottom of the file.
st.title("NLP APPLICATION")
#@st.cache_resource(experimental_allow_widgets=True)
def main():
    """Render the page: collect input, extract its text, and offer NLP actions.

    Input can be typed text, an uploaded PDF or image, or a camera photo.
    PDF text is obtained with ``read_pdf``; image text is obtained with
    Tesseract OCR. Two optional actions are then offered through checkboxes:

    * "English Text Generation" sends the text to the GPT-2 endpoint and shows
      the generated text, which can in turn be summarized with ``engsum``.
    * "Mark for Text Summarization" summarizes the extracted text with
      ``bansum`` when the "Bangla" OCR box was ticked, otherwise ``engsum``.
    """
    # Kept deliberately as a bare expression statement (it is not the
    # docstring). With Streamlit's "magic" enabled, standalone literals in the
    # main script are written to the page, so removing it could change what
    # users see -- TODO confirm whether that rendering is intended.
    """ NLP Based Application with Streamlit """
    st.markdown("""
    	#### Description
    	##This is a Natural Language Processing(NLP) base Application that is useful for basic NLP tasks such as follows:
        PDF Document/Image's Text Summarization.
    	""")

    def change_photo_state():
        # Widget callback: records that a file or photo has been provided.
        st.session_state["photo"] = "done"

    st.subheader("Please, feed your pdf/images/text, features/services will appear automatically!")
    message = st.text_input("Type your text here!")
    uploaded_photo = st.file_uploader("Upload your PDF", type=['jpg','png','jpeg','pdf'], on_change=change_photo_state)
    camera_photo = st.camera_input("Take a photo, Containing English texts", on_change=change_photo_state)
    if "photo" not in st.session_state:
        st.session_state["photo"] = "not done"

    # Nothing to process until a file/photo has been provided or text typed.
    if not (st.session_state["photo"] == "done" or message):
        return

    is_bangla = False  # only the image OCR branches can switch this on
    text = " "
    if uploaded_photo and uploaded_photo.type == 'application/pdf':
        tet = read_pdf(uploaded_photo)
        # Slider positions are multiplied by 70 to obtain character offsets
        # (presumably ~70 characters per "line" -- TODO confirm intent).
        values = st.slider('Select a approximate number of lines to see and summarize', value=[0, len(tet)//(7*100)])
        if values[0] != len(tet)//(7*10):
            text = tet[values[0]*7*10:values[1]*7*10]
        else:
            text = tet[len(tet)//(7*100):]
        st.success(text)
    elif uploaded_photo:
        text, is_bangla = _ocr_image(uploaded_photo)
    elif camera_photo:
        text, is_bangla = _ocr_image(camera_photo)
    elif uploaded_photo is None and camera_photo is None:
        text = message

    if st.checkbox("English Text Generation"):
        text_output = _generate_english_text(text)
        if text_output:
            st.success(text_output)
        if st.checkbox("Summarize generated text"):
            if text_output:
                engsum(text_output)
            else:
                # Previously this path raised NameError; report it instead.
                st.warning("No generated text is available to summarize.")

    if st.checkbox("Mark for Text Summarization"):
        if is_bangla:
            bansum(text)
        else:
            engsum(text)


def _ocr_image(image_file):
    """Run Tesseract OCR on an uploaded or captured image.

    Args:
        image_file: File-like image object accepted by ``PIL.Image.open``.

    Returns:
        ``(text, is_bangla)``: the recognised text, and whether the "Bangla"
        checkbox was ticked (in which case the ``ben`` language data is used).
    """
    # Round-trip through a PNG on disk so OpenCV decodes the pixels the same
    # way as before this helper was extracted.
    Image.open(image_file).save("img.png")
    img = cv2.imread("img.png")
    if st.checkbox("Bangla"):
        return pytesseract.image_to_string(img, lang="ben"), True
    return pytesseract.image_to_string(img), False


def _generate_english_text(prompt):
    """Request a GPT-2 continuation of ``prompt`` from the Inference API.

    Args:
        prompt: Text sent as the ``inputs`` field of the JSON request body.

    Returns:
        The ``generated_text`` of the first item in the reply, or ``None`` when
        the reply is not a non-empty list of dicts or carries no such text
        (for example an error payload).
    """
    API_URL2 = "https://api-inference.huggingface.co/models/gpt2"
    # NOTE(review): hard-coded credential; consider moving it to configuration.
    headers2 = {"Authorization": "Bearer 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"}

    response = requests.post(API_URL2, headers=headers2, json={"inputs": prompt})
    output = response.json()
    if isinstance(output, list) and output and isinstance(output[0], dict):
        return output[0].get("generated_text") or None
    return None
# Build the page only when this file is executed as the main script.
if __name__ == '__main__':
    main()