Spaces:

KingNish
/

Doc-Reader-and-Chat

Running

App Files Community

KingNish committed on Sep 24, 2024

Commit

e27d06b

verified ·

1 Parent(s): 3d46c63

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -131

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ from openpyxl import load_workbook
 from pptx import Presentation
 import gradio as gr
 import io
-from huggingface_hub import InferenceClient
 import re
 import zipfile
 import xml.etree.ElementTree as ET
@@ -11,10 +10,6 @@ import filetype
 # Constants
 CHUNK_SIZE = 32000
-MAX_NEW_TOKENS = 4096
-# Initialize the Mistral chat model
-client = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407")
 # --- Utility Functions ---
@@ -168,131 +163,23 @@ def read_document(file, clean=True):
             return f"Error reading file: {e}", 0
-# --- Chat Functions ---
-def generate_mistral_response(message):
-    """Generates a response from the Mistral API."""
-    stream = client.text_generation(
-        message,
-        max_new_tokens=MAX_NEW_TOKENS,
-        stream=True,
-        details=True,
-        return_full_text=False
-    )
-    output = ""
-    for response in stream:
-        if not response.token.text == "</s>":
-            output += response.token.text
-        yield output
-def chat_document(file, question, clean=True):
-    """Chats with a document using a single Mistral API call."""
-    content, length = read_document(file, clean)
-    if length > CHUNK_SIZE:
-        content = content[:CHUNK_SIZE]  # Limit to max chunk size
-    system_prompt = """
-    You are a helpful and informative assistant that can answer questions based on the content of documents.
-    You will receive the content of a document and a question about it.
-    Your task is to provide a concise and accurate answer to the question based solely on the provided document content.
-    If the document does not contain enough information to answer the question, simply state that you cannot answer the question based on the provided information.
-    """
-    message = f"""[INST] [SYSTEM] {system_prompt}
-    Document Content: {content}
-    Question: {question}
-    Answer:"""
-    yield from generate_mistral_response(message)
-def chat_document_v2(file, question, clean=True):
-    """Chats with a document using chunk-based Mistral API calls and summarizes the answers."""
-    content, length = read_document(file, clean)
-    chunks = split_content(content)
-    system_prompt = """
-    You are a helpful and informative assistant that can answer questions based on the content of documents.
-    You will receive the content of a document and a question about it.
-    Your task is to provide a concise and accurate answer to the question based solely on the provided document content.
-    If the document does not contain enough information to answer the question, simply state that you cannot answer the question based on the provided information.
-    """
-    all_answers = []
-    for chunk in chunks:
-        message = f"""[INST] [SYSTEM] {system_prompt}
-        Document Content: {chunk[:CHUNK_SIZE]}
-        Question: {question}
-        Answer:"""
-        response = ""
-        for stream_response in generate_mistral_response(message):
-            response = stream_response  # Update with latest response
-        all_answers.append(response)
-    # Summarize all answers using Mistral
-    summary_prompt = """
-    You are a helpful and informative assistant that can summarize multiple answers related to the same question.
-    You will receive a list of answers to a question, and your task is to generate a concise and comprehensive summary that incorporates the key information from all the answers.
-    Avoid repeating information unnecessarily and focus on providing the most relevant and accurate summary based on the provided answers.
-    Answers:
-    """
-    all_answers_str = "\n".join(all_answers)
-    summary_message = f"""[INST] [SYSTEM] {summary_prompt}
-    {all_answers_str[:30000]}
-    Summary:"""
-    yield from generate_mistral_response(summary_message)
 # --- Gradio Interface ---
-with gr.Blocks() as demo:
-    with gr.Tabs():
-        with gr.TabItem("Document Reader"):
-            iface1 = gr.Interface(
-                fn=read_document,
-                inputs=[
-                    gr.File(label="Upload a Document"),
-                    gr.Checkbox(label="Clean Text", value=True),
-                ],
-                outputs=[
-                    gr.Textbox(label="Document Content"),
-                    gr.Number(label="Document Length (characters)"),
-                ],
-                title="Document Reader",
-                description="Upload a document (PDF, XLSX, PPTX, TXT, CSV, DOC, DOCX and Code or text file) to read its content.",
-                concurrency_limit = None
-            )
-        with gr.TabItem("Document Chat"):
-            iface2 = gr.Interface(
-                fn=chat_document,
-                inputs=[
-                    gr.File(label="Upload a Document"),
-                    gr.Textbox(label="Question"),
-                    gr.Checkbox(label="Clean and Compress Text", value=True),
-                ],
-                outputs=gr.Markdown(label="Answer"),
-                title="Document Chat",
-                description="Upload a document and ask questions about its content.",
-                concurrency_limit = None
-            )
-        with gr.TabItem("Document Chat V2"):
-            iface3 = gr.Interface(
-                fn=chat_document_v2,
-                inputs=[
-                    gr.File(label="Upload a Document"),
-                    gr.Textbox(label="Question"),
-                    gr.Checkbox(label="Clean Text", value=True),
-                ],
-                outputs=gr.Markdown(label="Answer"),
-                title="Document Chat V2",
-                description="Upload a document and ask questions about its content (using chunk-based approach).",
-                concurrency_limit =None
-            )
-demo.launch()

 from pptx import Presentation
 import gradio as gr
 import io
 import re
 import zipfile
 import xml.etree.ElementTree as ET
 # Constants
 CHUNK_SIZE = 32000
 # --- Utility Functions ---
             return f"Error reading file: {e}", 0
 # --- Gradio Interface ---
+iface = gr.Interface(
+    fn=read_document,
+    inputs=[
+        gr.File(label="Upload a Document"),
+        gr.Checkbox(label="Clean Text", value=True),
+    ],
+    outputs=[
+        gr.Textbox(label="Document Content"),
+        gr.Number(label="Document Length (characters)"),
+    ],
+    title="Better Document Reader for Hugging Face Chat Tools",
+    description="Upload a document (PDF, XLSX, PPTX, TXT, CSV, DOC, DOCX and Code or text file) to read its content. "
+                "This tool is designed for use with Hugging Face Chat Tools: "
+                "[https://hf.co/chat/tools/66ed8236a35891a61e2bfcf2](https://hf.co/chat/tools/66ed8236a35891a61e2bfcf2)",
+    concurrency_limit = None
+)
+iface.launch()