Hyma7 committed on
Commit d476279
1 Parent(s): c4be5d5

Update app.py

Files changed (1)
  1. app.py +57 -102
app.py CHANGED
@@ -1,104 +1,59 @@
  import streamlit as st
- from transformers import pipeline
- from PyPDF2 import PdfReader
- import easyocr
- from PIL import Image
- import numpy as np
- from difflib import SequenceMatcher
-
- # Initialize the T5-small pipeline for lightweight generative capabilities
- qa_pipeline = pipeline("text2text-generation", model="t5-small")
-
- def analyze_pdf(file):
-     """Extract text from a PDF file."""
-     pdf_reader = PdfReader(file)
-     text = ""
-     for page in pdf_reader.pages:
-         text += page.extract_text()
-     return text
-
- def analyze_image(file):
-     """Extract text from an image file using EasyOCR."""
-     image = Image.open(file) # Open the uploaded image
-     image_np = np.array(image) # Convert the PIL image to a NumPy array
-     reader = easyocr.Reader(['en'], gpu=False) # Initialize EasyOCR reader
-     result = reader.readtext(image_np, detail=0) # Extract text without bounding box details
-     return " ".join(result) # Combine detected text into a single string
-
- def classify_and_respond(extracted_text):
-     """Classify the content and provide a response."""
-     keywords = ["commercial court", "business law", "commercial dispute"]
-     if any(keyword in extracted_text.lower() for keyword in keywords):
-         return "The file content is related to commercial courts."
-     else:
-         return "The file content is not related to commercial courts."
-
- def find_similar_question(user_question, default_questions):
-     """Find the most similar default question."""
-     best_match = None
-     highest_similarity = 0.0
-     for default_question in default_questions.keys():
-         similarity = SequenceMatcher(None, user_question.lower(), default_question.lower()).ratio()
-         if similarity > highest_similarity:
-             highest_similarity = similarity
-             best_match = default_question
-     return best_match if highest_similarity > 0.6 else None
-
- # Expanded default questions and answers
- default_questions = {
-     "What is a commercial court?": "A commercial court is a specialized court that handles business disputes and cases related to commerce and trade.",
-     "What are the rules under the Commercial Courts Act?": "The Commercial Courts Act provides guidelines for the establishment and operation of commercial courts to ensure speedy resolution of business disputes.",
-     "What are the steps to file a case in a commercial court?": "To file a case, prepare the necessary documents, hire a lawyer, draft a plaint, and submit it to the relevant commercial court along with applicable fees.",
-     "What is the jurisdiction of a commercial court?": "Commercial courts handle cases related to trade, commerce, contractual disputes, intellectual property rights, and arbitration.",
-     "What is the timeline for resolving cases in commercial courts?": "The Commercial Courts Act emphasizes quick resolution, with timelines often set between 6 months to 1 year for case disposal.",
-     "Can commercial courts handle arbitration matters?": "Yes, commercial courts can handle matters related to the enforcement and appeal of arbitration awards.",
-     "What types of cases can be filed in a commercial court?": "Cases related to trade disputes, contractual disagreements, intellectual property rights, company law issues, and arbitration fall under commercial court jurisdiction.",
-     "What is the minimum value for a case to be heard in a commercial court?": "The minimum value of the dispute to be heard in a commercial court is typically Rs. 3,00,000, but this may vary by jurisdiction.",
-     "Do commercial courts require pre-institution mediation?": "Yes, many commercial disputes require pre-institution mediation as mandated by the Commercial Courts Act to promote settlement.",
-     "What documents are needed to file a case in a commercial court?": "Key documents include the plaint, contract/agreement, proof of breach, invoices, correspondence, and other supporting evidence.",
-     "How do I appeal a decision made by a commercial court?": "Decisions made by commercial courts can be appealed in the respective High Court with jurisdiction over the matter.",
-     "Are there special procedures for intellectual property disputes in commercial courts?": "Yes, commercial courts follow expedited procedures for intellectual property cases to ensure quick resolutions.",
-     "Can individuals represent themselves in commercial court cases?": "While it is possible, it is highly recommended to hire a lawyer experienced in commercial law for better outcomes.",
-     "Are commercial courts different from arbitration?": "Yes, commercial courts are judicial bodies for resolving disputes, while arbitration is an alternative dispute resolution mechanism agreed upon by the parties."
- }
-
- # Streamlit UI
- st.title("Commercial Court Chatbot")
-
- # Input text query
- question = st.text_input("Ask a question related to commercial courts:")
-
- # File uploader
- uploaded_file = st.file_uploader("Upload a PDF or Image file for analysis:", type=["pdf", "png", "jpg", "jpeg"])
-
- if question:
-     # Check for similar default questions
-     similar_question = find_similar_question(question, default_questions)
-     if similar_question:
-         st.write(f"Answer: {default_questions[similar_question]}")
-     else:
-         # Check if the query is related to commercial courts
-         related = classify_and_respond(question)
-         if "not related" in related.lower():
-             st.write("The question is not related to commercial courts.")
-         else:
-             # Generate answer using the pipeline
-             try:
-                 response = qa_pipeline(question)[0]['generated_text']
-                 st.write(f"Answer: {response}")
-             except:
-                 st.write("I'm sorry, I couldn't generate an answer. Please try rephrasing your question.")
-
- if uploaded_file:
-     # Handle uploaded files
-     file_type = uploaded_file.type
-     extracted_text = ""
-
-     if file_type == "application/pdf":
-         extracted_text = analyze_pdf(uploaded_file)
-     elif file_type in ["image/png", "image/jpeg", "image/jpg"]:
-         extracted_text = analyze_image(uploaded_file)
-
-     if extracted_text:
-         response = classify_and_respond(extracted_text)
-         st.write(response)
+ from langchain.vectorstores import Chroma
+ from langchain.chains import RetrievalQA
+ from langchain.llms import HuggingFacePipeline
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+ from chromadb.config import Settings
+ import chromadb
+
+ # Streamlit title and description
+ st.title("Document Question Answering")
+ st.markdown("Ask questions about the documents in the pre-saved database!")
+
+ # Step 1: Load the saved Chroma database
+ persist_directory = "db"
+ client = chromadb.Client(Settings(persist_directory=persist_directory))
+ retriever = Chroma(client=client, collection_name="example_collection").as_retriever(
+     search_type="similarity", search_kwargs={"k": 2}
+ )
+
+ # Step 2: Load the language model
+ checkpoint = "MBZUAI/LaMini-Flan-T5-783M"
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+ base_model = AutoModelForSeq2SeqLM.from_pretrained(
+     checkpoint,
+     device_map="auto",
+     torch_dtype="auto"
+ )
+ pipe = pipeline(
+     "text2text-generation",
+     model=base_model,
+     tokenizer=tokenizer,
+     max_length=512,
+     do_sample=True,
+     temperature=0.3,
+     top_p=0.95,
+ )
+ local_llm = HuggingFacePipeline(pipeline=pipe)
+
+ # Step 3: Create the RetrievalQA chain
+ qa_chain = RetrievalQA.from_chain_type(
+     llm=local_llm,
+     chain_type="stuff",
+     retriever=retriever,
+     return_source_documents=True,
+ )
+
+ # Step 4: Query input
+ input_query = st.text_input("Enter your query:")
+
+ if input_query:
+     # Execute the query using the QA chain
+     llm_response = qa_chain({"query": input_query})
+
+     # Display the response
+     st.markdown(f"### Response: {llm_response['result']}")
+     st.markdown("#### Source Documents:")
+     for doc in llm_response['source_documents']:
+         st.write(doc.page_content[:500]) # Display a snippet of the source document
+
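
The updated app.py only reads from a Chroma store; it assumes a database has already been persisted under the "db" directory with a collection named "example_collection". The following is a minimal ingestion sketch of how such a store could be built. It is not part of this commit, and the "docs/" source folder, the PyPDFLoader, and the sentence-transformers/all-MiniLM-L6-v2 embedding model are illustrative assumptions rather than values taken from the repository.

import glob

from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# Load every PDF from an assumed local "docs/" folder.
documents = []
for pdf_path in glob.glob("docs/*.pdf"):
    documents.extend(PyPDFLoader(pdf_path).load())

# Split the documents into chunks small enough for retrieval.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(documents)

# Embed the chunks and persist them to the "db" directory that app.py expects.
# The embedding model here is an assumption; whichever embedder is used at
# ingestion must be consistent with how the collection is queried at runtime.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectordb = Chroma.from_documents(
    chunks,
    embeddings,
    collection_name="example_collection",
    persist_directory="db",
)
vectordb.persist()

If the embedding used during ingestion differs from the one used at query time, similarity search against the collection will return poor or meaningless matches, so this choice should be fixed before the database is built.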