Hyma7 committed on
Commit d476279
1 Parent(s): c4be5d5

Update app.py

Files changed (1)
  1. app.py +57 -102
app.py CHANGED
@@ -1,104 +1,59 @@
  import streamlit as st
- from transformers import pipeline
- from PyPDF2 import PdfReader
- import easyocr
- from PIL import Image
- import numpy as np
- from difflib import SequenceMatcher
-
- # Initialize the T5-small pipeline for lightweight generative capabilities
- qa_pipeline = pipeline("text2text-generation", model="t5-small")
-
- def analyze_pdf(file):
-     """Extract text from a PDF file."""
-     pdf_reader = PdfReader(file)
-     text = ""
-     for page in pdf_reader.pages:
-         text += page.extract_text()
-     return text
-
- def analyze_image(file):
-     """Extract text from an image file using EasyOCR."""
-     image = Image.open(file) # Open the uploaded image
-     image_np = np.array(image) # Convert the PIL image to a NumPy array
-     reader = easyocr.Reader(['en'], gpu=False) # Initialize EasyOCR reader
-     result = reader.readtext(image_np, detail=0) # Extract text without bounding box details
-     return " ".join(result) # Combine detected text into a single string
-
- def classify_and_respond(extracted_text):
-     """Classify the content and provide a response."""
-     keywords = ["commercial court", "business law", "commercial dispute"]
-     if any(keyword in extracted_text.lower() for keyword in keywords):
-         return "The file content is related to commercial courts."
-     else:
-         return "The file content is not related to commercial courts."
-
- def find_similar_question(user_question, default_questions):
-     """Find the most similar default question."""
-     best_match = None
-     highest_similarity = 0.0
-     for default_question in default_questions.keys():
-         similarity = SequenceMatcher(None, user_question.lower(), default_question.lower()).ratio()
-         if similarity > highest_similarity:
-             highest_similarity = similarity
-             best_match = default_question
-     return best_match if highest_similarity > 0.6 else None
-
- # Expanded default questions and answers
- default_questions = {
-     "What is a commercial court?": "A commercial court is a specialized court that handles business disputes and cases related to commerce and trade.",
-     "What are the rules under the Commercial Courts Act?": "The Commercial Courts Act provides guidelines for the establishment and operation of commercial courts to ensure speedy resolution of business disputes.",
-     "What are the steps to file a case in a commercial court?": "To file a case, prepare the necessary documents, hire a lawyer, draft a plaint, and submit it to the relevant commercial court along with applicable fees.",
-     "What is the jurisdiction of a commercial court?": "Commercial courts handle cases related to trade, commerce, contractual disputes, intellectual property rights, and arbitration.",
-     "What is the timeline for resolving cases in commercial courts?": "The Commercial Courts Act emphasizes quick resolution, with timelines often set between 6 months to 1 year for case disposal.",
-     "Can commercial courts handle arbitration matters?": "Yes, commercial courts can handle matters related to the enforcement and appeal of arbitration awards.",
-     "What types of cases can be filed in a commercial court?": "Cases related to trade disputes, contractual disagreements, intellectual property rights, company law issues, and arbitration fall under commercial court jurisdiction.",
-     "What is the minimum value for a case to be heard in a commercial court?": "The minimum value of the dispute to be heard in a commercial court is typically Rs. 3,00,000, but this may vary by jurisdiction.",
-     "Do commercial courts require pre-institution mediation?": "Yes, many commercial disputes require pre-institution mediation as mandated by the Commercial Courts Act to promote settlement.",
-     "What documents are needed to file a case in a commercial court?": "Key documents include the plaint, contract/agreement, proof of breach, invoices, correspondence, and other supporting evidence.",
-     "How do I appeal a decision made by a commercial court?": "Decisions made by commercial courts can be appealed in the respective High Court with jurisdiction over the matter.",
-     "Are there special procedures for intellectual property disputes in commercial courts?": "Yes, commercial courts follow expedited procedures for intellectual property cases to ensure quick resolutions.",
-     "Can individuals represent themselves in commercial court cases?": "While it is possible, it is highly recommended to hire a lawyer experienced in commercial law for better outcomes.",
-     "Are commercial courts different from arbitration?": "Yes, commercial courts are judicial bodies for resolving disputes, while arbitration is an alternative dispute resolution mechanism agreed upon by the parties."
- }
-
- # Streamlit UI
- st.title("Commercial Court Chatbot")
-
- # Input text query
- question = st.text_input("Ask a question related to commercial courts:")
-
- # File uploader
- uploaded_file = st.file_uploader("Upload a PDF or Image file for analysis:", type=["pdf", "png", "jpg", "jpeg"])
-
- if question:
-     # Check for similar default questions
-     similar_question = find_similar_question(question, default_questions)
-     if similar_question:
-         st.write(f"Answer: {default_questions[similar_question]}")
-     else:
-         # Check if the query is related to commercial courts
-         related = classify_and_respond(question)
-         if "not related" in related.lower():
-             st.write("The question is not related to commercial courts.")
-         else:
-             # Generate answer using the pipeline
-             try:
-                 response = qa_pipeline(question)[0]['generated_text']
-                 st.write(f"Answer: {response}")
-             except:
-                 st.write("I'm sorry, I couldn't generate an answer. Please try rephrasing your question.")
-
- if uploaded_file:
-     # Handle uploaded files
-     file_type = uploaded_file.type
-     extracted_text = ""
-
-     if file_type == "application/pdf":
-         extracted_text = analyze_pdf(uploaded_file)
-     elif file_type in ["image/png", "image/jpeg", "image/jpg"]:
-         extracted_text = analyze_image(uploaded_file)
-
-     if extracted_text:
-         response = classify_and_respond(extracted_text)
-         st.write(response)
+ from langchain.vectorstores import Chroma
+ from langchain.chains import RetrievalQA
+ from langchain.llms import HuggingFacePipeline
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+ from chromadb.config import Settings
+ import chromadb
+
+ # Streamlit title and description
+ st.title("Document Question Answering")
+ st.markdown("Ask questions about the documents in the pre-saved database!")
+
+ # Step 1: Load the saved Chroma database
+ persist_directory = "db"
+ client = chromadb.Client(Settings(persist_directory=persist_directory))
+ retriever = Chroma(client=client, collection_name="example_collection").as_retriever(
+     search_type="similarity", search_kwargs={"k": 2}
+ )
+
+ # Step 2: Load the language model
+ checkpoint = "MBZUAI/LaMini-Flan-T5-783M"
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+ base_model = AutoModelForSeq2SeqLM.from_pretrained(
+     checkpoint,
+     device_map="auto",
+     torch_dtype="auto"
+ )
+ pipe = pipeline(
+     "text2text-generation",
+     model=base_model,
+     tokenizer=tokenizer,
+     max_length=512,
+     do_sample=True,
+     temperature=0.3,
+     top_p=0.95,
+ )
+ local_llm = HuggingFacePipeline(pipeline=pipe)
+
+ # Step 3: Create the RetrievalQA chain
+ qa_chain = RetrievalQA.from_chain_type(
+     llm=local_llm,
+     chain_type="stuff",
+     retriever=retriever,
+     return_source_documents=True,
+ )
+
+ # Step 4: Query input
+ input_query = st.text_input("Enter your query:")
+
+ if input_query:
+     # Execute the query using the QA chain
+     llm_response = qa_chain({"query": input_query})
+
+     # Display the response
+     st.markdown(f"### Response: {llm_response['result']}")
+     st.markdown("#### Source Documents:")
+     for doc in llm_response['source_documents']:
+         st.write(doc.page_content[:500]) # Display a snippet of the source document
+
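
The updated app.py only reads from a Chroma store; it assumes a database has already been persisted under the "db" directory with a collection named "example_collection". The following is a minimal ingestion sketch of how such a store could be built. It is not part of this commit, and the "docs/" source folder, the PyPDFLoader, and the sentence-transformers/all-MiniLM-L6-v2 embedding model are illustrative assumptions rather than values taken from the repository.

import glob

from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# Load every PDF from an assumed local "docs/" folder.
documents = []
for pdf_path in glob.glob("docs/*.pdf"):
    documents.extend(PyPDFLoader(pdf_path).load())

# Split the documents into chunks small enough for retrieval.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(documents)

# Embed the chunks and persist them to the "db" directory that app.py expects.
# The embedding model here is an assumption; whichever embedder is used at
# ingestion must be consistent with how the collection is queried at runtime.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectordb = Chroma.from_documents(
    chunks,
    embeddings,
    collection_name="example_collection",
    persist_directory="db",
)
vectordb.persist()

If the embedding used during ingestion differs from the one used at query time, similarity search against the collection will return poor or meaningless matches, so this choice should be fixed before the database is built.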