Update app.py
app.py
CHANGED
@@ -1,104 +1,59 @@
 import streamlit as st
-from PyPDF2 import PdfReader
-from PIL import Image
-import easyocr
-from transformers import pipeline
-import numpy as np
-from difflib import SequenceMatcher
-
-# Initialize the T5-small pipeline for lightweight generative capabilities
-qa_pipeline = pipeline("text2text-generation", model="t5-small")
-
-def analyze_pdf(file):
-    """Extract text from a PDF file."""
-    pdf_reader = PdfReader(file)
-    text = ""
-    for page in pdf_reader.pages:
-        text += page.extract_text()
-    return text
-
-def analyze_image(file):
-    """Extract text from an image file using EasyOCR."""
-    image = Image.open(file) # Open the uploaded image
-    image_np = np.array(image) # Convert the PIL image to a NumPy array
-    reader = easyocr.Reader(['en'], gpu=False) # Initialize EasyOCR reader
-    result = reader.readtext(image_np, detail=0) # Extract text without bounding box details
-    return " ".join(result) # Combine detected text into a single string
-
-def classify_and_respond(extracted_text):
-    """Classify the content and provide a response."""
-    keywords = ["commercial court", "business law", "commercial dispute"]
-    if any(keyword in extracted_text.lower() for keyword in keywords):
-        return "The file content is related to commercial courts."
-    else:
-        return "The file content is not related to commercial courts."
-
-def find_similar_question(user_question, default_questions):
-    """Find the most similar default question."""
-    best_match = None
-    highest_similarity = 0.0
-    for default_question in default_questions.keys():
-        similarity = SequenceMatcher(None, user_question.lower(), default_question.lower()).ratio()
-        if similarity > highest_similarity:
-            highest_similarity = similarity
-            best_match = default_question
-    return best_match if highest_similarity > 0.6 else None
-
-# Expanded default questions and answers
-default_questions = {
-    "What is a commercial court?": "A commercial court is a specialized court that handles business disputes and cases related to commerce and trade.",
-    "What are the rules under the Commercial Courts Act?": "The Commercial Courts Act provides guidelines for the establishment and operation of commercial courts to ensure speedy resolution of business disputes.",
-    "What are the steps to file a case in a commercial court?": "To file a case, prepare the necessary documents, hire a lawyer, draft a plaint, and submit it to the relevant commercial court along with applicable fees.",
-    "What is the jurisdiction of a commercial court?": "Commercial courts handle cases related to trade, commerce, contractual disputes, intellectual property rights, and arbitration.",
-    "What is the timeline for resolving cases in commercial courts?": "The Commercial Courts Act emphasizes quick resolution, with timelines often set between 6 months to 1 year for case disposal.",
-    "Can commercial courts handle arbitration matters?": "Yes, commercial courts can handle matters related to the enforcement and appeal of arbitration awards.",
-    "What types of cases can be filed in a commercial court?": "Cases related to trade disputes, contractual disagreements, intellectual property rights, company law issues, and arbitration fall under commercial court jurisdiction.",
-    "What is the minimum value for a case to be heard in a commercial court?": "The minimum value of the dispute to be heard in a commercial court is typically Rs. 3,00,000, but this may vary by jurisdiction.",
-    "Do commercial courts require pre-institution mediation?": "Yes, many commercial disputes require pre-institution mediation as mandated by the Commercial Courts Act to promote settlement.",
-    "What documents are needed to file a case in a commercial court?": "Key documents include the plaint, contract/agreement, proof of breach, invoices, correspondence, and other supporting evidence.",
-    "How do I appeal a decision made by a commercial court?": "Decisions made by commercial courts can be appealed in the respective High Court with jurisdiction over the matter.",
-    "Are there special procedures for intellectual property disputes in commercial courts?": "Yes, commercial courts follow expedited procedures for intellectual property cases to ensure quick resolutions.",
-    "Can individuals represent themselves in commercial court cases?": "While it is possible, it is highly recommended to hire a lawyer experienced in commercial law for better outcomes.",
-    "Are commercial courts different from arbitration?": "Yes, commercial courts are judicial bodies for resolving disputes, while arbitration is an alternative dispute resolution mechanism agreed upon by the parties."
-}
-
-# Streamlit UI
-st.title("Commercial Court Chatbot")
-
-# Input text query
-question = st.text_input("Ask a question related to commercial courts:")
-
-# File uploader
-uploaded_file = st.file_uploader("Upload a PDF or Image file for analysis:", type=["pdf", "png", "jpg", "jpeg"])
-
-if question:
-    # Check for similar default questions
-    similar_question = find_similar_question(question, default_questions)
-    if similar_question:
-        st.write(f"Answer: {default_questions[similar_question]}")
-    else:
-        # Check if the query is related to commercial courts
-        related = classify_and_respond(question)
-        if "not related" in related.lower():
-            st.write("The question is not related to commercial courts.")
-        else:
-            # Generate answer using the pipeline
-            try:
-                response = qa_pipeline(question)[0]['generated_text']
-                st.write(f"Answer: {response}")
-            except:
-                st.write("I'm sorry, I couldn't generate an answer. Please try rephrasing your question.")
-
-if uploaded_file:
-    # Handle uploaded files
-    file_type = uploaded_file.type
-    extracted_text = ""
-
-    if file_type == "application/pdf":
-        extracted_text = analyze_pdf(uploaded_file)
-    elif file_type in ["image/png", "image/jpeg", "image/jpg"]:
-        extracted_text = analyze_image(uploaded_file)
-
-    if extracted_text:
-        response = classify_and_respond(extracted_text)
-        st.write(response)
+from langchain.vectorstores import Chroma
+from langchain.chains import RetrievalQA
+from langchain.llms import HuggingFacePipeline
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+from chromadb.config import Settings
+import chromadb
+
+# Streamlit title and description
+st.title("Document Question Answering")
+st.markdown("Ask questions about the documents in the pre-saved database!")
+
+# Step 1: Load the saved Chroma database
+persist_directory = "db"
+client = chromadb.Client(Settings(persist_directory=persist_directory))
+retriever = Chroma(client=client, collection_name="example_collection").as_retriever(
+    search_type="similarity", search_kwargs={"k": 2}
+)
+
+# Step 2: Load the language model
+checkpoint = "MBZUAI/LaMini-Flan-T5-783M"
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+base_model = AutoModelForSeq2SeqLM.from_pretrained(
+    checkpoint,
+    device_map="auto",
+    torch_dtype="auto"
+)
+pipe = pipeline(
+    "text2text-generation",
+    model=base_model,
+    tokenizer=tokenizer,
+    max_length=512,
+    do_sample=True,
+    temperature=0.3,
+    top_p=0.95,
+)
+local_llm = HuggingFacePipeline(pipeline=pipe)
+
+# Step 3: Create the RetrievalQA chain
+qa_chain = RetrievalQA.from_chain_type(
+    llm=local_llm,
+    chain_type="stuff",
+    retriever=retriever,
+    return_source_documents=True,
+)
+
+# Step 4: Query input
+input_query = st.text_input("Enter your query:")
+
+if input_query:
+    # Execute the query using the QA chain
+    llm_response = qa_chain({"query": input_query})
+
+    # Display the response
+    st.markdown(f"### Response: {llm_response['result']}")
+    st.markdown("#### Source Documents:")
+    for doc in llm_response['source_documents']:
+        st.write(doc.page_content[:500]) # Display a snippet of the source document
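
The updated app.py only reads from a Chroma collection named "example_collection" persisted under db/; building that collection is not part of this commit. The sketch below shows one plausible ingestion script using the same pre-0.1 langchain APIs the new code imports. The script name, source PDF path, chunking parameters, and embedding model (all-MiniLM-L6-v2) are assumptions for illustration only, not something this diff specifies.

# ingest.py (hypothetical helper, not included in this commit)
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma

# Load and chunk the source document (the path is an assumption)
docs = PyPDFLoader("docs/commercial_courts_act.pdf").load()
chunks = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(docs)

# Embed the chunks and persist them into the collection app.py expects
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
db = Chroma.from_documents(
    chunks,
    embedding=embeddings,
    collection_name="example_collection",
    persist_directory="db",
)
db.persist()

Whatever embedding model is used at ingestion time also has to be the one in effect when app.py queries the collection; otherwise the similarity search over "example_collection" will return poor neighbors.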
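
The commit also changes what the Space needs installed: the PDF/OCR stack (PyPDF2 or pypdf, easyocr, Pillow, numpy) drops out, while langchain, chromadb, and a local LLM stack come in. A plausible, unpinned requirements.txt for this revision follows; the exact package set and versions are not part of the diff, so every entry is an assumption to check against the Space's actual requirements file.

# requirements.txt (sketch only; not part of this commit, versions unpinned)
streamlit
langchain               # pre-0.1 import paths used above (langchain.vectorstores, langchain.llms)
chromadb
transformers
torch
accelerate              # required by transformers when device_map="auto" is passed
sentence-transformers   # only needed if documents are ingested as sketched above
pypdf                   # only needed for PyPDFLoader in the ingestion sketch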