Spaces:
Sleeping
Sleeping
UnnamedUnknownx1234987789489
committed on
Commit
•
6acaa77
1
Parent(s):
d355eed
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import streamlit as st
|
3 |
+
import asyncio
|
4 |
+
|
5 |
+
from langchain_community.utilities import SQLDatabase
|
6 |
+
from typing_extensions import TypedDict, List
|
7 |
+
from IPython.display import Image, display
|
8 |
+
from langchain_core.pydantic_v1 import BaseModel, Field
|
9 |
+
from langchain.schema import Document
|
10 |
+
from langgraph.graph import START, END, StateGraph
|
11 |
+
from langchain.prompts import PromptTemplate, ChatPromptTemplate
|
12 |
+
from sqlalchemy import create_engine
|
13 |
+
import uuid
|
14 |
+
from langchain_groq import ChatGroq
|
15 |
+
from langchain_community.utilities import GoogleSerperAPIWrapper
|
16 |
+
from langchain_chroma import Chroma
|
17 |
+
from langchain_community.document_loaders import NewsURLLoader
|
18 |
+
from langchain_community.retrievers.wikipedia import WikipediaRetriever
|
19 |
+
|
20 |
+
from langchain.vectorstores import Chroma
|
21 |
+
from langchain_community.document_loaders import UnstructuredURLLoader, NewsURLLoader
|
22 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
23 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
24 |
+
from langchain_community.document_loaders import WebBaseLoader
|
25 |
+
from langchain_core.output_parsers import JsonOutputParser
|
26 |
+
from langchain_community.vectorstores.utils import filter_complex_metadata
|
27 |
+
from langchain.schema import Document
|
28 |
+
from langgraph.graph import START, END, StateGraph
|
29 |
+
from langchain_community.document_loaders.directory import DirectoryLoader
|
30 |
+
|
31 |
+
from functions import *
|
32 |
+
import pprint
|
33 |
+
from pathlib import Path
|
34 |
+
from sqlalchemy import create_engine
|
35 |
+
from langchain.retrievers import ContextualCompressionRetriever
|
36 |
+
|
37 |
+
from langchain.storage import InMemoryByteStore
|
38 |
+
|
39 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
40 |
+
from langchain.retrievers.document_compressors import CrossEncoderReranker
|
41 |
+
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
|
42 |
+
|
43 |
+
import re
|
44 |
+
from functions import *
|
45 |
+
|
46 |
+
from langchain.callbacks.manager import CallbackManager
|
47 |
+
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
48 |
+
from langchain.retrievers.document_compressors import FlashrankRerank
|
49 |
+
|
50 |
+
|
51 |
+
# --- Runtime configuration -------------------------------------------------
# SECURITY: API credentials must never be committed to source control. The
# keys that were previously hard-coded here are exposed in the repository
# history and must be revoked/rotated with their providers.
#
# The application now expects these secrets to be supplied through the
# process environment (for example as Hugging Face Space secrets):
#   LANGCHAIN_API_KEY  - LangSmith tracing (optional)
#   GROQ_API_KEY       - Groq chat model access
#   SERPER_API_KEY     - Google Serper search access
#
# Non-secret settings keep their previous values as defaults, but can now be
# overridden from the environment without editing the code.
os.environ.setdefault("LANGCHAIN_ENDPOINT", "https://api.smith.langchain.com")
os.environ.setdefault("LANGCHAIN_PROJECT", "Lithuanian_law_v2_LT_Kalba")

if os.environ.get("LANGCHAIN_API_KEY"):
    os.environ.setdefault("LANGCHAIN_TRACING_V2", "true")
else:
    # Without a LangSmith key tracing uploads cannot authenticate, so switch
    # tracing off instead of failing on every traced call.
    os.environ["LANGCHAIN_TRACING_V2"] = "false"
|
57 |
+
|
58 |
+
|
59 |
+
|
60 |
+
|
61 |
+
|
62 |
+
|
63 |
+
def _format_documents(docs):
    """Return a Markdown listing of each document's name and content.

    Args:
        docs: Iterable of objects exposing ``metadata`` (a mapping) and
            ``page_content`` (a string), as returned by the retriever.

    Returns:
        A single Markdown string; empty when ``docs`` is empty.
    """
    parts = []
    for doc in docs:
        name = doc.metadata.get('original_doc_name', 'unknown')
        # Turn literal backslash-n escape sequences into real blank lines
        # (presumably left over from document ingestion; verify upstream).
        content = doc.page_content.replace('\\n\\n\\n\\n', '\n\n').replace('\\n\\n', '\n\n')
        parts.extend([
            "\n\n**Documentų Meta duomenys**:\n",
            # f-string so a non-string metadata value cannot raise TypeError.
            f"Dokumento Pavadinimas: {name}",
            "\n\n**Dokumentu skirsniai**:\n",
            content,
        ])
    return "\n\n\n".join(parts)


def main():
    """Render the Streamlit UI of the "Birutė" civil-law assistant.

    The page has two tabs:

    * a question tab that passes the user's question and the graph built by
      ``create_workflow`` to ``handle_userinput``;
    * a document-search tab that shows, in the sidebar, the documents the
      retriever returns for a free-text query.

    Retrieval mode and the number of documents are chosen with the widgets
    at the top of the page and passed to ``create_retriever_from_chroma``.
    """
    st.set_page_config(page_title="Birutė, Teisės Asistentė: ",
                       page_icon=":books:")

    st.header("Birutė, Lietuviškos Civilinės teisės asistentė :" ":books:")

    search_options = ["mmr", "similarity"]
    search_type = st.selectbox(
        " ##### Pasirinkite paieškos būdą. Pasirinkimai yra: [Max marginal relevance search (MMR), Similarity search (similarity). Default value (similarity)], MMR gražina dokumentus iš platesnio spectro infromacijos, similarity is mažesnio, konkrečiau susijusio su užklausa",
        options=search_options,
        # The label promises "similarity" as the default; index=0 selected
        # "mmr" instead.
        index=search_options.index("similarity"),
    )

    k = st.select_slider(
        " ###Pasirinkite kieki dokumentų, kuriuos norėtumete naudoti generuojant atsakymą. Per daug dokumentų, kartais gali prikimšti asistentės atminti nereikalinga informacija ir dėl to gali atsirasti haliucinacijos. Parinktasis dokumentų kiekis: (4): ",
        options=list(range(2, 9)),
        # The label states a default of 4; the previous value was 3.
        value=4,
    )

    full_retriever = create_retriever_from_chroma(
        vectorstore_path="docs/chroma/",
        search_type=search_type,
        k=k,
        chunk_size=9500,
        chunk_overlap=0,
    )

    # NOTE: a ContextualCompressionRetriever wrapper around full_retriever was
    # sketched here previously but never enabled (it referenced an undefined
    # `compressor`); the plain retriever is used everywhere below.

    # Tabs for Chat and Document Search
    tab1, tab2 = st.tabs(["Teisės Asistentė", "Informacijos Paieška Dokumentuose"])

    if "active_tab" not in st.session_state:
        # NOTE(review): the trailing space is kept from the original value;
        # nothing visible in this file compares against it - confirm whether
        # any helper reads "active_tab" before normalising.
        st.session_state["active_tab"] = "Teisės Asistentė "

    with tab1:
        st.markdown("""
###### Prisiminkite: Aš esu virtuali asistentė, ne profesionali teisininkė. Naudokite savo kritinį mąstymą priimdami sprendimus arba pasitarkite su teisininku.
""")

        if "messages" not in st.session_state:
            st.session_state["messages"] = [
                {"role": "assistant", "content": "Labas, aš virtuali teisės asistentė Birutė. Kuo galėčiau jums padėti?"}
            ]

        custom_graph = create_workflow(full_retriever)

        user_question = st.text_input("Klauskite klausimus susijusius su civiline teise.:")
        klausti_btn = st.button("Klausti")
        if klausti_btn:
            if not user_question.strip():
                # Do not spend an LLM call on an empty question.
                st.warning("Įveskite klausimą.")
            else:
                with st.spinner("Galvojama"):
                    # Top-level UI boundary: surface any failure to the user
                    # instead of crashing the page.
                    try:
                        handle_userinput(user_question, custom_graph)
                    except Exception as e:
                        st.error(f"Error: {e}")

    with tab2:
        # This assignment runs on every script execution, so the former
        # `if active_tab == ...` check right after it was always true and has
        # been dropped.
        st.session_state["active_tab"] = "Informacijos Paieška Dokumentuose"

        user_topic = st.text_input("Paieška dokumentuose pagal tekstinį atitikimą:")
        ieskoti_btn = st.button("Ieškoti informacijos")
        if ieskoti_btn:
            if not user_topic.strip():
                st.warning("Įveskite paieškos tekstą.")
            else:
                # Results are rendered in the sidebar, next to the tabs.
                with st.sidebar:
                    try:
                        relevant_docs = full_retriever.get_relevant_documents(user_topic)
                        if relevant_docs:
                            st.markdown(_format_documents(relevant_docs))
                        else:
                            st.info("Dokumentų nerasta.")
                    except Exception as e:
                        st.error(f"Error: {e}")
|
160 |
+
|
161 |
+
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|