UnnamedUnknownx1234987789489 committed on
Commit
6acaa77
1 Parent(s): d355eed

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +162 -0
app.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ import asyncio
4
+
5
+ from langchain_community.utilities import SQLDatabase
6
+ from typing_extensions import TypedDict, List
7
+ from IPython.display import Image, display
8
+ from langchain_core.pydantic_v1 import BaseModel, Field
9
+ from langchain.schema import Document
10
+ from langgraph.graph import START, END, StateGraph
11
+ from langchain.prompts import PromptTemplate, ChatPromptTemplate
12
+ from sqlalchemy import create_engine
13
+ import uuid
14
+ from langchain_groq import ChatGroq
15
+ from langchain_community.utilities import GoogleSerperAPIWrapper
16
+ from langchain_chroma import Chroma
17
+ from langchain_community.document_loaders import NewsURLLoader
18
+ from langchain_community.retrievers.wikipedia import WikipediaRetriever
19
+
20
+ from langchain.vectorstores import Chroma
21
+ from langchain_community.document_loaders import UnstructuredURLLoader, NewsURLLoader
22
+ from langchain_community.embeddings import HuggingFaceEmbeddings
23
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
24
+ from langchain_community.document_loaders import WebBaseLoader
25
+ from langchain_core.output_parsers import JsonOutputParser
26
+ from langchain_community.vectorstores.utils import filter_complex_metadata
27
+ from langchain.schema import Document
28
+ from langgraph.graph import START, END, StateGraph
29
+ from langchain_community.document_loaders.directory import DirectoryLoader
30
+
31
+ from functions import *
32
+ import pprint
33
+ from pathlib import Path
34
+ from sqlalchemy import create_engine
35
+ from langchain.retrievers import ContextualCompressionRetriever
36
+
37
+ from langchain.storage import InMemoryByteStore
38
+
39
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
40
+ from langchain.retrievers.document_compressors import CrossEncoderReranker
41
+ from langchain_community.cross_encoders import HuggingFaceCrossEncoder
42
+
43
+ import re
44
+ from functions import *
45
+
46
+ from langchain.callbacks.manager import CallbackManager
47
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
48
+ from langchain.retrievers.document_compressors import FlashrankRerank
49
+
50
+
51
def configure_environment():
    """Prepare the process environment for LangSmith tracing and the LLM/search APIs.

    Non-secret settings get a default via ``os.environ.setdefault`` so a value
    supplied by the deployment is never overwritten. API keys are secrets and
    are deliberately NOT set here: they must come from the environment (for
    example the hosting platform's secret store). Never hard-code API keys in
    source control; any key that has ever been committed must be rotated.

    Returns:
        list: names of the required API-key variables that are missing or
        empty (an empty list when everything is configured).
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    import warnings

    defaults = {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
        "LANGCHAIN_PROJECT": "Lithuanian_law_v2_LT_Kalba",
    }
    for name, value in defaults.items():
        os.environ.setdefault(name, value)

    required_api_keys = ("LANGCHAIN_API_KEY", "GROQ_API_KEY", "SERPER_API_KEY")
    missing = [name for name in required_api_keys if not os.environ.get(name)]
    if missing:
        # Warn instead of raising: the page can still render, and the
        # affected feature reports its own error when it is actually used.
        warnings.warn(
            "Missing API key environment variable(s): "
            + ", ".join(missing)
            + ". Set them in the deployment environment.",
            RuntimeWarning,
            stacklevel=2,
        )
    return missing


configure_environment()
57
+
58
+
59
+
60
+
61
+
62
+
63
def _format_search_results(docs):
    """Build the markdown shown for a list of retrieved documents.

    Each document contributes a metadata heading, its ``original_doc_name``
    metadata value (``'unknown'`` when absent), a sections heading and its
    page content. Literal backslash-n sequences in the content are turned
    into real paragraph breaks.
    """
    parts = []
    for doc in docs:
        # f-string instead of "+" so a non-string metadata value cannot raise.
        doc_name = doc.metadata.get('original_doc_name', 'unknown')
        content = (
            doc.page_content
            .replace('\\n\\n\\n\\n', '\n\n')
            .replace('\\n\\n', '\n\n')
        )
        parts.extend([
            "\n\n**Documentų Meta duomenys**:\n",
            f"Dokumento Pavadinimas: {doc_name}",
            "\n\n**Dokumentu skirsniai**:\n",
            content,
        ])
    return "\n\n\n".join(parts)


def main() -> None:
    """Render the Streamlit page of the "Birutė" civil-law assistant.

    The page shows two retrieval settings (search type and number of
    documents), builds a retriever from the Chroma store under
    ``docs/chroma/`` and offers two tabs:

    * a question tab that passes the question and the workflow built by
      ``create_workflow`` to ``handle_userinput``;
    * a document-search tab that queries the retriever directly and prints
      the hits in the sidebar.

    ``create_retriever_from_chroma``, ``create_workflow`` and
    ``handle_userinput`` are presumably provided by the wildcard import from
    ``functions`` -- verify against that module.
    """
    st.set_page_config(page_title="Birutė, Teisės Asistentė: ",
                       page_icon=":books:")

    st.header("Birutė, Lietuviškos Civilinės teisės asistentė :" ":books:")

    # The label promises "similarity" as the default, so select it by name
    # rather than by a positional index that can drift from the options list.
    search_options = ["mmr", "similarity"]
    search_type = st.selectbox(
        " ##### Pasirinkite paieškos būdą. Pasirinkimai yra: [Max marginal relevance search (MMR), Similarity search (similarity). Default value (similarity)], MMR gražina dokumentus iš platesnio spectro infromacijos, similarity is mažesnio, konkrečiau susijusio su užklausa",
        options=search_options,
        index=search_options.index("similarity"),
    )

    # The label states a default of 4 documents; keep the widget in sync.
    k = st.select_slider(
        " ###Pasirinkite kieki dokumentų, kuriuos norėtumete naudoti generuojant atsakymą. Per daug dokumentų, kartais gali prikimšti asistentės atminti nereikalinga informacija ir dėl to gali atsirasti haliucinacijos. Parinktasis dokumentų kiekis: (4): ",
        options=list(range(2, 9)),
        value=4,
    )

    full_retriever = create_retriever_from_chroma(
        vectorstore_path="docs/chroma/",
        search_type=search_type,
        k=k,
        chunk_size=9500,
        chunk_overlap=0,
    )

    # Tabs for Chat and Document Search
    tab1, tab2 = st.tabs(["Teisės Asistentė", "Informacijos Paieška Dokumentuose"])

    if "active_tab" not in st.session_state:
        st.session_state["active_tab"] = "Teisės Asistentė "

    with tab1:
        st.markdown("""
        ###### Prisiminkite: Aš esu virtuali asistentė, ne profesionali teisininkė. Naudokite savo kritinį mąstymą priimdami sprendimus arba pasitarkite su teisininku.
        """)

        if "messages" not in st.session_state:
            st.session_state["messages"] = [
                {"role": "assistant", "content": "Labas, aš virtuali teisės asistentė Birutė. Kuo galėčiau jums padėti?"}
            ]

        custom_graph = create_workflow(full_retriever)

        user_question = st.text_input("Klauskite klausimus susijusius su civiline teise.:")
        if st.button("Klausti"):
            if not user_question.strip():
                # Do not run the workflow on an empty question.
                st.warning("Prašome įvesti klausimą.")
            else:
                with st.spinner("Galvojama"):
                    try:
                        handle_userinput(user_question, custom_graph)
                    except Exception as e:
                        # UI boundary: show the failure instead of crashing the page.
                        st.write(f"Error: {e}")

    with tab2:
        # The body of a ``with tab`` block runs on every script execution,
        # so this assignment always happens. The former check of this value
        # right after setting it was always true and has been removed.
        st.session_state["active_tab"] = "Informacijos Paieška Dokumentuose"

        user_topic = st.text_input("Paieška dokumentuose pagal tekstinį atitikimą:")
        if st.button("Ieškoti informacijos"):
            if not user_topic.strip():
                # Do not query the retriever with an empty search string.
                st.warning("Prašome įvesti paieškos tekstą.")
            else:
                with st.sidebar:
                    try:
                        relevant_docs = full_retriever.get_relevant_documents(user_topic)
                        if relevant_docs:
                            st.markdown(_format_search_results(relevant_docs))
                        else:
                            st.info("Dokumentų nerasta.")
                    except Exception as e:
                        # UI boundary: show the failure instead of crashing the page.
                        st.write(f"Error: {e}")
160
+
161
# Script entry point: build the page only when this module is executed as
# the main script, not when it is imported.
if __name__ == "__main__":
    main()