# File size: 6,429 Bytes
# 6acaa77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import os
import streamlit as st
import asyncio

from langchain_community.utilities import SQLDatabase
from typing_extensions import TypedDict, List
from IPython.display import Image, display
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.schema import Document
from langgraph.graph import START, END, StateGraph
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from sqlalchemy import create_engine
import uuid
from langchain_groq import ChatGroq
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain_chroma import Chroma
from langchain_community.document_loaders import NewsURLLoader
from langchain_community.retrievers.wikipedia import WikipediaRetriever

from langchain.vectorstores import Chroma
from langchain_community.document_loaders import UnstructuredURLLoader, NewsURLLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader 
from langchain_core.output_parsers import JsonOutputParser
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain.schema import Document
from langgraph.graph import START, END, StateGraph
from langchain_community.document_loaders.directory import DirectoryLoader 

from functions import *
import pprint
from pathlib import Path
from sqlalchemy import create_engine
from langchain.retrievers import ContextualCompressionRetriever

from langchain.storage import InMemoryByteStore

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder 

import re
from functions import *

from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.retrievers.document_compressors import FlashrankRerank


# Runtime configuration for LangSmith tracing, Groq and Serper.
#
# setdefault() is used instead of plain assignment so that values already
# present in the process environment (for example secrets injected by the
# hosting platform) take precedence and are not overwritten at import time.
#
# SECURITY(review): the literal fallbacks below are credentials committed to
# source control. They are kept only so existing deployments keep working;
# they should be rotated and supplied through the environment instead.
os.environ.setdefault("LANGCHAIN_API_KEY", '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')
os.environ.setdefault("LANGCHAIN_TRACING_V2", "true")
os.environ.setdefault("LANGCHAIN_ENDPOINT", "https://api.smith.langchain.com")
os.environ.setdefault("LANGCHAIN_PROJECT", "Lithuanian_law_v2_LT_Kalba")
os.environ.setdefault("GROQ_API_KEY", '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')
os.environ.setdefault("SERPER_API_KEY", '6f80701ecd004c2466e8bd7bcebacacf89c74b84')






def _format_documents(documents):
    """Return one Markdown string describing every document in *documents*.

    Each document contributes four parts: a metadata heading, its
    ``original_doc_name`` metadata value (``'unknown'`` when absent), a
    content heading, and its ``page_content`` with literal backslash-n
    sequences turned into real paragraph breaks. Parts are joined with three
    newlines. An empty iterable yields an empty string.
    """
    parts = []
    for doc in documents:
        parts.append("\n\n**Documentų Meta duomenys**:\n")
        parts.append("Dokumento Pavadinimas: " + doc.metadata.get('original_doc_name', 'unknown'))
        parts.append("\n\n**Dokumentu skirsniai**:\n")
        # The stored text contains escaped "\n" sequences (backslash + "n"),
        # not real newlines; collapse runs of four first, then pairs.
        parts.append(doc.page_content.replace('\\n\\n\\n\\n', '\n\n').replace('\\n\\n', '\n\n'))
    return "\n\n\n".join(parts)


def main():
    """Render the Streamlit page: retrieval settings, chat tab and search tab.

    The page lets the user pick a search type and a document count, builds a
    retriever with ``create_retriever_from_chroma`` and a workflow graph with
    ``create_workflow``, then exposes two tabs:

    * the assistant tab forwards a question to ``handle_userinput``;
    * the search tab queries the retriever directly and shows the formatted
      documents in the sidebar.

    ``create_retriever_from_chroma``, ``create_workflow`` and
    ``handle_userinput`` are not defined in this file (presumably they come
    from the ``functions`` star import -- confirm there).
    """
    st.set_page_config(page_title="Birutė, Teisės Asistentė: ",
                       page_icon=":books:")

    st.header("Birutė, Lietuviškos Civilinės teisės asistentė :" ":books:")

    # The defaults are named once and interpolated into the labels so the text
    # shown to the user can no longer drift from the value actually preselected.
    search_options = ["mmr", "similarity"]
    default_search_type = "mmr"
    k_options = list(range(2, 9))
    default_k = 3

    search_type = st.selectbox(
        " ##### Pasirinkite paieškos būdą. Pasirinkimai yra: [Max marginal relevance search (MMR), Similarity search (similarity). "
        f"Default value ({default_search_type})], MMR gražina dokumentus iš platesnio spectro infromacijos, similarity is mažesnio, konkrečiau susijusio su užklausa",
        options=search_options,
        index=search_options.index(default_search_type),
    )

    k = st.select_slider(
        " ###Pasirinkite kieki dokumentų, kuriuos norėtumete naudoti generuojant atsakymą. Per daug dokumentų, kartais gali prikimšti asistentės atminti nereikalinga informacija ir dėl to gali atsirasti haliucinacijos. "
        f"Parinktasis dokumentų kiekis: ({default_k}): ",
        options=k_options,
        value=default_k,
    )

    full_retriever = create_retriever_from_chroma(vectorstore_path="docs/chroma/", search_type=search_type, k=k, chunk_size=9500, chunk_overlap=0)

    # NOTE: a ContextualCompressionRetriever wrapper around full_retriever was
    # sketched here previously but is not active; the raw retriever is used.

    # Tabs for Chat and Document Search
    tab1, tab2 = st.tabs(["Teisės Asistentė", "Informacijos Paieška Dokumentuose"])

    if "active_tab" not in st.session_state:
        st.session_state["active_tab"] = "Teisės Asistentė "

    with tab1:

        st.markdown("""
            ###### Prisiminkite: Aš esu virtuali asistentė, ne profesionali teisininkė. Naudokite savo kritinį mąstymą priimdami sprendimus arba pasitarkite su teisininku.
        """)

        if "messages" not in st.session_state:
            st.session_state["messages"] = [
                {"role": "assistant", "content": "Labas, aš virtuali teisės asistentė Birutė. Kuo galėčiau jums padėti?"}
            ]

        custom_graph = create_workflow(full_retriever)

        user_question = st.text_input("Klauskite klausimus susijusius su civiline teise.:")
        if st.button("Klausti"):
            if not user_question.strip():
                # Do not send a blank question through the workflow.
                st.warning("Įveskite klausimą.")
            else:
                with st.spinner("Galvojama"):
                    # UI boundary: show any failure to the user instead of
                    # crashing the page.
                    try:
                        handle_userinput(user_question, custom_graph)
                    except Exception as e:
                        st.write(f"Error: {e}")

    with tab2:

        # Both tab bodies execute on every script run, so this value is
        # overwritten each time; it is kept in case other code reads it.
        st.session_state["active_tab"] = "Informacijos Paieška Dokumentuose"

        user_topic = st.text_input("Paieška dokumentuose pagal tekstinį atitikimą:")
        if st.button("Ieškoti informacijos"):
            if not user_topic.strip():
                # Do not query the retriever with a blank search string.
                st.warning("Įveskite paieškos tekstą.")
            else:
                # Results are shown in the sidebar, as before.
                with st.sidebar:
                    try:
                        relevant_docs = full_retriever.get_relevant_documents(user_topic)
                        # Render once after all documents are formatted;
                        # rendering inside the loop repeated earlier documents.
                        pretty_output = _format_documents(relevant_docs)
                        if pretty_output:
                            st.markdown(pretty_output)
                    except Exception as e:
                        st.write(f"Error: {e}")

# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()