"""Streamlit front end for "Birutė", a Lithuanian civil-law assistant.

The page offers two tabs: a chat tab that forwards the user's question to a
LangGraph workflow, and a search tab that shows the raw documents returned by
the Chroma-backed retriever.
"""

# NOTE: imports are kept in their original order (duplicates included).
# `from functions import *` and the two `Chroma` imports make the final name
# bindings order-dependent, so regrouping them is not safe without seeing
# the `functions` module.
import os
import streamlit as st
import asyncio
from langchain_community.utilities import SQLDatabase
from typing_extensions import TypedDict, List
from IPython.display import Image, display
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.schema import Document
from langgraph.graph import START, END, StateGraph
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from sqlalchemy import create_engine
import uuid
from langchain_groq import ChatGroq
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain_chroma import Chroma
from langchain_community.document_loaders import NewsURLLoader
from langchain_community.retrievers.wikipedia import WikipediaRetriever
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import UnstructuredURLLoader, NewsURLLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import JsonOutputParser
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain.schema import Document
from langgraph.graph import START, END, StateGraph
from langchain_community.document_loaders.directory import DirectoryLoader
from functions import *
import pprint
from pathlib import Path
from sqlalchemy import create_engine
from langchain.retrievers import ContextualCompressionRetriever
from langchain.storage import InMemoryByteStore
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
import re
from functions import *
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.retrievers.document_compressors import FlashrankRerank

# Non-secret LangSmith tracing settings; applied unconditionally.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] = "Lithuanian_law_v2_LT_Kalba"

# SECURITY(review): these credentials are committed in source. They should be
# rotated and supplied through the process environment (or Streamlit secrets)
# instead. `setdefault` lets an externally provided value take precedence; the
# literals remain only as a fallback so existing deployments keep working.
os.environ.setdefault("LANGCHAIN_API_KEY", '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')
os.environ.setdefault("GROQ_API_KEY", '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')
os.environ.setdefault("SERPER_API_KEY", '6f80701ecd004c2466e8bd7bcebacacf89c74b84')

# Widget choices and defaults. The labels below are built from these values so
# the text shown to the user can no longer disagree with the actual default.
_SEARCH_TYPES = ("mmr", "similarity")
_DEFAULT_SEARCH_TYPE = "mmr"
_K_OPTIONS = range(2, 9)
_DEFAULT_K = 3

_CHAT_TAB = "Teisės Asistentė"
_SEARCH_TAB = "Informacijos Paieška Dokumentuose"

_DISCLAIMER = (
    "###### Prisiminkite: Aš esu virtuali asistentė, ne profesionali teisininkė. "
    "Naudokite savo kritinį mąstymą priimdami sprendimus arba pasitarkite su teisininku."
)


def _format_search_results(docs) -> str:
    """Return the retrieved documents rendered as a single Markdown string.

    Each document contributes its ``original_doc_name`` metadata entry
    (``'unknown'`` when absent) followed by its page content. Literal
    backslash-n sequences stored in the content are turned into real
    paragraph breaks.
    """
    parts = []
    for doc in docs:
        # f-string instead of `+` so a non-string metadata value cannot raise.
        doc_name = doc.metadata.get('original_doc_name', 'unknown')
        body = doc.page_content.replace('\\n\\n\\n\\n', '\n\n').replace('\\n\\n', '\n\n')
        parts.extend(
            [
                "\n\n**Documentų Meta duomenys**:\n",
                f"Dokumento Pavadinimas: {doc_name}",
                "\n\n**Dokumentu skirsniai**:\n",
                body,
            ]
        )
    return "\n\n\n".join(parts)


def _render_chat_tab(full_retriever) -> None:
    """Draw the chat tab and pass a submitted question to the workflow."""
    st.markdown(_DISCLAIMER)

    if "messages" not in st.session_state:
        st.session_state["messages"] = [
            {"role": "assistant", "content": "Labas, aš virtuali teisės asistentė Birutė. Kuo galėčiau jums padėti?"}
        ]

    # NOTE(review): the workflow is rebuilt on every Streamlit rerun, as in the
    # original; consider caching it if `create_workflow` proves expensive.
    custom_graph = create_workflow(full_retriever)

    user_question = st.text_input("Klauskite klausimus susijusius su civiline teise.:")
    if not st.button("Klausti"):
        return

    # Do not invoke the graph with a blank prompt.
    if not user_question.strip():
        st.warning("Įveskite klausimą.")
        return

    with st.spinner("Galvojama"):
        # Top-level UI boundary: report any failure instead of crashing the page.
        try:
            handle_userinput(user_question, custom_graph)
        except Exception as e:
            st.error(f"Error: {e}")


def _render_search_tab(full_retriever) -> None:
    """Draw the document-search tab; results are shown in the sidebar."""
    # Streamlit executes the body of every tab on each rerun, so this write
    # always happens. It is kept in case other code reads `active_tab`; the
    # comparison that used to follow it was always true and has been dropped.
    st.session_state["active_tab"] = _SEARCH_TAB

    user_topic = st.text_input("Paieška dokumentuose pagal tekstinį atitikimą:")
    if not st.button("Ieškoti informacijos"):
        return

    with st.sidebar:
        # Do not query the retriever with a blank string.
        if not user_topic.strip():
            st.warning("Įveskite paieškos užklausą.")
            return

        try:
            relevant_docs = full_retriever.get_relevant_documents(user_topic)
            pretty_output = _format_search_results(relevant_docs)
        except Exception as e:
            st.error(f"Error: {e}")
            return

        if relevant_docs:
            st.markdown(pretty_output)
        else:
            st.info("Dokumentų nerasta.")


def main():
    """Build the Streamlit page: retrieval settings, chat tab and search tab."""
    st.set_page_config(page_title="Birutė, Teisės Asistentė: ", page_icon=":books:")
    st.header("Birutė, Lietuviškos Civilinės teisės asistentė ::books:")

    search_type = st.selectbox(
        " ##### Pasirinkite paieškos būdą. Pasirinkimai yra: "
        "[Max marginal relevance search (MMR), Similarity search (similarity). "
        f"Default value ({_DEFAULT_SEARCH_TYPE})], "
        "MMR gražina dokumentus iš platesnio spectro infromacijos, "
        "similarity is mažesnio, konkrečiau susijusio su užklausa",
        options=list(_SEARCH_TYPES),
        index=_SEARCH_TYPES.index(_DEFAULT_SEARCH_TYPE),
    )

    k = st.select_slider(
        " ###Pasirinkite kieki dokumentų, kuriuos norėtumete naudoti generuojant atsakymą. "
        "Per daug dokumentų, kartais gali prikimšti asistentės atminti nereikalinga informacija "
        "ir dėl to gali atsirasti haliucinacijos. "
        f"Parinktasis dokumentų kiekis: ({_DEFAULT_K}): ",
        options=list(_K_OPTIONS),
        value=_DEFAULT_K,
    )

    full_retriever = create_retriever_from_chroma(
        vectorstore_path="docs/chroma/",
        search_type=search_type,
        k=k,
        chunk_size=9500,
        chunk_overlap=0,
    )

    chat_tab, search_tab = st.tabs([_CHAT_TAB, _SEARCH_TAB])

    if "active_tab" not in st.session_state:
        # Trailing space preserved from the original initial value.
        st.session_state["active_tab"] = "Teisės Asistentė "

    with chat_tab:
        _render_chat_tab(full_retriever)

    with search_tab:
        _render_search_tab(full_retriever)


if __name__ == "__main__":
    main()