# Scraped page header (not part of the program): Hugging Face Space, status "Sleeping", file size 6,429 bytes, revision 6acaa77.
import os
import streamlit as st
import asyncio
from langchain_community.utilities import SQLDatabase
from typing_extensions import TypedDict, List
from IPython.display import Image, display
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.schema import Document
from langgraph.graph import START, END, StateGraph
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from sqlalchemy import create_engine
import uuid
from langchain_groq import ChatGroq
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain_chroma import Chroma
from langchain_community.document_loaders import NewsURLLoader
from langchain_community.retrievers.wikipedia import WikipediaRetriever
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import UnstructuredURLLoader, NewsURLLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import JsonOutputParser
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain.schema import Document
from langgraph.graph import START, END, StateGraph
from langchain_community.document_loaders.directory import DirectoryLoader
from functions import *
import pprint
from pathlib import Path
from sqlalchemy import create_engine
from langchain.retrievers import ContextualCompressionRetriever
from langchain.storage import InMemoryByteStore
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
import re
from functions import *
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.retrievers.document_compressors import FlashrankRerank
# LangSmith tracing configuration (non-secret values).
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] = "Lithuanian_law_v2_LT_Kalba"

# SECURITY: the LangSmith, Groq and Serper API keys used to be hard-coded on
# these lines. Keys committed to source must be treated as leaked: rotate them
# and supply the new values through the process environment (for example the
# hosting platform's secret settings) under the variable names listed below.
_REQUIRED_SECRET_VARS = ("LANGCHAIN_API_KEY", "GROQ_API_KEY", "SERPER_API_KEY")
_missing_secret_vars = [
    name for name in _REQUIRED_SECRET_VARS if not os.environ.get(name)
]
if _missing_secret_vars:
    import warnings

    # Warn instead of raising so the page can still render and report the
    # problem when a request to the affected service actually fails.
    warnings.warn(
        "Missing API key environment variable(s): "
        + ", ".join(_missing_secret_vars),
        RuntimeWarning,
        stacklevel=2,
    )
def _format_search_results(docs):
    """Build the Markdown text shown for a list of retrieved documents.

    For every document this emits a metadata heading, the document's
    ``original_doc_name`` metadata value (``'unknown'`` when the key is
    absent), a section heading and the page content. All pieces are joined
    with three newlines.

    Args:
        docs: Iterable of objects exposing ``metadata`` (a mapping) and
            ``page_content`` (a string).

    Returns:
        The assembled Markdown string; empty when ``docs`` is empty.
    """
    parts = []
    for doc in docs:
        doc_name = doc.metadata.get('original_doc_name', 'unknown')
        parts.append("\n\n**Documentų Meta duomenys**:\n")
        # f-string formatting: a non-string metadata value cannot raise
        # TypeError the way "+" concatenation would.
        parts.append(f"Dokumento Pavadinimas: {doc_name}")
        parts.append("\n\n**Dokumentu skirsniai**:\n")
        # Replace literal backslash-n escape sequences (two characters each,
        # not real newlines) with real paragraph breaks.
        parts.append(
            doc.page_content.replace('\\n\\n\\n\\n', '\n\n').replace('\\n\\n', '\n\n')
        )
    return "\n\n\n".join(parts)


def main():
    """Render the Streamlit page: retrieval settings, chat tab and search tab.

    The page lets the user pick a search type and a document count, builds a
    retriever from the Chroma store under ``docs/chroma/`` and exposes two
    tabs: one that forwards a question to the workflow created by
    ``create_workflow`` and one that lists the documents the retriever
    returns for a free-text query (rendered in the sidebar).
    """
    st.set_page_config(page_title="Birutė, Teisės Asistentė: ",
                       page_icon=":books:")
    st.header("Birutė, Lietuviškos Civilinės teisės asistentė :" ":books:")

    # NOTE(review): the label advertises "similarity" as the default, but
    # index=0 preselects "mmr" -- confirm which one is intended.
    search_type = st.selectbox(
        " ##### Pasirinkite paieškos būdą. Pasirinkimai yra: [Max marginal relevance search (MMR), Similarity search (similarity). Default value (similarity)], MMR gražina dokumentus iš platesnio spectro infromacijos, similarity is mažesnio, konkrečiau susijusio su užklausa",
        options=["mmr", "similarity"],
        index=0,
    )
    # NOTE(review): the label advertises 4 as the default document count, but
    # value=3 is preselected -- confirm which one is intended.
    k = st.select_slider(
        " ###Pasirinkite kieki dokumentų, kuriuos norėtumete naudoti generuojant atsakymą. Per daug dokumentų, kartais gali prikimšti asistentės atminti nereikalinga informacija ir dėl to gali atsirasti haliucinacijos. Parinktasis dokumentų kiekis: (4): ",
        options=list(range(2, 9)),
        value=3,
    )

    full_retriever = create_retriever_from_chroma(
        vectorstore_path="docs/chroma/",
        search_type=search_type,
        k=k,
        chunk_size=9500,
        chunk_overlap=0,
    )

    # Tabs for Chat and Document Search
    tab1, tab2 = st.tabs(["Teisės Asistentė", "Informacijos Paieška Dokumentuose"])

    if "active_tab" not in st.session_state:
        st.session_state["active_tab"] = "Teisės Asistentė "

    with tab1:
        st.markdown(
            "\n###### Prisiminkite: Aš esu virtuali asistentė, ne profesionali teisininkė. Naudokite savo kritinį mąstymą priimdami sprendimus arba pasitarkite su teisininku.\n"
        )

        if "messages" not in st.session_state:
            st.session_state["messages"] = [
                {"role": "assistant", "content": "Labas, aš virtuali teisės asistentė Birutė. Kuo galėčiau jums padėti?"}
            ]

        custom_graph = create_workflow(full_retriever)

        user_question = st.text_input("Klauskite klausimus susijusius su civiline teise.:")
        if st.button("Klausti"):
            if not user_question.strip():
                # Do not run the workflow on an empty question.
                st.warning("Įveskite klausimą.")
            else:
                with st.spinner("Galvojama"):
                    try:
                        handle_userinput(user_question, custom_graph)
                    except Exception as e:
                        # UI boundary: show the failure instead of crashing the page.
                        st.write(f"Error: {e}")

    with tab2:
        # Kept so the session-state key still ends up with the same value as
        # before. The former "if active_tab == ..." check directly after this
        # assignment was always true and has been dropped.
        st.session_state["active_tab"] = "Informacijos Paieška Dokumentuose"

        user_topic = st.text_input("Paieška dokumentuose pagal tekstinį atitikimą:")
        if st.button("Ieškoti informacijos"):
            if not user_topic.strip():
                # Do not query the retriever with an empty search text.
                st.warning("Įveskite paieškos tekstą.")
            else:
                with st.sidebar:
                    try:
                        relevant_docs = full_retriever.get_relevant_documents(user_topic)
                        if relevant_docs:
                            st.markdown(_format_search_results(relevant_docs))
                        else:
                            # Say so explicitly instead of rendering an empty block.
                            st.info("Atitinkančių dokumentų nerasta.")
                    except Exception as e:
                        # UI boundary: show the failure instead of crashing the page.
                        st.write(f"Error: {e}")
# Run the Streamlit page when this file is executed as a script.
if __name__ == "__main__":
    main()