# NOTE: web-page residue captured with the file ("Spaces: Sleeping"); not part of the program.
import os | |
import streamlit as st | |
import asyncio | |
from langchain_community.utilities import SQLDatabase | |
from typing_extensions import TypedDict, List | |
from IPython.display import Image, display | |
from langchain_core.pydantic_v1 import BaseModel, Field | |
from langchain.schema import Document | |
from langgraph.graph import START, END, StateGraph | |
from langchain.prompts import PromptTemplate, ChatPromptTemplate | |
from sqlalchemy import create_engine | |
import uuid | |
from langchain_groq import ChatGroq | |
from langchain_community.utilities import GoogleSerperAPIWrapper | |
from langchain_chroma import Chroma | |
from langchain_community.document_loaders import NewsURLLoader | |
from langchain_community.retrievers.wikipedia import WikipediaRetriever | |
from langchain.vectorstores import Chroma | |
from langchain_community.document_loaders import UnstructuredURLLoader, NewsURLLoader | |
from langchain_community.embeddings import HuggingFaceEmbeddings | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain_community.document_loaders import WebBaseLoader | |
from langchain_core.output_parsers import JsonOutputParser | |
from langchain_community.vectorstores.utils import filter_complex_metadata | |
from langchain.schema import Document | |
from langgraph.graph import START, END, StateGraph | |
from langchain_community.document_loaders.directory import DirectoryLoader | |
from functions import * | |
import pprint | |
from pathlib import Path | |
from sqlalchemy import create_engine | |
from langchain.retrievers import ContextualCompressionRetriever | |
from langchain.storage import InMemoryByteStore | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.retrievers.document_compressors import CrossEncoderReranker | |
from langchain_community.cross_encoders import HuggingFaceCrossEncoder | |
import re | |
from functions import * | |
from langchain.callbacks.manager import CallbackManager | |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler | |
from langchain.retrievers.document_compressors import FlashrankRerank | |
import warnings

# LangSmith tracing settings. These are not secrets, so they stay in source
# and are assigned unconditionally, exactly as before.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] = "Lithuanian_law_v2_LT_Kalba"

# SECURITY: API credentials must never be committed to source control.
# They are expected to be supplied by the deployment environment (process
# environment variables / the hosting platform's secret store). The keys that
# were previously hard-coded here have been exposed and should be revoked and
# rotated.
REQUIRED_SECRET_ENV_VARS = ("LANGCHAIN_API_KEY", "GROQ_API_KEY", "SERPER_API_KEY")

# Warn (rather than raise) so that importing the module still works in
# environments where only part of the functionality is needed.
_missing_secrets = [name for name in REQUIRED_SECRET_ENV_VARS if not os.environ.get(name)]
if _missing_secrets:
    warnings.warn(
        "Missing required API credentials in the environment: "
        + ", ".join(_missing_secrets),
        RuntimeWarning,
    )
def _format_search_results(relevant_docs):
    """Render retrieved documents as a single Markdown string.

    For every document this emits, in order: a metadata heading, the document
    name read from ``metadata['original_doc_name']`` (``'unknown'`` when the
    key is absent), a sections heading, and the page content.

    Args:
        relevant_docs: Iterable of objects exposing ``metadata`` (a mapping)
            and ``page_content`` (a string).

    Returns:
        The Markdown text; an empty string when ``relevant_docs`` is empty.
    """
    parts = []
    for doc in relevant_docs:
        # f-string instead of "+" so a non-string metadata value cannot raise.
        doc_name = doc.metadata.get('original_doc_name', 'unknown')
        # Collapse runs of literal backslash-n sequences (the two characters
        # "\" and "n") into one real blank line.
        body = doc.page_content.replace('\\n\\n\\n\\n', '\n\n').replace('\\n\\n', '\n\n')
        parts.extend([
            "\n\n**Documentų Meta duomenys**:\n",
            f"Dokumento Pavadinimas: {doc_name}",
            "\n\n**Dokumentu skirsniai**:\n",
            body,
        ])
    return "\n\n\n".join(parts)


def main():
    """Build and run the Streamlit UI for the legal assistant.

    The page offers two retrieval settings (search type and number of
    documents), builds a retriever from the Chroma store under
    ``docs/chroma/``, and shows two tabs: a question/answer assistant and a
    plain document search whose results are rendered in the sidebar.

    ``create_retriever_from_chroma``, ``create_workflow`` and
    ``handle_userinput`` are not defined in this file; presumably they come
    from the ``functions`` wildcard import — verify there.
    """
    # set_page_config is kept as the first Streamlit call on the page.
    st.set_page_config(page_title="Birutė, Teisės Asistentė: ",
                       page_icon=":books:")
    st.header("Birutė, Lietuviškos Civilinės teisės asistentė :" ":books:")

    # NOTE(review): the label announces "similarity" as the default, but
    # index=0 preselects "mmr". Behavior is left unchanged; confirm which one
    # is intended and align label and index.
    search_type = st.selectbox(
        " ##### Pasirinkite paieškos būdą. Pasirinkimai yra: [Max marginal relevance search (MMR), Similarity search (similarity). Default value (similarity)], MMR gražina dokumentus iš platesnio spectro infromacijos, similarity is mažesnio, konkrečiau susijusio su užklausa",
        options=["mmr", "similarity"],
        index=0
    )

    # NOTE(review): the label announces a default of 4 documents, but value=3.
    # Behavior is left unchanged; confirm which one is intended.
    k = st.select_slider(
        " ###Pasirinkite kieki dokumentų, kuriuos norėtumete naudoti generuojant atsakymą. Per daug dokumentų, kartais gali prikimšti asistentės atminti nereikalinga informacija ir dėl to gali atsirasti haliucinacijos. Parinktasis dokumentų kiekis: (4): ",
        options=list(range(2, 9)),
        value=3
    )

    full_retriever = create_retriever_from_chroma(
        vectorstore_path="docs/chroma/",
        search_type=search_type,
        k=k,
        chunk_size=9500,
        chunk_overlap=0,
    )

    # Tabs for chat and document search.
    tab1, tab2 = st.tabs(["Teisės Asistentė", "Informacijos Paieška Dokumentuose"])

    if "active_tab" not in st.session_state:
        st.session_state["active_tab"] = "Teisės Asistentė "

    with tab1:
        st.markdown("""
###### Prisiminkite: Aš esu virtuali asistentė, ne profesionali teisininkė. Naudokite savo kritinį mąstymą priimdami sprendimus arba pasitarkite su teisininku.
""")

        if "messages" not in st.session_state:
            st.session_state["messages"] = [
                {"role": "assistant", "content": "Labas, aš virtuali teisės asistentė Birutė. Kuo galėčiau jums padėti?"}
            ]

        custom_graph = create_workflow(full_retriever)

        user_question = st.text_input("Klauskite klausimus susijusius su civiline teise.:")
        if st.button("Klausti"):
            if not (user_question or "").strip():
                # Do not run the workflow on an empty question.
                st.warning("Prašome įvesti klausimą.")
            else:
                with st.spinner("Galvojama"):
                    # Top-level UI boundary: show the failure on the page
                    # instead of crashing the app.
                    try:
                        handle_userinput(user_question, custom_graph)
                    except Exception as e:
                        st.write(f"Error: {e}")

    with tab2:
        # The value is written on every run, as before; the former check that
        # compared it with itself right after the assignment was always true
        # and has been dropped.
        st.session_state["active_tab"] = "Informacijos Paieška Dokumentuose"

        user_topic = st.text_input("Paieška dokumentuose pagal tekstinį atitikimą:")
        if st.button("Ieškoti informacijos"):
            if not (user_topic or "").strip():
                # Do not query the retriever with an empty search string.
                st.warning("Prašome įvesti paieškos tekstą.")
            else:
                # Search results are rendered in the sidebar.
                with st.sidebar:
                    try:
                        relevant_docs = full_retriever.get_relevant_documents(user_topic)
                        if relevant_docs:
                            st.markdown(_format_search_results(relevant_docs))
                        else:
                            st.info("Dokumentų nerasta.")
                    except Exception as e:
                        st.write(f"Error: {e}")


if __name__ == "__main__":
    main()