# Page-scrape header (not part of the program): Spaces: Sleeping; file size: 6,429 bytes; 6acaa77
import os
import streamlit as st
import asyncio
from langchain_community.utilities import SQLDatabase
from typing_extensions import TypedDict, List
from IPython.display import Image, display
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.schema import Document
from langgraph.graph import START, END, StateGraph
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from sqlalchemy import create_engine
import uuid
from langchain_groq import ChatGroq
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain_chroma import Chroma
from langchain_community.document_loaders import NewsURLLoader
from langchain_community.retrievers.wikipedia import WikipediaRetriever
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import UnstructuredURLLoader, NewsURLLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import JsonOutputParser
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain.schema import Document
from langgraph.graph import START, END, StateGraph
from langchain_community.document_loaders.directory import DirectoryLoader
from functions import *
import pprint
from pathlib import Path
from sqlalchemy import create_engine
from langchain.retrievers import ContextualCompressionRetriever
from langchain.storage import InMemoryByteStore
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
import re
from functions import *
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.retrievers.document_compressors import FlashrankRerank
# Process-wide configuration: LangChain tracing settings plus API keys for the
# Groq and Serper clients, all passed through environment variables.
#
# SECURITY: the literal credentials below are committed to source and must be
# treated as leaked -- rotate them and provide the replacements through the
# deployment's environment / secret store. `setdefault` is used so that values
# supplied externally take precedence instead of being overwritten; the
# literals remain only as a backward-compatible fallback.
os.environ.setdefault("LANGCHAIN_API_KEY", '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')
os.environ.setdefault("LANGCHAIN_TRACING_V2", "true")
os.environ.setdefault("LANGCHAIN_ENDPOINT", "https://api.smith.langchain.com")
os.environ.setdefault("LANGCHAIN_PROJECT", "Lithuanian_law_v2_LT_Kalba")
os.environ.setdefault("GROQ_API_KEY", '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')
os.environ.setdefault("SERPER_API_KEY", '6f80701ecd004c2466e8bd7bcebacacf89c74b84')
def _format_search_results(docs):
    """Render retrieved documents as a single Markdown string.

    For each document this emits, in order: a metadata heading, the document
    name read from the ``original_doc_name`` metadata key (``'unknown'`` when
    the key is absent), a sections heading, and the page content. All parts
    are joined with triple newlines.
    """
    parts = []
    for doc in docs:
        # The stored text contains literal backslash-n sequences; turn them
        # into real paragraph breaks. The run of four is handled first so it
        # collapses to one break instead of two.
        content = doc.page_content.replace('\\n\\n\\n\\n', '\n\n').replace('\\n\\n', '\n\n')
        # f-string (not `+`) so a non-string metadata value cannot raise TypeError.
        doc_name = f"Dokumento Pavadinimas: {doc.metadata.get('original_doc_name', 'unknown')}"
        parts.extend([
            "\n\n**Documentų Meta duomenys**:\n",
            doc_name,
            "\n\n**Dokumentu skirsniai**:\n",
            content,
        ])
    return "\n\n\n".join(parts)


def main():
    """Build and run the Streamlit page of the legal assistant.

    The page lets the user choose the retriever search type and the number of
    documents (k), builds the retriever with ``create_retriever_from_chroma``
    and then renders two tabs:

    * a question tab that passes the question and a workflow created by
      ``create_workflow`` to ``handle_userinput``;
    * a document-search tab that queries the retriever directly and shows the
      formatted hits in the sidebar.

    Errors raised while answering or searching are shown on the page instead
    of aborting the script.
    """
    st.set_page_config(page_title="Birutė, Teisės Asistentė: ",
                       page_icon=":books:")
    st.header("Birutė, Lietuviškos Civilinės teisės asistentė :" ":books:")

    search_options = ["mmr", "similarity"]
    search_type = st.selectbox(
        " ##### Pasirinkite paieškos būdą. Pasirinkimai yra: [Max marginal relevance search (MMR), Similarity search (similarity). Default value (similarity)], MMR gražina dokumentus iš platesnio spectro infromacijos, similarity is mažesnio, konkrečiau susijusio su užklausa",
        options=search_options,
        # The label announces "similarity" as the default, so preselect it
        # (the previous index=0 silently preselected "mmr").
        index=search_options.index("similarity"),
    )

    k = st.select_slider(
        " ###Pasirinkite kieki dokumentų, kuriuos norėtumete naudoti generuojant atsakymą. Per daug dokumentų, kartais gali prikimšti asistentės atminti nereikalinga informacija ir dėl to gali atsirasti haliucinacijos. Parinktasis dokumentų kiekis: (4): ",
        options=list(range(2, 9)),
        # The label announces 4 as the default document count.
        value=4,
    )

    full_retriever = create_retriever_from_chroma(
        vectorstore_path="docs/chroma/",
        search_type=search_type,
        k=k,
        chunk_size=9500,
        chunk_overlap=0,
    )
    # NOTE: wrapping full_retriever in a ContextualCompressionRetriever was
    # tried here and is currently disabled.

    # Tabs for Chat and Document Search
    tab1, tab2 = st.tabs(["Teisės Asistentė", "Informacijos Paieška Dokumentuose"])

    if "active_tab" not in st.session_state:
        st.session_state["active_tab"] = "Teisės Asistentė "

    with tab1:
        st.markdown(
            "\n###### Prisiminkite: Aš esu virtuali asistentė, ne profesionali teisininkė. Naudokite savo kritinį mąstymą priimdami sprendimus arba pasitarkite su teisininku.\n"
        )

        # Seed the stored conversation with a greeting on first run only.
        if "messages" not in st.session_state:
            st.session_state["messages"] = [
                {"role": "assistant", "content": "Labas, aš virtuali teisės asistentė Birutė. Kuo galėčiau jums padėti?"}
            ]

        custom_graph = create_workflow(full_retriever)

        user_question = st.text_input("Klauskite klausimus susijusius su civiline teise.:")
        if st.button("Klausti"):
            if not user_question.strip():
                # Do not spend a workflow run on an empty question.
                st.warning("Įveskite klausimą.")
            else:
                with st.spinner("Galvojama"):
                    try:
                        handle_userinput(user_question, custom_graph)
                    except Exception as e:
                        # UI boundary: report the failure instead of crashing the page.
                        st.write(f"Error: {e}")

    with tab2:
        # Recorded unconditionally whenever this body runs; a check against
        # this same value right after the assignment would always be true,
        # so the widgets below are rendered directly.
        st.session_state["active_tab"] = "Informacijos Paieška Dokumentuose"

        user_topic = st.text_input("Paieška dokumentuose pagal tekstinį atitikimą:")
        if st.button("Ieškoti informacijos"):
            if not user_topic.strip():
                # Do not query the retriever with an empty search string.
                st.warning("Įveskite paieškos tekstą.")
            else:
                # Results are rendered in the sidebar.
                with st.sidebar:
                    try:
                        # `invoke` replaces the deprecated `get_relevant_documents`.
                        # NOTE(review): assumes full_retriever is a LangChain
                        # retriever (Runnable) -- confirm against
                        # create_retriever_from_chroma.
                        relevant_docs = full_retriever.invoke(user_topic)
                        st.markdown(_format_search_results(relevant_docs))
                    except Exception as e:
                        # UI boundary: report the failure instead of crashing the page.
                        st.write(f"Error: {e}")
# Run the Streamlit page only when this file is executed as a script.
if __name__ == "__main__":
    main()