"""Streamlit app: chat with an uploaded CSV file using a local Llama 2 model.

The uploaded CSV is split into chunks, embedded with a sentence-transformers
model, indexed in FAISS, and queried through a LangChain
``ConversationalRetrievalChain``.
"""

import os
import tempfile

import streamlit as st
from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms.ctransformers import CTransformers
from langchain_community.vectorstores import FAISS
from streamlit_chat import message

DB_FAISS_PATH = 'vectorstore/db_faiss'
TEMP_DIR = 'temp'

# Uploaded files are written here before being handed to CSVLoader.
os.makedirs(TEMP_DIR, exist_ok=True)


@st.cache_resource
def load_llm():
    """Load the Llama 2 chat model from the Hugging Face repo.

    Cached as a Streamlit resource: the script is re-executed on every widget
    interaction, and the model must not be reloaded each time.

    Returns:
        The configured ``CTransformers`` LLM instance.
    """
    llm = CTransformers(
        model='TheBloke/Llama-2-7B-Chat-GGML',
        model_type='llama',
        # Generation settings are read from ``config``; passing them as
        # top-level keyword arguments (or misspelling ``max_new_tokens``)
        # leaves them without effect.
        config={'max_new_tokens': 512, 'temperature': 0.5},
    )
    return llm


@st.cache_resource
def _build_chain(file_name, file_bytes):
    """Index one uploaded CSV and return ``(chain, number_of_chunks)``.

    Cached on the file name and content, so the embedding and FAISS indexing
    work is done once per distinct upload rather than on every rerun.

    Args:
        file_name: Name reported for the uploaded file.
        file_bytes: Raw content of the uploaded file.

    Raises:
        ValueError: If the CSV yields no text chunks to index.
    """
    # The name comes from the browser; keep only its final component so the
    # file is always written inside TEMP_DIR.
    file_path = os.path.join(TEMP_DIR, os.path.basename(file_name))
    with open(file_path, "wb") as f:
        f.write(file_bytes)

    loader = CSVLoader(
        file_path=file_path,
        encoding='utf-8',
        csv_args={'delimiter': ','},
    )
    data = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks = text_splitter.split_documents(data)
    if not text_chunks:
        # Building a vector index from zero documents fails with an opaque
        # error; report the real cause instead.
        raise ValueError("The uploaded CSV file contains no rows to index.")

    embeddings = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
    )
    db = FAISS.from_documents(text_chunks, embeddings)
    db.save_local(DB_FAISS_PATH)

    chain = ConversationalRetrievalChain.from_llm(
        llm=load_llm(),
        retriever=db.as_retriever(),
    )
    return chain, len(text_chunks)


st.title("Chat with CSV using Llama 2")
st.markdown(
    "<h3 style='text-align: center;'>Chat with your CSV 📄</h3>",
    unsafe_allow_html=True,
)
st.markdown(
    "<h3 style='text-align: center;'>Built by MindSparks ❤️</h3>",
    unsafe_allow_html=True,
)

uploaded_file = st.sidebar.file_uploader('Upload your data', type=['csv'])

if uploaded_file:
    st.write(f"Uploaded file: {uploaded_file.name}")
    st.write("Processing CSV file...")

    try:
        chain, chunk_count = _build_chain(uploaded_file.name, uploaded_file.getvalue())
    except ValueError as err:
        st.error(str(err))
        st.stop()

    st.write(f"Total text chunks : {chunk_count}")

    def conversational_chat(query):
        """Answer *query* with the retrieval chain and record the exchange.

        The (question, answer) pair is appended to
        ``st.session_state['history']``, which is passed back to the chain as
        ``chat_history`` on the next call.
        """
        result = chain.invoke(
            {"question": query, "chat_history": st.session_state['history']}
        )
        st.session_state['history'].append((query, result['answer']))
        return result['answer']

    # Seed the per-session chat state on the first run only.
    if 'history' not in st.session_state:
        st.session_state['history'] = []

    if 'generated' not in st.session_state:
        st.session_state['generated'] = ['Hello, Ask me anything about ' + uploaded_file.name]

    if 'past' not in st.session_state:
        st.session_state['past'] = ['Hey !']

    # Container for the chat history; created before the input form so the
    # conversation renders above the query box.
    response_container = st.container()
    container = st.container()

    with container:
        with st.form(key='my_form', clear_on_submit=True):
            user_input = st.text_input('Query:', placeholder="Talk to your CSV Data here ")
            submit_button = st.form_submit_button(label='chat')

        if submit_button and user_input:
            output = conversational_chat(user_input)
            st.session_state['past'].append(user_input)
            st.session_state['generated'].append(output)

    if st.session_state['generated']:
        with response_container:
            # 'past' and 'generated' grow in lockstep, one entry per turn.
            turns = zip(st.session_state['past'], st.session_state['generated'])
            for i, (user_msg, bot_msg) in enumerate(turns):
                message(user_msg, is_user=True, key=str(i) + '_user', avatar_style='big-smile')
                message(bot_msg, key=str(i), avatar_style='thumb')