Nithi123 committed on
Commit
2caa545
1 Parent(s): 248b69c

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +73 -0
  2. requirements.txt +26 -0
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from langchain_groq import ChatGroq
4
+ from langchain.embeddings import HuggingFaceEmbeddings
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.chains.combine_documents import create_stuff_documents_chain
7
+ from langchain_core.prompts import ChatPromptTemplate
8
+ from langchain.chains import create_retrieval_chain
9
+ from langchain_community.vectorstores import FAISS
10
+ from langchain_community.document_loaders import PyPDFDirectoryLoader
11
+ from dotenv import load_dotenv
12
+ import time
13
+
14
# Load environment variables from a local .env file, if one is present.
load_dotenv()

# Read the API credentials from the environment.
huggingfacehub_api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
groq_api_key = os.getenv("GROQ_API_KEY")

# Fail fast when a credential is missing. An empty value is rejected as well
# as an unset one, so a blank entry in .env is reported here rather than
# surfacing later as an authentication failure.
if not huggingfacehub_api_token:
    raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable is not set")
if not groq_api_key:
    raise ValueError("GROQ_API_KEY environment variable is not set")

# Re-export the Hugging Face token into the process environment. The value was
# just read from the environment, so this only makes the dependency explicit.
os.environ['HUGGINGFACEHUB_API_TOKEN'] = huggingfacehub_api_token

# Chat model used to answer questions, authenticated with the Groq API key.
llm = ChatGroq(api_key=groq_api_key, model_name="Llama3-8b-8192")

st.title("DataScience Chatgroq With Llama3")

# Prompt template for the retrieval chain: {context} receives the retrieved
# document text and {input} receives the user's question. The context section
# is wrapped in a properly closed <context>...</context> pair.
prompt = ChatPromptTemplate.from_template(
    """
Answer the questions based on the provided context only.
Please provide the most accurate response based on the question.
<context>
{context}
</context>
Questions: {input}
"""
)
45
+
46
def vector_embedding(data_dir="./Data_Science", max_docs=20,
                     chunk_size=1000, chunk_overlap=200):
    """Build the FAISS vector store and cache it in ``st.session_state``.

    The work is done only when ``st.session_state`` has no ``"vectors"``
    entry yet, so repeated calls are cheap. Every intermediate object
    (``embeddings``, ``loader``, ``docs``, ``text_splitter``,
    ``final_documents``) is stored in the session state as well, and
    ``vectors`` is assigned last so a failed build can simply be retried.

    Args:
        data_dir: Directory containing the PDF files to ingest.
        max_docs: Maximum number of loaded documents to split and embed;
            ``None`` uses all of them.
        chunk_size: Maximum size of each text chunk.
        chunk_overlap: Overlap between consecutive chunks.

    Raises:
        ValueError: If no text chunks could be produced from ``data_dir``
            (for example, the directory is missing, empty, or contains no
            extractable text).
    """
    state = st.session_state
    if "vectors" in state:
        return

    state.embeddings = HuggingFaceEmbeddings()
    state.loader = PyPDFDirectoryLoader(data_dir)  # Data ingestion
    state.docs = state.loader.load()  # Document loading
    state.text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    state.final_documents = state.text_splitter.split_documents(
        state.docs[:max_docs]
    )

    # Building an index from zero chunks fails with an unhelpful error deep
    # inside the vector store; report the actual problem instead.
    if not state.final_documents:
        raise ValueError(f"No document text could be loaded from {data_dir!r}")

    state.vectors = FAISS.from_documents(state.final_documents, state.embeddings)
54
+
55
prompt1 = st.text_input("Enter Your Question From Documents")

# Building the vector store is an explicit user action; it is cached in the
# session state, so clicking the button again is cheap.
if st.button("Documents Embedding"):
    vector_embedding()
    st.write("Vector Store DB Is Ready")

if prompt1:
    if "vectors" not in st.session_state:
        # A question submitted before the embedding step would otherwise fail
        # with an AttributeError on st.session_state.vectors.
        st.warning('Please click "Documents Embedding" before asking a question.')
    else:
        document_chain = create_stuff_documents_chain(llm, prompt)
        retriever = st.session_state.vectors.as_retriever()
        retrieval_chain = create_retrieval_chain(retriever, document_chain)

        # Wall-clock timing: process_time() counts CPU time only and would
        # exclude the time spent waiting on the LLM request.
        start = time.perf_counter()
        response = retrieval_chain.invoke({'input': prompt1})
        st.write("Response time: ", time.perf_counter() - start)
        st.write(response['answer'])

        # Show the retrieved chunks that were passed to the model as context.
        with st.expander("Document Similarity Search"):
            for doc in response["context"]:
                st.write(doc.page_content)
                st.write("--------------------------------")
requirements.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ groq==0.5.0
2
+ langchain==0.1.19
3
+ langchain-community==0.0.38
4
+ langchain-core==0.1.52
5
+ langchain-groq==0.1.3
6
+ langchain-text-splitters==0.0.1
7
+ langsmith==0.1.56
8
+ pandas==2.2.2
9
+ pillow==10.3.0
10
+ streamlit==1.34.0
11
+ langchain_openai
12
+ langchain_core
13
+ python-dotenv
14
+ langchain_community
15
+ langserve
16
+ fastapi
17
+ uvicorn
18
+ sse_starlette
19
+ pypdf
20
+ faiss-cpu
21
+ cassio
22
+ langchain-groq
23
+ langchainhub
24
+ sentence_transformers
25
+ PyPDF2
26
+ langchain-objectbox