ishaan-mital committed
Commit
9bd7561
1 Parent(s): 0976241

initial commit

Files changed (2):
  1. app.py (+47, -47)
  2. requirements.txt (+5, -5)
app.py CHANGED
@@ -1,53 +1,53 @@
 from gradio_client import Client
 import gradio as gr
 import requests
-# from langchain.chains import RetrievalQA
-# import pinecone
-# from langchain.vectorstores import Pinecone
+from langchain.chains import RetrievalQA
+import pinecone
+from langchain.vectorstores import Pinecone
 import os
-# from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-# import time
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+import time
 
 API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
 headers = {"Authorization": f"Bearer {os.environ.get('API_KEY')}"}
-retrieval = Client("https://ishaan-mital-ncert-helper-vector-db.hf.space/--replicas/149bl5mjn/")
+# retrieval = Client("https://ishaan-mital-ncert-helper-vector-db.hf.space/--replicas/149bl5mjn/")
 
-# embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
-# # device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
+embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
+# device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
 
-# embed_model = HuggingFaceEmbeddings(
-#     model_name=embed_model_id,
-#     # model_kwargs={'device': device},
-#     # encode_kwargs={'device': device, 'batch_size': 32}
-# )
+embed_model = HuggingFaceEmbeddings(
+    model_name=embed_model_id,
+    # model_kwargs={'device': device},
+    # encode_kwargs={'device': device, 'batch_size': 32}
+)
 
 
-# pinecone.init(
-#     api_key=os.environ.get('PINECONE_API_KEY'),
-#     environment=os.environ.get('PINECONE_ENVIRONMENT')
-# )
+pinecone.init(
+    api_key=os.environ.get('PINECONE_API_KEY'),
+    environment=os.environ.get('PINECONE_ENVIRONMENT')
+)
 
-# index_name = 'llama-rag'
-# index = pinecone.Index(index_name)
-# text_field = 'text' # field in metadata that contains text content
-# docs = [
-#     "this is one document",
-#     "and another document"
-# ]
+index_name = 'llama-rag'
+index = pinecone.Index(index_name)
+text_field = 'text' # field in metadata that contains text content
+docs = [
+    "this is one document",
+    "and another document"
+]
 
-# embeddings = embed_model.embed_documents(docs)
-# if index_name not in pinecone.list_indexes():
-#     pinecone.create_index(
-#         index_name,
-#         dimension=len(embeddings[0]),
-#         metric='cosine'
-#     )
-#     # wait for index to finish initialization
-#     while not pinecone.describe_index(index_name).status['ready']:
-#         time.sleep(1)
-# vectorstore = Pinecone(
-#     index, embed_model.embed_query, text_field
-# )
+embeddings = embed_model.embed_documents(docs)
+if index_name not in pinecone.list_indexes():
+    pinecone.create_index(
+        index_name,
+        dimension=len(embeddings[0]),
+        metric='cosine'
+    )
+    # wait for index to finish initialization
+    while not pinecone.describe_index(index_name).status['ready']:
+        time.sleep(1)
+vectorstore = Pinecone(
+    index, embed_model.embed_query, text_field
+)
 
 def call_llm_api(input_text,context):
     payload = {
@@ -56,19 +56,19 @@ def call_llm_api(input_text,context):
     response = requests.post(API_URL, headers=headers, json=payload)
     return response.json() # Adjust as needed based on your API response format
 
-# rag_pipeline = RetrievalQA.from_chain_type(
-#     llm=call_llm_api, chain_type='stuff',
-#     retriever=vectorstore.as_retriever()
-# )
+rag_pipeline = RetrievalQA.from_chain_type(
+    llm=call_llm_api, chain_type='stuff',
+    retriever=vectorstore.as_retriever()
+)
 
 
 def main(question):
-    # return rag_pipeline(question)
-    global chatbot
-    context = retrieval.predict(question, api_name = "/predict")
-    answer=call_llm_api(question,context)
-    # chatbot = answer[1]
-    return answer[0]
+    return rag_pipeline(question)
+    # global chatbot
+    # context = retrieval.predict(question, api_name = "/predict")
+    # answer=call_llm_api(question,context)
+    # # chatbot = answer[1]
+    # return answer[0]
 
 demo = gr.Interface(main, inputs = "text", outputs = "text")
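A note on the newly enabled pipeline: `RetrievalQA.from_chain_type` expects a LangChain `LLM` object, so passing the bare `call_llm_api` function as `llm=` will fail LangChain's validation. Below is a minimal sketch of a compatible wrapper, assuming the stock Inference API text-generation payload ({"inputs": prompt}), since the diff elides the actual payload body; the `ZephyrAPILLM` name is illustrative, not from the commit.

import os
from typing import List, Optional

import requests
from langchain.llms.base import LLM

API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
headers = {"Authorization": f"Bearer {os.environ.get('API_KEY')}"}

class ZephyrAPILLM(LLM):
    """Illustrative wrapper exposing the hosted endpoint as a LangChain LLM."""

    @property
    def _llm_type(self) -> str:
        return "zephyr-inference-api"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        # Assumed payload shape; the commit's real payload dict is elided by the diff.
        response = requests.post(API_URL, headers=headers, json={"inputs": prompt})
        response.raise_for_status()
        # Hosted text-generation models usually return [{"generated_text": "..."}].
        return response.json()[0]["generated_text"]

# rag_pipeline = RetrievalQA.from_chain_type(
#     llm=ZephyrAPILLM(), chain_type='stuff',
#     retriever=vectorstore.as_retriever()
# )

Relatedly, `main` returns the chain's raw output; in LangChain versions of this era `rag_pipeline(question)` returns a dict (typically with a "result" key), so the Gradio text output would likely render better as `rag_pipeline(question)["result"]`.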
 
requirements.txt CHANGED
@@ -2,8 +2,8 @@ hugchat
 gradio
 gradio_client
 gtts
-# pydantic==1.10.9
-# langchain
-# pinecone-client==2.2.2
-# faiss-cpu
-# sentence_transformers
+pydantic==1.10.9
+langchain
+pinecone-client==2.2.2
+faiss-cpu
+sentence_transformers
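The uncommented pins line up with the code now active in app.py: langchain and pinecone-client for the RAG pipeline, sentence_transformers backing HuggingFaceEmbeddings, and pydantic held at 1.x for compatibility with LangChain releases of this era (faiss-cpu is not referenced by app.py as committed). A small sanity-check sketch, assuming the same embedding model, for the dimension that app.py will hand to pinecone.create_index:

# Sketch, not part of the commit: confirm the embedding width before the
# 'llama-rag' index is created with dimension=len(embeddings[0]).
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

embed_model = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2'
)
print(len(embed_model.embed_query("dimension probe")))  # expect 384 for all-MiniLM-L6-v2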