emar committed on
Commit
5dd7727
1 Parent(s): d6fc481

test zero 2

app.py CHANGED
@@ -1,32 +1,43 @@
 import os
 import gradio as gr
-import streamlit as st
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.core import (
     StorageContext,
-    load_index_from_storage,
+    load_index_from_storage, Settings,
 )
-
+from llama_index.llms.huggingface import HuggingFaceLLM
+import torch
 PERSIST_DIR = './storage'
 
+# Configure the settings
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+@spaces.GPU
+def setup():
+    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")
+
+    Settings.llm = HuggingFaceLLM(
+        model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+        tokenizer_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+        context_window=2048,
+        max_new_tokens=256,
+        generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
+        device_map="auto",
+    )
+
 
-# Load the existing index
+# load the existing index
+@spaces.GPU
 def load_context():
     storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
     index = load_index_from_storage(storage_context)
     query_engine = index.as_query_engine()
     return query_engine
 
-query_engine = None
-
-# Function to initialize the query engine with the API key
-def initialize_query_engine(api_key):
-    os.environ["OPENAI_API_KEY"] = api_key
-    global query_engine
-    query_engine = load_context()
+query_engine = load_context()
 
+@spaces.GPU
 def chatbot_response(message, history):
-    if query_engine is None:
-        return "Query engine is not initialized. Please enter your OpenAI API key."
     response = query_engine.query(message)
     return str(response)
 
@@ -38,17 +49,5 @@ iface = gr.ChatInterface(
     cache_examples=True,
 )
 
-with gr.Blocks() as app:
-    api_key = gr.Textbox(label="Enter your OpenAI API key here:", type="password")
-    initialize_button = gr.Button("Initialize Query Engine")
-
-    def on_initialize_button(api_key):
-        initialize_query_engine(api_key)
-        return "Query engine initialized successfully."
-
-    initialize_button.click(on_initialize_button, inputs=[api_key], outputs=[])
-
-    iface.render()
-
 if __name__ == "__main__":
-    app.launch()
+    iface.launch()
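
A note on the new app.py: @spaces.GPU is the Hugging Face ZeroGPU decorator, which fits the commit message "test zero 2". However, the new import block is shown in full above and contains no `import spaces`, so the module would raise a NameError at the first decorator; the diff also shows no call to setup(), without which LlamaIndex would fall back to its default (OpenAI) embed model and LLM when load_context() runs, and the DEVICE constant is never used. A minimal sketch of the apparently missing glue; these lines are our assumption, not part of the commit:

import spaces  # provides the @spaces.GPU decorator on ZeroGPU Spaces

setup()                        # configure Settings.embed_model / Settings.llm first
query_engine = load_context()  # then load the persisted index under those settings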
requirements.txt CHANGED
@@ -1,3 +1,14 @@
+// python-dotenv~=1.0.1
+torch~=2.3.1
+transformers~=4.41.2
 llama_index
+llama-index-embeddings-huggingface
+llama_index-llms-huggingface
+// openai~=1.35.3
+// requests~=2.32.3
+// protobuf==3.20.1
 gradio
+// bitsandbytes
+// huggingface_hub
+// deepspeed
 streamlit
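
One caveat on the updated requirements.txt: pip's requirements format treats `#`, not `//`, as the comment character, so lines like `// python-dotenv~=1.0.1` would make `pip install -r requirements.txt` fail with an invalid-requirement error. (The mixed `llama_index-llms-huggingface` spelling is harmless, since pip treats `-` and `_` in package names as equivalent.) A corrected sketch, assuming the `//` lines were meant to stay commented out:

# python-dotenv~=1.0.1
torch~=2.3.1
transformers~=4.41.2
llama_index
llama-index-embeddings-huggingface
llama-index-llms-huggingface
gradio
streamlit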
storage/default__vector_store.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d8bb1f3318dca17bf9088901fe8969c3ecd5dce1f297c07a07e2077b87012c7
-size 410370554
+oid sha256:c7f21290c9ca0d98d83762e6cbf1621ac8bce3cc98f15bd254d9653c36f44c87
+size 200616584
storage/docstore.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8882bc2a112d2513786bd75c3f4288687c67a678d617fc698fedf2644c3d9bcf
-size 58001534
+oid sha256:301cdfb0a3871ee4621110ace49fd2f8947e878c42dce6e0d684495dd7468b7e
+size 55884294
storage/index_store.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6148fdd4bfa9872a6cffe9b1ff2dba409022d918dbc5126a868bd46853432564
-size 984727
+oid sha256:c34a251621567e1b713e8a6a3f4063f6375d07bef723bafa883629f9b2213bfe
+size 958771
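
The three storage/*.json files are Git LFS pointers, so the diff records only new object hashes and sizes. The vector store roughly halving (410,370,554 to 200,616,584 bytes) is consistent with re-embedding the corpus with the 768-dimensional BAAI/bge-base-en-v1.5 model in place of OpenAI's 1536-dimensional text-embedding-ada-002. A hedged sketch of how such a persisted index is typically rebuilt with LlamaIndex; the "./docs" source directory is hypothetical and not part of this commit:

from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Embed with the same model app.py now configures at query time; mixing
# embedding models between indexing and querying breaks retrieval.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")

documents = SimpleDirectoryReader("./docs").load_data()  # hypothetical corpus path
index = VectorStoreIndex.from_documents(documents)
index.storage_context.persist(persist_dir="./storage")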
storage/openai_embeddings_specific.py DELETED
@@ -1,64 +0,0 @@
1
- import json
2
- import numpy as np
3
- import openai
4
- from dotenv import load_dotenv
5
- import os
6
- from sklearn.metrics.pairwise import cosine_similarity
7
-
8
- # Load environment variables
9
- load_dotenv()
10
-
11
- # Get the OpenAI API key
12
- openai.api_key = os.getenv('OPENAI_API_KEY')
13
- if not openai.api_key:
14
- raise ValueError("OpenAI API key not found. Please set the environment variable 'OPENAI_API_KEY'.")
15
-
16
- # Load vector store
17
- with open('default__vector_store.json', 'r') as f:
18
- vector_store = json.load(f)
19
-
20
- # Load document store
21
- with open('docstore.json', 'r') as f:
22
- doc_store = json.load(f)
23
-
24
- # Extract vectors and document IDs
25
- vectors = np.array(vector_store['vectors'])
26
- doc_ids = vector_store['doc_ids']
27
-
28
- # Create a dictionary to map document IDs to their content
29
- doc_id_to_content = {doc['doc_id']: doc for doc in doc_store['documents']}
30
-
31
- # Function to create embeddings using OpenAI API
32
- def create_embedding(text):
33
- response = openai.Embedding.create(input=text, model="text-embedding-ada-002")
34
- return response['data'][0]['embedding']
35
-
36
- # Function to generate responses using GPT-3.5-turbo
37
- def generate_response(prompt):
38
- response = openai.ChatCompletion.create(
39
- model="gpt-3.5-turbo",
40
- messages=[
41
- {"role": "system", "content": "You are a helpful assistant."},
42
- {"role": "user", "content": prompt}
43
- ]
44
- )
45
- return response.choices[0].message['content']
46
-
47
- # Create an embedding for the query
48
- query = "Who is Zaraphus?"
49
- query_embedding = create_embedding(query)
50
-
51
- # Compute cosine similarity between the query embedding and all stored vectors
52
- similarities = cosine_similarity([query_embedding], vectors)
53
-
54
- # Find the index of the most similar document
55
- most_similar_idx = np.argmax(similarities)
56
-
57
- # Retrieve the most similar document's content
58
- most_similar_doc_id = doc_ids[most_similar_idx]
59
- most_similar_doc_content = doc_id_to_content[most_similar_doc_id]['text']
60
-
61
- # Generate a response based on the most similar document
62
- prompt = f"Based on the following document, answer the question: {query}\n\nDocument:\n{most_similar_doc_content}"
63
- response = generate_response(prompt)
64
- print(response)
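
This deleted debugging script relied on the legacy pre-1.0 openai client (openai.Embedding.create, openai.ChatCompletion.create), an API removed in openai>=1.0, so it had already stopped working against current releases regardless of this commit's move away from OpenAI. For reference only, a sketch of the same two calls in the openai>=1.0 style; this port is not part of the commit:

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

def create_embedding(text):
    # openai>=1.0 equivalent of the deleted openai.Embedding.create call
    response = client.embeddings.create(input=text, model="text-embedding-ada-002")
    return response.data[0].embedding

def generate_response(prompt):
    # openai>=1.0 equivalent of the deleted openai.ChatCompletion.create call
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    return response.choices[0].message.content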