emar committed on
Commit
5dd7727
1 Parent(s): d6fc481

test zero 2

app.py CHANGED
@@ -1,32 +1,43 @@
 import os
 import gradio as gr
-import streamlit as st
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.core import (
     StorageContext,
-    load_index_from_storage,
+    load_index_from_storage, Settings,
 )
-
+from llama_index.llms.huggingface import HuggingFaceLLM
+import torch
 PERSIST_DIR = './storage'
 
+# Configure the settings
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+@spaces.GPU
+def setup():
+    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")
+
+    Settings.llm = HuggingFaceLLM(
+        model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+        tokenizer_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+        context_window=2048,
+        max_new_tokens=256,
+        generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95},
+        device_map="auto",
+    )
+
 
-# Load the existing index
+# load the existing index
+@spaces.GPU
 def load_context():
     storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
     index = load_index_from_storage(storage_context)
     query_engine = index.as_query_engine()
     return query_engine
 
-query_engine = None
-
-# Function to initialize the query engine with the API key
-def initialize_query_engine(api_key):
-    os.environ["OPENAI_API_KEY"] = api_key
-    global query_engine
-    query_engine = load_context()
+query_engine = load_context()
 
+@spaces.GPU
 def chatbot_response(message, history):
-    if query_engine is None:
-        return "Query engine is not initialized. Please enter your OpenAI API key."
     response = query_engine.query(message)
     return str(response)
 
@@ -38,17 +49,5 @@ iface = gr.ChatInterface(
     cache_examples=True,
 )
 
-with gr.Blocks() as app:
-    api_key = gr.Textbox(label="Enter your OpenAI API key here:", type="password")
-    initialize_button = gr.Button("Initialize Query Engine")
-
-    def on_initialize_button(api_key):
-        initialize_query_engine(api_key)
-        return "Query engine initialized successfully."
-
-    initialize_button.click(on_initialize_button, inputs=[api_key], outputs=[])
-
-    iface.render()
-
 if __name__ == "__main__":
-    app.launch()
+    iface.launch()
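
A note on the new app.py: @spaces.GPU is the Hugging Face ZeroGPU decorator, which fits the commit message "test zero 2". However, the new import block is shown in full above and contains no `import spaces`, so the module would raise a NameError at the first decorator; the diff also shows no call to setup(), without which LlamaIndex would fall back to its default (OpenAI) embed model and LLM when load_context() runs, and the DEVICE constant is never used. A minimal sketch of the apparently missing glue; these lines are our assumption, not part of the commit:

import spaces  # provides the @spaces.GPU decorator on ZeroGPU Spaces

setup()                        # configure Settings.embed_model / Settings.llm first
query_engine = load_context()  # then load the persisted index under those settings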
requirements.txt CHANGED
@@ -1,3 +1,14 @@
+// python-dotenv~=1.0.1
+torch~=2.3.1
+transformers~=4.41.2
 llama_index
+llama-index-embeddings-huggingface
+llama_index-llms-huggingface
+// openai~=1.35.3
+// requests~=2.32.3
+// protobuf==3.20.1
 gradio
+// bitsandbytes
+// huggingface_hub
+// deepspeed
 streamlit
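
One caveat on the updated requirements.txt: pip's requirements format treats `#`, not `//`, as the comment character, so lines like `// python-dotenv~=1.0.1` would make `pip install -r requirements.txt` fail with an invalid-requirement error. (The mixed `llama_index-llms-huggingface` spelling is harmless, since pip treats `-` and `_` in package names as equivalent.) A corrected sketch, assuming the `//` lines were meant to stay commented out:

# python-dotenv~=1.0.1
torch~=2.3.1
transformers~=4.41.2
llama_index
llama-index-embeddings-huggingface
llama-index-llms-huggingface
gradio
streamlit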
storage/default__vector_store.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d8bb1f3318dca17bf9088901fe8969c3ecd5dce1f297c07a07e2077b87012c7
-size 410370554
+oid sha256:c7f21290c9ca0d98d83762e6cbf1621ac8bce3cc98f15bd254d9653c36f44c87
+size 200616584
storage/docstore.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8882bc2a112d2513786bd75c3f4288687c67a678d617fc698fedf2644c3d9bcf
-size 58001534
+oid sha256:301cdfb0a3871ee4621110ace49fd2f8947e878c42dce6e0d684495dd7468b7e
+size 55884294
storage/index_store.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6148fdd4bfa9872a6cffe9b1ff2dba409022d918dbc5126a868bd46853432564
-size 984727
+oid sha256:c34a251621567e1b713e8a6a3f4063f6375d07bef723bafa883629f9b2213bfe
+size 958771
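
The three storage/*.json files are Git LFS pointers, so the diff records only new object hashes and sizes. The vector store roughly halving (410,370,554 to 200,616,584 bytes) is consistent with re-embedding the corpus with the 768-dimensional BAAI/bge-base-en-v1.5 model in place of OpenAI's 1536-dimensional text-embedding-ada-002. A hedged sketch of how such a persisted index is typically rebuilt with LlamaIndex; the "./docs" source directory is hypothetical and not part of this commit:

from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Embed with the same model app.py now configures at query time; mixing
# embedding models between indexing and querying breaks retrieval.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")

documents = SimpleDirectoryReader("./docs").load_data()  # hypothetical corpus path
index = VectorStoreIndex.from_documents(documents)
index.storage_context.persist(persist_dir="./storage")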
storage/openai_embeddings_specific.py DELETED
@@ -1,64 +0,0 @@
1
- import json
2
- import numpy as np
3
- import openai
4
- from dotenv import load_dotenv
5
- import os
6
- from sklearn.metrics.pairwise import cosine_similarity
7
-
8
- # Load environment variables
9
- load_dotenv()
10
-
11
- # Get the OpenAI API key
12
- openai.api_key = os.getenv('OPENAI_API_KEY')
13
- if not openai.api_key:
14
- raise ValueError("OpenAI API key not found. Please set the environment variable 'OPENAI_API_KEY'.")
15
-
16
- # Load vector store
17
- with open('default__vector_store.json', 'r') as f:
18
- vector_store = json.load(f)
19
-
20
- # Load document store
21
- with open('docstore.json', 'r') as f:
22
- doc_store = json.load(f)
23
-
24
- # Extract vectors and document IDs
25
- vectors = np.array(vector_store['vectors'])
26
- doc_ids = vector_store['doc_ids']
27
-
28
- # Create a dictionary to map document IDs to their content
29
- doc_id_to_content = {doc['doc_id']: doc for doc in doc_store['documents']}
30
-
31
- # Function to create embeddings using OpenAI API
32
- def create_embedding(text):
33
- response = openai.Embedding.create(input=text, model="text-embedding-ada-002")
34
- return response['data'][0]['embedding']
35
-
36
- # Function to generate responses using GPT-3.5-turbo
37
- def generate_response(prompt):
38
- response = openai.ChatCompletion.create(
39
- model="gpt-3.5-turbo",
40
- messages=[
41
- {"role": "system", "content": "You are a helpful assistant."},
42
- {"role": "user", "content": prompt}
43
- ]
44
- )
45
- return response.choices[0].message['content']
46
-
47
- # Create an embedding for the query
48
- query = "Who is Zaraphus?"
49
- query_embedding = create_embedding(query)
50
-
51
- # Compute cosine similarity between the query embedding and all stored vectors
52
- similarities = cosine_similarity([query_embedding], vectors)
53
-
54
- # Find the index of the most similar document
55
- most_similar_idx = np.argmax(similarities)
56
-
57
- # Retrieve the most similar document's content
58
- most_similar_doc_id = doc_ids[most_similar_idx]
59
- most_similar_doc_content = doc_id_to_content[most_similar_doc_id]['text']
60
-
61
- # Generate a response based on the most similar document
62
- prompt = f"Based on the following document, answer the question: {query}\n\nDocument:\n{most_similar_doc_content}"
63
- response = generate_response(prompt)
64
- print(response)
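
This deleted debugging script relied on the legacy pre-1.0 openai client (openai.Embedding.create, openai.ChatCompletion.create), an API removed in openai>=1.0, so it had already stopped working against current releases regardless of this commit's move away from OpenAI. For reference only, a sketch of the same two calls in the openai>=1.0 style; this port is not part of the commit:

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

def create_embedding(text):
    # openai>=1.0 equivalent of the deleted openai.Embedding.create call
    response = client.embeddings.create(input=text, model="text-embedding-ada-002")
    return response.data[0].embedding

def generate_response(prompt):
    # openai>=1.0 equivalent of the deleted openai.ChatCompletion.create call
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    return response.choices[0].message.content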