emar committed
Commit 25a6760
1 Parent(s): 4b94278

some modifications to hopefully fix gradio errors

Files changed (1)
  1. app.py +23 -12
app.py CHANGED
@@ -1,22 +1,23 @@
 import spaces
 import gradio as gr
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-from llama_index.core import (
-    StorageContext,
-    load_index_from_storage, Settings,
-)
+from llama_index.core import StorageContext, load_index_from_storage, Settings
 from llama_index.llms.huggingface import HuggingFaceLLM
 import torch
+from pydantic import BaseModel
 
 PERSIST_DIR = './storage'
 
 # Configure the settings
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+# Pydantic config to avoid protected namespace warning
+class Config(BaseModel):
+    model_config = {'protected_namespaces': ()}
+
 @spaces.GPU(duration=240)
 def setup():
-    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device="cpu")
-
+    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device=DEVICE)
     Settings.llm = HuggingFaceLLM(
         model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
         tokenizer_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
@@ -28,7 +29,7 @@ def setup():
 
 setup()
 
-# load the existing index
+# Load the existing index
 @spaces.GPU
 def load_context():
     storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
@@ -36,21 +37,31 @@ def load_context():
     query_engine = index.as_query_engine()
     return query_engine
 
+query_engine = None
+
+def initialize_query_engine():
+    global query_engine
+    query_engine = load_context()
 
+# Initialize query engine at the start
+initialize_query_engine()
 
+# Chatbot response function
 @spaces.GPU
 def chatbot_response(message, history):
+    if query_engine is None:
+        initialize_query_engine()
     response = query_engine.query(message)
     return str(response)
 
-
-query_engine = load_context()
-
-
+# Initialize Gradio interface
 iface = gr.ChatInterface(
     fn=chatbot_response,
     title="UESP Lore Chatbot: CPU bound version of Phi-3-mini",
-    description="Low quality and extremely slow version of the ones you can find on the github page.: https://github.com/emarron/UESP-lore I am not paying to have Llama3 on here.",
+    description=(
+        "Low quality and extremely slow version of the ones you can find on the github page: "
+        "https://github.com/emarron/UESP-lore. I am not paying to have Llama3 on here."
+    ),
     examples=["Who is Zaraphus?"],
     cache_examples=True,
 )
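
For context on the protected-namespace workaround in this commit: pydantic v2 warns whenever a model declares a field whose name starts with "model_" (for example model_name), and setting protected_namespaces=() in that model's config suppresses the warning. A minimal sketch, assuming pydantic >= 2; DemoModel and its field are hypothetical names, not part of this commit:

from pydantic import BaseModel, ConfigDict

class DemoModel(BaseModel):
    # Without the line below, pydantic v2 emits:
    #   UserWarning: Field "model_name" has conflict with protected namespace "model_".
    model_config = ConfigDict(protected_namespaces=())
    model_name: str  # any field starting with "model_" would otherwise trigger the warning

print(DemoModel(model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0"))

Note that this config only applies to the class that declares it, so a standalone Config class like the one added here does not change the behavior of the library's own pydantic models; if HuggingFaceLLM's internals raise the warning, it may still appear.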
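
The lazy initialization of query_engine is the other substantive change: the global starts as None and is rebuilt on first use, so a process that never ran the module-level setup can still answer. A minimal sketch of the same pattern in isolation, assuming the Hugging Face spaces package on a ZeroGPU Space; expensive_setup is a hypothetical stand-in for load_context:

import spaces

_resource = None  # populated on first use

def expensive_setup():
    return object()  # hypothetical stand-in for load_context()

@spaces.GPU  # on ZeroGPU Spaces, CUDA is only usable inside decorated functions
def handler(message, history):
    global _resource
    if _resource is None:  # rebuild if this process never ran the module-level init
        _resource = expensive_setup()
    return str(_resource)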