Curranj committed on
Commit
8ba15b8
·
1 Parent(s): 86f95a1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -46
app.py CHANGED
@@ -1,35 +1,21 @@
1
- import openai
2
  import sqlite3
3
  import numpy as np
4
  from sklearn.metrics.pairwise import cosine_similarity
5
- import gradio as gr
6
  import os
 
7
 
 
 
8
 
9
- # OpenAI API key, read from the "Secret" environment variable
10
- openai.api_key = os.environ["Secret"]
11
-
12
# Load every stored chunk and its embedding into memory once, at import time.
db_path = "text_chunks_with_embeddings.db"  # Update with the path to your database
conn = sqlite3.connect(db_path)
try:
    cursor = conn.cursor()
    cursor.execute("SELECT text, embedding FROM chunks")
    rows = cursor.fetchall()
finally:
    # Release the SQLite handle even when the query fails.
    conn.close()

# Map each chunk's text to its embedding. The embedding column is parsed as
# whitespace-separated numbers; a repeated text keeps its last embedding.
dictionary_of_vectors = {
    text: np.fromstring(embedding_str, sep=' ')
    for text, embedding_str in rows
}
31
 
32
def find_closest_neighbors(vector):
    """Return the four stored texts whose embeddings best match *vector*.

    Every entry of the module-level ``dictionary_of_vectors`` is scored by
    cosine similarity against *vector*; the result is a list of
    ``(text, similarity)`` pairs, most similar first.
    """
    scores = {
        text: cosine_similarity(vector.reshape(1, -1), stored.reshape(1, -1))[0][0]
        for text, stored in dictionary_of_vectors.items()
    }

    ranked = list(scores.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked[:4]
39
 
40
def generate_embedding(text):
    """Embed *text* with ``text-embedding-ada-002`` and return a NumPy array."""
    api_reply = openai.Embedding.create(input=text, engine="text-embedding-ada-002")
    first_item = api_reply['data'][0]
    return np.array(first_item['embedding'])
47
 
48
def context_gpt_response(question):
    """Answer *question* with GPT-4, using the closest stored chunks as context.

    The question is embedded with ``generate_embedding``, the best-matching
    chunks are found with ``find_closest_neighbors``, and their text is
    concatenated into the prompt.

    Args:
        question: The user's question as plain text.

    Returns:
        The text of the model's reply.
    """
    vector = generate_embedding(question)
    match_list = find_closest_neighbors(vector)

    # Each match is a (text, similarity) pair; only the text goes in the prompt.
    context = ''.join(str(match[0]) for match in match_list)
    context = context[:1500]  # Keep only the first 1500 characters of context

    prep = f"This is an OpenAI model designed to answer questions specific to grant-making applications for an aquarium. Here is some question-specific context: {context}. Q: {question} A: "

    # gpt-4 is a chat model: it is served by the chat-completions endpoint,
    # not by the legacy ``openai.Completion`` endpoint.
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prep}],
        temperature=0.7,
        max_tokens=220,
    )

    return response['choices'][0]['message']['content']
 
 
 
 
 
 
67
 
68
# Wrap the chatbot in a text-in/text-out Gradio interface and start serving it.
iface = gr.Interface(
    fn=context_gpt_response,
    inputs="text",
    outputs="text",
    title="Aquarium Grant Application Chatbot",
    description="Context-specific chatbot for grant writing",
    examples=[
        ["What types of projects are eligible for funding?"],
        ["Tell me more about the application process."],
        ["What will be the most impactful grant opportunities?"],
    ],
)
iface.launch()
 
1
+ import sklearn
2
  import sqlite3
3
  import numpy as np
4
  from sklearn.metrics.pairwise import cosine_similarity
5
+ import openai
6
  import os
7
+ import gradio as gr
8
 
9
+ # Set the OpenAI API key from the "Secret" environment variable
10
+ openai.api_key = os.environ["Secret"]
11
 
12
def find_closest_neighbors(vector1, dictionary_of_vectors):
    """Return the stored texts most similar to the query text.

    Args:
        vector1: Despite the name, this is the query text; it is embedded
            here with ``text-embedding-ada-002`` before being compared.
        dictionary_of_vectors: Mapping from chunk text to its embedding array.

    Returns:
        Up to four ``(text, cosine_similarity)`` pairs, most similar first.
    """
    embedding = openai.Embedding.create(
        input=vector1,
        engine="text-embedding-ada-002"
    )['data'][0]['embedding']
    # Shape the query as a single-row matrix once, instead of on every
    # iteration of the scoring loop.
    query = np.array(embedding).reshape(1, -1)

    cosine_similarities = {
        key: cosine_similarity(query, value.reshape(1, -1))[0][0]
        for key, value in dictionary_of_vectors.items()
    }

    sorted_cosine_similarities = sorted(
        cosine_similarities.items(), key=lambda x: x[1], reverse=True
    )
    return sorted_cosine_similarities[0:4]
25
 
26
def predict(message, history):
    """Stream a GPT-4 answer to *message*, using the closest stored chunks as context.

    Args:
        message: The latest user message.
        history: Earlier turns as an iterable of ``(user, assistant)`` pairs.

    Yields:
        The reply accumulated so far, growing as streamed chunks arrive.
    """
    # Load every chunk and its embedding from the database.
    conn = sqlite3.connect('text_chunks_with_embeddings.db')  # Update the database name
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT text, embedding FROM chunks")
        rows = cursor.fetchall()
    finally:
        # Release the SQLite handle even when the query fails.
        conn.close()

    # The embedding column is parsed as whitespace-separated numbers.
    dictionary_of_vectors = {
        text: np.fromstring(embedding_str, sep=' ')
        for text, embedding_str in rows
    }

    match_list = find_closest_neighbors(message, dictionary_of_vectors)
    # Each match is a (text, similarity) pair; only the text goes in the prompt.
    context = ''.join(str(match[0]) for match in match_list)
    context = context[:1500]  # Limit context to the first 1500 characters

    prep = f"This is an OpenAI model designed to answer questions specific to grant-making applications for an aquarium. Here is some question-specific context: {context}. Q: {message} A: "

    # Replay earlier turns in the chat-completions message format, then
    # append the context-augmented prompt as the newest user turn.
    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": prep})

    response = openai.ChatCompletion.create(
        model='gpt-4',
        messages=history_openai_format,
        temperature=1.0,
        stream=True
    )

    partial_message = ""
    for chunk in response:
        # A streamed delta may carry no text (e.g. only the role, or an empty
        # final delta); indexing ['content'] directly would raise KeyError.
        piece = chunk['choices'][0]['delta'].get('content')
        if not piece:
            continue
        partial_message += piece
        yield partial_message
67
+
68
# Build the chat UI around `predict`, enable queuing, and start the app.
demo = gr.ChatInterface(predict)
demo.queue().launch()
69