Spaces:
Runtime error
Runtime error
File size: 4,680 Bytes
2980408 31ca135 a9ea810 e37d22f 1da5a81 9ddaca4 4494cde 9ddaca4 95a7161 9ddaca4 a9ea810 9ddaca4 7055aff 1da5a81 5cd4f42 1da5a81 86e6300 02bd0fb afea2c7 db1f228 79976b8 afea2c7 ca04288 743e3c4 3ea1e2b 26fa27b a7b587b 43093ce a7b587b 1da5a81 02bd0fb 79976b8 3ea1e2b 26fa27b a7b587b 1da5a81 5fd9368 a9ea810 aaa1f66 81f8cdc 18e6a04 a9baa59 3a9bd63 81f8cdc 9c9f12f 81f8cdc 098e8e8 81f8cdc 6195c27 2980408 81f8cdc 2980408 5fd9368 f5e51e0 81f8cdc cc21469 6b774a8 18e6a04 a7b587b 18e6a04 6b774a8 18e6a04 f2889c6 c84f2f8 bd01a7f cc21469 f2889c6 a657bab 3a2d3ef 2980408 8c2e21c 2980408 3a2d3ef 2980408 92012a0 f2889c6 92012a0 2980408 0bbe279 2980408 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
import gradio as gr
import openai
import pandas as pd
import numpy as np
import csv
import os
from datasets import load_dataset
# The OpenAI API key is read from the environment variable literally named
# "openai.api_key"; os.environ.get yields None when that variable is not set.
openai.api_key= os.environ.get("openai.api_key")
from openai.embeddings_utils import get_embedding
import requests
# Model identifier that reply() passes to generate_embeddings(), where it is
# interpolated into the Hugging Face feature-extraction URL.
model_id = "sentence-transformers/all-MiniLM-L6-v2"
import json
# Token that generate_embeddings() sends as the Bearer credential. It is read
# from the environment variable literally named "hugginface.api.token"
# (spelling exactly as in the code); None when the variable is not set.
hf_token = os.environ.get("hugginface.api.token")
import re
from sklearn.metrics.pairwise import cosine_similarity
def generate_embeddings(texts, model_id, hf_token, timeout=120):
    """Request embeddings for *texts* from the Hugging Face inference API.

    Args:
        texts: Value sent as the ``"inputs"`` field of the JSON payload
            (reply() passes a single question string).
        model_id: Model identifier interpolated into the
            feature-extraction pipeline URL.
        hf_token: Token sent in the ``Authorization: Bearer`` header.
        timeout: Seconds to wait for the HTTP request before giving up.
            The payload sets ``wait_for_model``, so the default is generous.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within *timeout* seconds.
    """
    api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
    headers = {"Authorization": f"Bearer {hf_token}"}
    payload = {"inputs": texts, "options": {"wait_for_model": True}}
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
    # Fail loudly on HTTP errors instead of handing an error payload back to
    # the caller as if it were an embedding.
    response.raise_for_status()
    return response.json()
# Hugging Face dataset repository backing each selectable book, keyed by the
# name used in the UI dropdown. Insertion order is the display/load order.
_DATASET_REPOS = {
    "AP_Bio": "vjain/biology_AP_embeddings",
    "AP_Physics": "vjain/AP_physics_embeddings",
    "Personality": "vjain/Personality_em",
    "AP_statistics": "vjain/AP_statistics",
    "tax_embeddings": "vjain/tax_embeddings",
    "therapy": "vjain/therapy",
    "gurbani": "vjain/gurbani",
}


def _load_embedding_frame(repo_id):
    """Load the ``train`` split of dataset *repo_id* into a DataFrame."""
    frame = pd.DataFrame(load_dataset(repo_id)["train"])
    # Placeholder column kept so the frame's columns (and therefore the CSV
    # that reply() writes) stay the same as before.
    frame["similarity"] = 0
    return frame


# Mapping of dataset name -> DataFrame, loaded once at import time.
# reply() looks frames up here and reads their "embedding" and "text" columns.
dataframes = {
    name: _load_embedding_frame(repo_id)
    for name, repo_id in _DATASET_REPOS.items()
}
def reply(input, dataset_name):
    """Answer a question using the most similar passages of one dataset.

    The question is embedded with generate_embeddings(), compared against the
    selected frame's "embedding" column by cosine similarity, and the "text"
    of the five best-scoring rows is passed as context to the OpenAI
    completion endpoint.

    Args:
        input: The user's question. (The name shadows the builtin but is kept
            because it is part of the function's signature.)
        dataset_name: Key into the module-level ``dataframes`` mapping.

    Returns:
        The completion text stripped of surrounding spaces/newlines, or a
        human-readable message for an unknown dataset, an empty question, or
        any error raised while answering. This function never raises
        ``Exception`` subclasses; it is the UI boundary.
    """
    try:
        if dataset_name not in dataframes:
            return "Invalid dataset selected. Please select a valid dataset."
        # Treat whitespace-only questions as empty so they do not trigger the
        # embedding and completion API calls.
        if not input or not input.strip():
            return "Please Enter a Question to get an Answer"

        df = dataframes[dataset_name]
        input_vector = generate_embeddings(input, model_id, hf_token)

        # Score every row in a single call rather than once per row.
        scores = cosine_similarity(df["embedding"].tolist(), [input_vector])[:, 0]
        # assign() works on a copy, so the shared module-level frame is not
        # mutated by (possibly concurrent) requests.
        ranked = df.assign(similarities=scores)
        data = ranked.sort_values("similarities", ascending=False).head(5)
        # NOTE(review): looks like a debugging artifact; the file is
        # overwritten on every call. Kept to preserve the existing side effect.
        data.to_csv("sorted.csv")

        context = "\n".join(data["text"])
        prompt = (
            "\n"
            "Answer the following question using the context given below."
            "If you don't know the answer for certain, say I don't know.\n"
            f"Context: {context}\n"
            f"Q: {input}\n"
        )
        completion = openai.Completion.create(
            prompt=prompt,
            temperature=1,
            max_tokens=500,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            model="text-davinci-003",
        )
        return completion["choices"][0]["text"].strip(" \n")
    except Exception as e:
        # UI boundary: report the failure as the answer text instead of
        # letting the exception propagate to the web framework.
        return f"An error occurred: {e}"
# NOTE(review): gr.inputs / gr.outputs and the theme / layout keyword
# arguments look like the legacy Gradio API - confirm against the Gradio
# version this app is pinned to before upgrading.

# Dataset selector. The choices are taken from the `dataframes` mapping so the
# dropdown cannot drift out of sync with the datasets that are actually loaded.
csv_dropdown = gr.inputs.Dropdown(
    label="Select the Book",
    choices=list(dataframes),
    default="AP_Bio",
)

# Free-text question box.
input_text = gr.inputs.Textbox(
    label="Enter your questions here",
    placeholder="E.g. What is DNA?",
    lines=3,
)

# Single text field showing whatever reply() returns.
text_output = gr.outputs.Textbox(label="Answer")

description = "Scholar Bot is a question answering system designed to provide accurate and relevant answers to questions from this book hosted by OpenStax https://openstax.org/details/books/biology-ap-courses. Simply enter your question in the text box above and Scholar Bot will use advanced natural language processing algorithms to search a large corpus of biology text to find the best answer for you. Scholar Bot uses the Sentence Transformers model to generate embeddings of text, and OpenAI's GPT-3 language model to provide answers to your questions."

# Input order must match reply(input, dataset_name): question first, dataset second.
ui = gr.Interface(
    fn=reply,
    inputs=[input_text, csv_dropdown],
    outputs=[text_output],
    title="Scholar Bot",
    description=description,
    theme="light",
    layout="vertical",
    allow_flagging=False,
    examples=[["What is the function of DNA polymerase?", "AP_Bio"]],
)

ui.launch()