# awacke1's picture
# Update app.py
# b04249f
from transformers import pipeline
import wikipedia
import random
import gradio as gr
import csv
# Question-answering pipeline; the same checkpoint name is used for both the
# model and the tokenizer.
model_name = "deepset/electra-base-squad2"
nlp = pipeline(
    task="question-answering",
    model=model_name,
    tokenizer=model_name,
)
# dataset save ------------------------------------
import huggingface_hub
import os
from huggingface_hub import Repository, hf_hub_download, upload_file
from datetime import datetime
# Hub dataset repository (awacke1/WikipediaSearch) and the CSV file used with it.
DATASET_REPO_ID = "awacke1/WikipediaSearch"
DATASET_REPO_URL = "https://huggingface.co/datasets/" + DATASET_REPO_ID
DATA_FILENAME = "WikipediaSearch.csv"
# Local path of the CSV, inside the "data" directory.
DATA_FILE = os.path.join("data", DATA_FILENAME)
# Token read from the environment; None when HF_TOKEN is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
# Best-effort download of the CSV from the Hub. A failure is reported on
# stdout and start-up continues.
# NOTE(review): DATA_DIRNAME is not defined anywhere in the visible file, so
# this call raises NameError and always ends up in the handler below. It is
# deliberately left as written: the Repository clone further down also targets
# the "data" directory, so pointing this download there could change start-up
# behaviour. TODO: confirm the intended cache directory or remove this call.
try:
    hf_hub_download(
        repo_id=DATASET_REPO_ID,
        filename=DATA_FILENAME,
        cache_dir=DATA_DIRNAME,
        force_filename=DATA_FILENAME,
    )
except Exception as err:
    # `Exception` instead of a bare `except:` so Ctrl-C / SystemExit are not
    # swallowed; the real cause is printed instead of being hidden.
    print("file not found:", repr(err))
def AIMemory(title: str, story: str) -> str:
    """Append one row to the dataset CSV and push the change to the Hub.

    Nothing is written or pushed when either argument is empty/falsy.

    Args:
        title: Value stored in the "title" column.
        story: Value stored in the "story" column.

    Returns:
        Always the empty string.
    """
    if not (title and story):
        return ""

    row = {"title": title, "story": story, "time": str(datetime.now())}
    # newline="" is what the csv module expects from the file object, so that
    # line breaks embedded in a field are quoted correctly and no extra "\r"
    # is written. utf-8 keeps non-ASCII text writable whatever the locale is.
    with open(DATA_FILE, "a", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=["title", "story", "time"])
        writer.writerow(row)
    # Push only after the file has been closed, so the new row is on disk.
    repo.push_to_hub()
    return ""
# Clone the dataset repository into the local "data" directory; this object is
# what AIMemory uses to push appended rows.
repo = Repository(
    local_dir="data",
    clone_from=DATASET_REPO_URL,
    use_auth_token=HF_TOKEN,
)
# dataset save ------------------------------------
def _usable_disambiguation_options(options, topic):
    """Return the disambiguation options worth trying, or raise if none remain."""
    usable = [
        option
        for option in options
        if 'disambiguation' not in option
        and 'All pages' not in option
        and option != topic
    ]
    if not usable:
        # random.choice([]) would raise a bare IndexError; keep the type but
        # say what actually went wrong.
        raise IndexError(
            "No usable disambiguation option for topic {!r}".format(topic)
        )
    return usable


def get_wiki_article(topic):
    """Fetch the Wikipedia article for ``topic`` and record it via AIMemory.

    The first search hit is loaded. When Wikipedia reports a disambiguation,
    one of the remaining options is picked at random and loaded instead.

    Args:
        topic: Search string passed to ``wikipedia.search``.

    Returns:
        A ``(content, url)`` tuple for the loaded page.

    Raises:
        IndexError: If the search returns no results, or every
            disambiguation option is filtered out.
        Exception: Anything raised by the retry ``wikipedia.page`` call after
            a disambiguation is not caught here.
    """
    try:
        hits = wikipedia.search(topic, results=1)
    except wikipedia.exceptions.DisambiguationError as e:
        search = random.choice(_usable_disambiguation_options(e.options, topic))
    else:
        if not hits:
            # Previously surfaced as an unexplained "list index out of range".
            raise IndexError(
                "No Wikipedia article found for topic {!r}".format(topic)
            )
        search = hits[0]

    try:
        p = wikipedia.page(search)
        print(p)
    except wikipedia.exceptions.DisambiguationError as e:
        print("disambiguation error")
        choices = _usable_disambiguation_options(e.options, topic)
        print(choices)
        p = wikipedia.page(random.choice(choices))

    # NOTE(review): the full article text is stored in the "title" column
    # here; confirm that is intended rather than the topic.
    AIMemory(p.content, p.url)
    return p.content, p.url
def get_answer(topic, question):
    """Answer ``question`` using the Wikipedia article found for ``topic``.

    Returns a 3-tuple: the answer text, the article URL, and a
    ``{'confidence': score}`` dict built from the pipeline result.
    """
    article_text, article_url = get_wiki_article(topic)

    # Echo the inputs and the fetched article to stdout.
    for item in (topic, question, article_text):
        print(item)

    result = nlp({'question': question, 'context': article_text})
    answer = result['answer']
    # Record the answer together with the article URL.
    AIMemory(answer, article_url)
    return answer, article_url, {'confidence': result['score']}
# Two 2-line text boxes, in the order get_answer takes its arguments.
inputs = [
    gr.inputs.Textbox(lines=2, label=caption)
    for caption in ("Topic", "Question")
]
# Two text outputs followed by a label, matching get_answer's 3-tuple.
outputs = [
    *(
        gr.outputs.Textbox(type="str", label=caption)
        for caption in ("Answer", "Wikipedia Reference Article")
    ),
    gr.outputs.Label(
        type="confidences",
        label="Confidence in answer (assuming the correct wikipedia article)",
    ),
]
# Static text for the interface.
title = "AI Wikipedia Search"
description = "Contextual Question and Answer"
article = ""

# Example rows as [topic, question] lists, in display order.
examples = [
    [subject, query]
    for subject, query in (
        ("Health and fitness", "What is the DSM-IV?"),
        ("Technology and applied sciences", "List of military strategies and concepts?"),
        ("Culture and the arts", "What films are considered the best?"),
        ("Health and fitness", "What are the types of psychotherapies?"),
        ("Health and fitness", "What are macronutrients?"),
        ("Health and fitness", "What are micronutrients?"),
        ("Health and fitness", "What are Nootropics?"),
        ("Health and fitness", "What is the timeline of psychology?"),
        ("Cicero", "What quotes did Marcus Tullius Cicero make?"),
        ("Alzheimers", "What causes alzheimers?"),
        ("Neuropathy", "With neuropathy and neuro-muskoskeletal issues, and what are the treatments available?"),
        ("Chemotherapy", "What are possible care options for patients in chemotherapy?"),
        ("Health", "What is mindfulness and how does it affect health?"),
        ("Medicine", "In medicine what is the Hippocratic Oath?"),
        ("Insurance", "What is Medicare?"),
        ("Financial Services", "Does Medicaid offer financial assistance?"),
        ("Ontology", "Why is an anthology different than ontology?"),
        ("Taxonomy", "What is a biology taxonomy?"),
        ("Pharmacy", "What does a pharmacist do?"),
    )
]
# Build the interface around get_answer and start serving it.
gr.Interface(
    fn=get_answer,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    examples=examples,
    article=(
        "Saved dataset: https://huggingface.co/datasets/awacke1/WikipediaSearch "
        "stores search and the result url. "
        "List of topics is at https://en.wikipedia.org/wiki/Wikipedia:Contents/Lists "
        "and wikipedia library docs are here: https://pypi.org/project/wikipedia/"
    ),
    flagging_options=[
        "strongly related",
        "related",
        "neutral",
        "unrelated",
        "strongly unrelated",
    ],
).launch(debug=True, share=False, enable_queue=False)