"""Gradio app that answers a question about a topic from its Wikipedia article.

The topic is resolved to a Wikipedia page, an extractive question-answering
model is run over the page text, and each lookup is appended to a CSV file
that lives in a Hugging Face dataset repository.
"""

from transformers import pipeline
import wikipedia
import random
import gradio as gr

model_name = "deepset/electra-base-squad2"
nlp = pipeline('question-answering', model=model_name, tokenizer=model_name)

# dataset save ------------------------------------
import csv
import huggingface_hub
import os
from huggingface_hub import Repository, hf_hub_download, upload_file
from datetime import datetime

# Lookups are stored in the awacke1/WikipediaSearch dataset as WikipediaSearch.csv
DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/WikipediaSearch"
DATASET_REPO_ID = "awacke1/WikipediaSearch"
DATA_DIRNAME = "data"
DATA_FILENAME = "WikipediaSearch.csv"
DATA_FILE = os.path.join(DATA_DIRNAME, DATA_FILENAME)
HF_TOKEN = os.environ.get("HF_TOKEN")

# Best-effort download of the CSV through the hub API. A failure here is not
# fatal: the Repository clone below checks the dataset out into the same
# directory.
# NOTE(review): no repo_type is passed, so this presumably does not target the
# dataset repo -- confirm before relying on it.
try:
    hf_hub_download(
        repo_id=DATASET_REPO_ID,
        filename=DATA_FILENAME,
        cache_dir=DATA_DIRNAME,
        force_filename=DATA_FILENAME,
    )
except Exception:
    print("file not found")

# Set up cloned dataset from repo for operations
repo = Repository(
    local_dir=DATA_DIRNAME, clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
)


def AIMemory(title: str, story: str):
    """Append one row to the dataset CSV and push the change to the hub.

    Args:
        title: Value for the "title" column.
        story: Value for the "story" column.

    Returns:
        Always the empty string. Nothing is written or pushed when either
        argument is empty.
    """
    if title and story:
        # newline="" leaves line-ending handling to the csv module.
        with open(DATA_FILE, "a", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=["title", "story", "time"])
            writer.writerow(
                {"title": title, "story": story, "time": str(datetime.now())}
            )
        repo.push_to_hub()
    return ""
# dataset save ------------------------------------


def _pick_disambiguation_option(error, topic):
    """Return a random usable page title from a DisambiguationError.

    Options that mention 'disambiguation' or 'All pages', or that equal the
    topic itself, are skipped. If nothing usable is left the original error is
    re-raised instead of failing inside random.choice.
    """
    choices = [
        option
        for option in error.options
        if 'disambiguation' not in option
        and 'All pages' not in option
        and option != topic
    ]
    if not choices:
        raise error
    return random.choice(choices)


def get_wiki_article(topic: str):
    """Resolve a topic to a Wikipedia page and record the lookup.

    Args:
        topic: Free-text topic to search for.

    Returns:
        A ``(content, url)`` tuple for the resolved page.

    Raises:
        IndexError: The search returned no results for ``topic``.
        wikipedia.exceptions.DisambiguationError: The topic is ambiguous and
            no usable alternative could be chosen.
    """
    try:
        results = wikipedia.search(topic, results=1)
    except wikipedia.exceptions.DisambiguationError as err:
        search = _pick_disambiguation_option(err, topic)
    else:
        if not results:
            raise IndexError(f"no Wikipedia search results for topic {topic!r}")
        search = results[0]

    try:
        page = wikipedia.page(search)
    except wikipedia.exceptions.DisambiguationError as err:
        search = _pick_disambiguation_option(err, topic)
        page = wikipedia.page(search)

    # Log plain strings: the resolved page title and the search term that
    # produced it. `search` is bound on every path that reaches this line.
    AIMemory(page.title, search)
    return page.content, page.url


def get_answer(topic, question):
    """Answer a question using the Wikipedia article found for a topic.

    Args:
        topic: Topic used to look up the article.
        question: Question to ask of the article text.

    Returns:
        A ``(answer, url, confidences)`` tuple: the answer text from the model,
        the article URL, and a dict mapping 'confidence' to the model's score.
    """
    w_art, w_url = get_wiki_article(topic)
    qa = {'question': question, 'context': w_art}
    res = nlp(qa)
    return res['answer'], w_url, {'confidence': res['score']}


inputs = [
    gr.inputs.Textbox(lines=2, label="Topic"),
    gr.inputs.Textbox(lines=2, label="Question"),
]
outputs = [
    gr.outputs.Textbox(type='str', label="Answer"),
    gr.outputs.Textbox(type='str', label="Wikipedia Reference Article"),
    gr.outputs.Label(type="confidences", label="Confidence in answer (assuming the correct wikipedia article)"),
]

title = "AI Wikipedia Search"
description = 'Contextual Question and Answer'
article = ''
examples = [
    ['Quantum', 'What is quanta in physics?'],
    ['Cicero', 'What quotes did Marcus Tullius Cicero make?'],
    ['Alzheimers', 'What causes alzheimers?'],
    ['Neuropathy', 'With neuropathy and neuro-muskoskeletal issues, and what are the treatments available?'],
    ['Chemotherapy', 'What are possible care options for patients in chemotherapy?'],
    ['Health', 'What is mindfulness and how does it affect health?'],
    ['Medicine', 'In medicine what is the Hippocratic Oath?'],
    ['Insurance', 'What is Medicare?'],
    ['Financial Services', 'Does Medicaid offer financial assistance?'],
    ['Ontology', 'Why is an anthology different than ontology?'],
    ['Taxonomy', 'What is a biology taxonomy?'],
    ['Pharmacy', 'What does a pharmacist do?'],
]

gr.Interface(
    get_answer,
    inputs,
    outputs,
    title=title,
    description=description,
    examples=examples,
    article="Saved dataset: https://huggingface.co/datasets/awacke1/WikipediaSearch",
    flagging_options=["strongly related", "related", "neutral", "unrelated", "strongly unrelated"],
).launch(share=False, enable_queue=False)