from transformers import pipeline
import wikipedia
import random
import gradio as gr
import csv
model_name = "deepset/electra-base-squad2"
nlp = pipeline('question-answering', model=model_name, tokenizer=model_name)
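# Illustrative sketch (assumed example, not part of the app flow): the QA
# pipeline takes a question plus a context passage and returns the extracted
# answer span with a confidence score, e.g.
#   nlp({'question': 'Who wrote Hamlet?',
#        'context': 'Hamlet is a tragedy written by William Shakespeare.'})
#   # -> {'answer': 'William Shakespeare', 'score': ..., 'start': ..., 'end': ...}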

# dataset save ------------------------------------
import huggingface_hub
import os
from huggingface_hub import Repository, hf_hub_download, upload_file
from datetime import datetime
# persist each search to the awacke1/WikipediaSearch dataset
DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/WikipediaSearch"
DATASET_REPO_ID = "awacke1/WikipediaSearch"
DATA_FILENAME = "WikipediaSearch.csv"
DATA_DIRNAME = "data"
DATA_FILE = os.path.join(DATA_DIRNAME, DATA_FILENAME)
HF_TOKEN = os.environ.get("HF_TOKEN")
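# HF_TOKEN is assumed here to be a write-scoped access token stored as a
# Space secret; the Repository clone and push below authenticate with it.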
# Download dataset repo using hub download
try:
    hf_hub_download(
        repo_id=DATASET_REPO_ID,
        repo_type="dataset",  # the repo is a dataset, not a model
        filename=DATA_FILENAME,
        cache_dir=DATA_DIRNAME,
        force_filename=DATA_FILENAME
    )
except Exception:
    print("file not found")
def AIMemory(title: str, story: str):
    # Append a row to the CSV, then push the commit to the dataset repo.
    # `repo` is the Repository clone created below; Python resolves the
    # global at call time, so the late definition is safe.
    if title and story:
        with open(DATA_FILE, "a", newline="") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=["title", "story", "time"])
            writer.writerow({"title": title, "story": story, "time": str(datetime.now())})
        commit_url = repo.push_to_hub()
    return ""
# Set up cloned dataset from repo for operations
repo = Repository(
    local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
)
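# Note: DATA_FILE resolves inside this local "data" checkout, so rows appended
# in AIMemory land in the cloned working tree that push_to_hub() commits.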
# dataset save ------------------------------------


def get_wiki_article(topic):
    # Find the closest-matching article title for the topic.
    try:
        search = wikipedia.search(topic, results=1)[0]
    except wikipedia.DisambiguationError as e:
        choices = [x for x in e.options if ('disambiguation' not in x) and ('All pages' not in x) and (x != topic)]
        search = random.choice(choices)
    # Fetch the page; if the title is ambiguous, pick a random concrete option.
    try:
        p = wikipedia.page(search)
        print(p)
    except wikipedia.exceptions.DisambiguationError as e:
        print("disambiguation error")
        choices = [x for x in e.options if ('disambiguation' not in x) and ('All pages' not in x) and (x != topic)]
        print(choices)
        s = random.choice(choices)
        p = wikipedia.page(s)
        saved = AIMemory(p.content, p.url)
    return p.content, p.url
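# Illustrative call (hypothetical topic, assumes network access to Wikipedia):
#   text, url = get_wiki_article("Stoicism")
#   # url -> "https://en.wikipedia.org/wiki/Stoicism"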

def get_answer(topic, question):
    # Retrieve the article, run extractive QA over it, and log the result.
    w_art, w_url = get_wiki_article(topic)
    print(topic)
    print(question)
    print(w_art)

    qa = {'question': question, 'context': w_art}
    res = nlp(qa)
    saved = AIMemory(res['answer'], w_url)
    return res['answer'], w_url, {'confidence': res['score']}
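# Illustrative call (hypothetical inputs): get_answer("Medicine",
# "What is the Hippocratic Oath?") returns the extracted answer span, the
# source article URL, and {'confidence': score} for the Label output below.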


inputs = [
    gr.inputs.Textbox(lines=2, label="Topic"),
    gr.inputs.Textbox(lines=2, label="Question")
]
outputs = [
    gr.outputs.Textbox(type='str', label="Answer"),
    gr.outputs.Textbox(type='str', label="Wikipedia Reference Article"),
    gr.outputs.Label(type="confidences", label="Confidence in answer (assuming the correct wikipedia article)"),
]

title = "AI Wikipedia Search"
description = 'Contextual Question and Answer'
examples = [
    ['Health and fitness', 'What is the DSM-IV?'],
    ['Technology and applied sciences', 'List of military strategies and concepts?'],
    ['Culture and the arts', 'What films are considered the best?'],
    ['Health and fitness', 'What are the types of psychotherapies?'],
    ['Health and fitness', 'What are macronutrients?'],
    ['Health and fitness', 'What are micronutrients?'],
    ['Health and fitness', 'What are Nootropics?'],
    ['Health and fitness', 'What is the timeline of psychology?'],
    ['Cicero', 'What quotes did Marcus Tullius Cicero make?'],
    ['Alzheimers', "What causes Alzheimer's?"],
    ['Neuropathy', 'With neuropathy and neuro-musculoskeletal issues, what treatments are available?'],
    ['Chemotherapy', 'What are possible care options for patients in chemotherapy?'],
    ['Health', 'What is mindfulness and how does it affect health?'],
    ['Medicine', 'In medicine what is the Hippocratic Oath?'],
    ['Insurance', 'What is Medicare?'],
    ['Financial Services', 'Does Medicaid offer financial assistance?'],
    ['Ontology', 'How is an anthology different from an ontology?'],
    ['Taxonomy', 'What is a biology taxonomy?'],
    ['Pharmacy', 'What does a pharmacist do?']     
]

gr.Interface(get_answer, inputs, outputs, title=title, description=description, examples=examples,
             article="Saved dataset: https://huggingface.co/datasets/awacke1/WikipediaSearch stores each search and the resulting URL. A list of topics is at https://en.wikipedia.org/wiki/Wikipedia:Contents/Lists and the wikipedia library docs are at https://pypi.org/project/wikipedia/",
             flagging_options=["strongly related", "related", "neutral", "unrelated", "strongly unrelated"]).launch(debug=True, share=False, enable_queue=False)