# Hosted file snapshot: revision a952ee9, file size 3,941 bytes.
from pymed import PubMed
from typing import List
from haystack import component
from haystack import Document
from haystack.components.generators import HuggingFaceTGIGenerator
from dotenv import load_dotenv
import os
from haystack import Pipeline
from haystack.components.builders.prompt_builder import PromptBuilder
import gradio as gr
import time
# Loading the Hugging Face key from a .env file is currently disabled:
# load_dotenv()
# os.environ['HUGGINGFACE_API_KEY'] = os.getenv('HUGGINGFACE_API_KEY')

# Module-wide PubMed client; the tool name and e-mail are passed to pymed
# as the caller identification for every search.
pubmed = PubMed(
    tool="Haystack2.0Prototype",
    email="dummyemail@gmail.com",
)
def documentize(article):
    """Convert one PubMed search result into a Haystack ``Document``.

    The abstract becomes the document content, while the title and keywords
    are stored in ``meta`` under the keys ``'title'`` and ``'keywords'``.

    Args:
        article: A record yielded by ``pubmed.query``. Its ``abstract`` and
            ``title`` attributes are read; ``keywords`` is read when present.

    Returns:
        A ``Document`` built from the record.
    """
    # NOTE(review): pymed's book-type records appear not to expose a
    # ``keywords`` attribute -- confirm against the installed version.
    # Falling back to an empty list keeps such a record usable instead of
    # raising AttributeError; records that do have the attribute are passed
    # through unchanged.
    keywords = getattr(article, 'keywords', [])
    return Document(
        content=article.abstract,
        meta={'title': article.title, 'keywords': keywords},
    )
@component
class PubMedFetcher():
    """Haystack component that looks up PubMed articles for keyword queries."""

    @component.output_types(articles=List[Document])
    def run(self, queries: list[str]):
        """Search PubMed once per keyword line and collect the hits.

        Args:
            queries: Strings received from the upstream component. Only the
                first element is used: it is split on newlines and every
                non-blank line is sent to PubMed as its own search, asking
                for a single result.

        Returns:
            A dict with one key, ``'articles'``, holding the ``Document``
            objects built from all searches that succeeded. The list is
            empty when ``queries`` is empty or nothing could be fetched.
        """
        # Nothing to search for when the upstream component produced no text;
        # indexing [0] below would otherwise raise IndexError.
        if not queries:
            return {'articles': []}

        # One search term per line; drop surrounding whitespace and skip
        # blank lines so empty strings are never sent to PubMed.
        cleaned_queries = [
            line.strip() for line in queries[0].split('\n') if line.strip()
        ]

        articles = []
        for query in cleaned_queries:
            # Best-effort per query: a failure on one term is reported and
            # the remaining terms are still searched.
            try:
                response = pubmed.query(query, max_results=1)
                documents = [documentize(article) for article in response]
            except Exception as e:
                print(e)
                print(f"Couldn't fetch articles for queries: {queries}")
                continue
            articles.extend(documents)

        return {'articles': articles}
# Both generators are built from the same model identifier; each one is
# warmed up immediately after it is constructed.
_GENERATOR_MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# Generator that receives the keyword-extraction prompt.
keyword_llm = HuggingFaceTGIGenerator(_GENERATOR_MODEL)
keyword_llm.warm_up()

# Generator that receives the final question-answering prompt.
llm = HuggingFaceTGIGenerator(_GENERATOR_MODEL)
llm.warm_up()
# Prompt template for the keyword-extraction step. ``{{ question }}`` is the
# only placeholder. The worked example shows one keyword per line, which
# matters because PubMedFetcher.run splits the model's reply on newlines and
# searches each line separately.
# NOTE(review): "Here is an examples:" is a typo in the prompt text; it is
# left untouched here because the string is sent to the model verbatim.
keyword_prompt_template = """
Your task is to convert the following question into 3 keywords that can be used to find relevant medical research papers on PubMed.
Here is an examples:
question: "What are the latest treatments for major depressive disorder?"
keywords:
Antidepressive Agents
Depressive Disorder, Major
Treatment-Resistant depression
---
question: {{ question }}
keywords:
"""
# Prompt template for the answering step. Placeholders: ``question`` (the
# user's question) and ``articles`` (an iterable of documents). For each
# article the template prints its ``content`` followed by the ``keywords``
# and ``title`` entries of its ``meta`` dict.
prompt_template = """
Answer the question truthfully based on the given documents.
If the documents don't contain an answer, use your existing knowledge base.
q: {{ question }}
Articles:
{% for article in articles %}
{{article.content}}
keywords: {{article.meta['keywords']}}
title: {{article.meta['title']}}
{% endfor %}
"""
# Build the individual components of the question-answering pipeline.
keyword_prompt_builder = PromptBuilder(template=keyword_prompt_template)
prompt_builder = PromptBuilder(template=prompt_template)
fetcher = PubMedFetcher()

pipe = Pipeline()

# Register every component under the name used in the connections below.
for _component_name, _component_instance in (
    ("keyword_prompt_builder", keyword_prompt_builder),
    ("keyword_llm", keyword_llm),
    ("pubmed_fetcher", fetcher),
    ("prompt_builder", prompt_builder),
    ("llm", llm),
):
    pipe.add_component(_component_name, _component_instance)

# Data flow: keyword prompt -> keyword LLM -> PubMed fetcher -> answer
# prompt -> answering LLM.
for _sender, _receiver in (
    ("keyword_prompt_builder.prompt", "keyword_llm.prompt"),
    ("keyword_llm.replies", "pubmed_fetcher.queries"),
    ("pubmed_fetcher.articles", "prompt_builder.articles"),
    ("prompt_builder.prompt", "llm.prompt"),
):
    pipe.connect(_sender, _receiver)
def ask(question):
    """Run the pipeline for one question and return the first LLM reply.

    The question is fed to both prompt builders, and the answering LLM is
    limited to 500 new tokens. The question and the reply are also printed.

    Args:
        question: The user's question text.

    Returns:
        The first entry of the answering LLM's ``replies`` list.
    """
    pipeline_inputs = {
        "keyword_prompt_builder": {"question": question},
        "prompt_builder": {"question": question},
        "llm": {"generation_kwargs": {"max_new_tokens": 500}},
    }
    result = pipe.run(data=pipeline_inputs)

    print(question)
    answer = result['llm']['replies'][0]
    print(answer)
    return answer
# result = ask("How are mRNA vaccines being used for cancer treatment?")
# print(result)
# Gradio front end: a single text box (pre-filled with a sample question)
# whose submission is handled by ``ask``; the reply is rendered as markdown.
iface = gr.Interface(
    fn=ask,
    inputs=gr.Textbox(
        value="How are mRNA vaccines being used for cancer treatment?"),
    outputs="markdown",
    title="LLM Augmented Q&A over PubMed Search Engine",
    description="Ask a question about BioMedical and get an answer from a friendly AI assistant.",
    # Clickable sample questions shown with the form.
    examples=[
        ["How are mRNA vaccines being used for cancer treatment?"],
        ["Suggest me some Case Studies related to Pneumonia."],
        ["Tell me about HIV AIDS."],
        ["Suggest some case studies related to Auto Immune Disorders."],
        ["How to treat a COVID infected Patient?"],
    ],
    theme=gr.themes.Soft(),
    allow_flagging="never",
)

# Start the web app when the script runs. The stray trailing "|" that
# followed this call was removed: it left a dangling binary operator and
# made the statement a syntax error.
iface.launch(debug=True)