teganmosi committed on
Commit
988c7ef
1 Parent(s): d5c102b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -0
app.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ import logging
4
+ import sys
5
+
6
+ logging.basicConfig(stream=sys.stdout, level=logging.INFO)
7
+ logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
8
+
9
+ from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
10
+ from llama_index.llms import HuggingFaceLLM
11
+ from langchain.document_loaders import PyPDFLoader
12
+
13
+
14
+ # Mount Google Drive to access data (you may need to authenticate)
15
+
16
+ import pandas as pd
17
+ from datasets import load_dataset, concatenate_datasets
18
+
19
+ dataset_names = [
20
+ "medalpaca/medical_meadow_mediqa",
21
+ "medalpaca/medical_meadow_medical_flashcards",
22
+ "medalpaca/medical_meadow_wikidoc_patient_information",
23
+ "medalpaca/medical_meadow_wikidoc",
24
+ "medalpaca/medical_meadow_pubmed_casual",
25
+ "medalpaca/medical_meadow_medqa",
26
+ "medalpaca/medical_meadow_health_advice",
27
+ "medalpaca/medical_meadow_cord19"
28
+
29
+ ]
30
+
31
+ datasets = [load_dataset(name, split = "train") for name in dataset_names]
32
+ combined_dataset = concatenate_datasets(datasets)
33
+
34
+ #from google.colab import drive
35
+ #drive.mount('/content/drive')
36
+
37
+ # Read the data from the saved path in Google Drive
38
+
39
+ #documents = SimpleDirectoryReader("/content/drive/MyDrive/Data").load_data()
40
+
41
+ from langchain.text_splitter import CharacterTextSplitter
42
+ from langchain import OpenAI
43
+ from langchain.document_loaders import PyPDFLoader
44
+
45
+ # Define a system prompt for the Q&A assistant
46
+
47
+
48
+ from llama_index.prompts.prompts import SimpleInputPrompt
49
+
50
+
51
+ system_prompt = "You are a medical AI chatbot. Your goal is to answer questions as accurately as possible based on the instructions and context provided.Use only information from the previous context information. Do not invent stuff or give false information"
52
+
53
+
54
+
55
+ # This will wrap the default prompts that are internal to llama-index
56
+ query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")
57
+
58
+ # Log in to Hugging Face
59
+
60
+ #!huggingface-cli login
61
+
62
+ # Configure the HuggingFaceLLM (language model)
63
+
64
+ import torch
65
+
66
+ llm = HuggingFaceLLM(
67
+ context_window=4096,
68
+ max_new_tokens=256,
69
+ generate_kwargs={"temperature": 0.5, "do_sample": False},
70
+ system_prompt=system_prompt,
71
+ query_wrapper_prompt=query_wrapper_prompt,
72
+ tokenizer_name="NousResearch/Llama-2-7b-chat-hf",
73
+ model_name="NousResearch/Llama-2-7b-chat-hf",
74
+ device_map="auto",
75
+ # CUDA only: half precision and 8-bit loading reduce memory usage
76
+ model_kwargs={"torch_dtype": torch.float16 , "load_in_8bit":True})
77
+
78
+ # Configure embeddings using a Hugging Face model
79
+
80
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
81
+ from llama_index import LangchainEmbedding, ServiceContext
82
+
83
+ embed_model = LangchainEmbedding(
84
+ HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
85
+ )
86
+
87
+
88
+ # Configure the service context
89
+
90
+ service_context = ServiceContext.from_defaults(
91
+ chunk_size=1024,
92
+ llm=llm,
93
+ embed_model=embed_model
94
+ )
95
+
96
+ # Create a vector store index from the loaded documents
97
+
98
+ index = VectorStoreIndex.from_documents(combined_dataset, service_context=service_context)
99
+
100
+ # Create a query engine for the index
101
+
102
+ query_engine = index.as_query_engine()
103
+ response = query_engine.query("What is gross profit?")
104
+
105
+ print(response)
106
+
107
+ # Interact with the chatbot through a Gradio UI
108
+
109
+ import gradio as gr
110
+
111
+ # Define your chatbot function
112
def chatbot_interface(query):
    """Answer a user question by forwarding it to the module-level query engine.

    Args:
        query: The question text passed in by the Gradio interface.

    Returns:
        Whatever ``query_engine.query`` returns for ``query``.
    """
    return query_engine.query(query)
115
+
116
+ # Create a Gradio interface
117
+ iface = gr.Interface(
118
+ fn=chatbot_interface,
119
+ inputs=gr.Textbox(placeholder="Enter your question here..."),
120
+ outputs=gr.Textbox(),
121
+ live=False,
122
+ title="Insurance Chatbot Demo",
123
+ description="Ask questions, and the chatbot will provide answers based on the provided context.",
124
+ )
125
+
126
+ # Launch the Gradio interface
127
+ iface.launch(debug=True)