Spaces:
Sleeping
Sleeping
adarsh
committed on
Commit
·
ba97fa9
1
Parent(s):
9c97c75
added helper functions
Browse files- src/helper.py +64 -0
- src/prompt.py +19 -0
src/helper.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.document_loaders import PyPDFDirectoryLoader
|
2 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
3 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
4 |
+
from huggingface_hub import hf_hub_download
|
5 |
+
|
6 |
+
|
7 |
+
# loading the data
def load_data(path):
    """Load every PDF found under *path* into a list of documents.

    Args:
        path: Directory containing the PDF files to ingest.

    Returns:
        The document list produced by ``PyPDFDirectoryLoader.load()``.
    """
    pdf_loader = PyPDFDirectoryLoader(path)
    return pdf_loader.load()
|
12 |
+
|
13 |
+
|
14 |
+
#Create text chunks
def text_split(extracted_data):
    """Split loaded documents into overlapping text chunks.

    Args:
        extracted_data: Documents returned by the PDF loader.

    Returns:
        Chunks of at most 500 characters with a 20-character overlap,
        as produced by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    return splitter.split_documents(extracted_data)
|
20 |
+
|
21 |
+
|
22 |
+
#download embedding model
def download_hf_embeddings():
    """Return a HuggingFace embedding model wrapper.

    Uses the ``sentence-transformers/all-MiniLM-L6-v2`` checkpoint; the
    weights are fetched (and cached) on first use by the wrapper itself.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
|
26 |
+
|
27 |
+
|
28 |
+
# downloading any pdf on web
|
29 |
+
|
30 |
+
import os
|
31 |
+
import requests
|
32 |
+
|
33 |
+
def download_pdf(url):
    """Download a PDF from *url* into the local ``data/`` directory.

    The filename is taken from the last path segment of the URL (with any
    query string stripped). Failures are reported by printing, matching
    the original best-effort behaviour, rather than by raising.

    Args:
        url: Direct link to a PDF file on the web.

    Returns:
        The path the file was saved to, or ``None`` if the download failed.
    """
    # exist_ok avoids the racy exists()-then-makedirs() check.
    os.makedirs('data', exist_ok=True)

    # Get the filename from the URL, ignoring any ?query=... suffix.
    filename = url.split("/")[-1].split("?")[0]
    if not filename:
        print(f"Failed to download PDF. Could not derive a filename from URL: {url}")
        return None

    # Full path where the PDF will be saved
    save_path = os.path.join('data', filename)

    # Stream the download so large PDFs are not buffered entirely in
    # memory, and bound the wait with a timeout so the call cannot hang.
    try:
        response = requests.get(url, stream=True, timeout=30)
    except requests.RequestException as err:
        print(f"Failed to download PDF. Error: {err}")
        return None

    # Check if the request was successful
    if response.status_code == 200:
        # Write the content to a file in chunks.
        with open(save_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
        print(f"PDF downloaded and saved to {save_path}")
        return save_path

    print(f"Failed to download PDF. Status code: {response.status_code}")
    return None
|
56 |
+
|
57 |
+
|
58 |
+
|
59 |
+
|
60 |
+
|
61 |
+
def download_hf_model(model_name_or_path, model_basename):
    """Fetch a model file from the Hugging Face Hub (cached locally).

    Args:
        model_name_or_path: Hub repository id, e.g. ``"org/model"``.
        model_basename: Filename within the repository to download.

    Returns:
        Local filesystem path of the downloaded (or cached) file.
    """
    cached_path = hf_hub_download(
        repo_id=model_name_or_path,
        filename=model_basename,
    )
    return cached_path
|
64 |
+
|
src/prompt.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
prompt_template = """
|
2 |
+
You are a knowledgeable medical AI assistant. Answer the user's health-related question based on the given context and your medical knowledge. Follow these guidelines:
|
3 |
+
|
4 |
+
1. Provide clear, concise, and medically accurate information.
|
5 |
+
2. Use the context provided, but interpret it with your medical expertise if needed.
|
6 |
+
3. Explain medical terms in simple language when used.
|
7 |
+
4. If the question involves diagnosis or treatment, advise consulting a healthcare professional.
|
8 |
+
5. Acknowledge if you don't have enough information to answer fully.
|
9 |
+
6. Never guess or make up medical information.
|
10 |
+
7. Offer general health tips related to the question if appropriate.
|
11 |
+
|
12 |
+
Context: {context}
|
13 |
+
|
14 |
+
Question: {question}
|
15 |
+
|
16 |
+
Remember: Your role is to inform, not diagnose. Prioritize accuracy and patient safety.
|
17 |
+
|
18 |
+
Helpful answer:
|
19 |
+
"""
|