gauravgulati619 committed on
Commit
f78f4fc
·
verified ·
1 Parent(s): 562ec5a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -113
app.py CHANGED
@@ -10,132 +10,29 @@ from patientvoice import record_audio, transcribe_with_groq
10
  from doctorvoice import text_to_speech_with_gtts, text_to_speech_with_elevenlabs
11
  from dotenv import load_dotenv
12
  load_dotenv()
13
- from langchain_community.vectorstores import FAISS
14
- from langchain_core.embeddings import Embeddings
15
- from langchain_core.prompts import ChatPromptTemplate
16
- from langchain_community.document_loaders import PyPDFLoader
17
- from langchain.text_splitter import RecursiveCharacterTextSplitter
 
 
18
 
19
- # Check if CUDA is available
20
- device = "cuda" if torch.cuda.is_available() else "cpu"
21
- print(f"Using device: {device}")
22
-
23
- # Initialize embeddings model
24
- class SentenceTransformerEmbeddings(Embeddings):
25
- def __init__(self, model_name: str, device: str = None):
26
- self.model = SentenceTransformer(model_name, device=device)
27
-
28
- def embed_documents(self, texts: list[str]) -> list[list[float]]:
29
- embeddings = self.model.encode(texts, convert_to_tensor=False)
30
- return embeddings.tolist()
31
-
32
- def embed_query(self, text: str) -> list[float]:
33
- embedding = self.model.encode(text, convert_to_tensor=False)
34
- return embedding.tolist()
35
-
36
- embeddings = SentenceTransformerEmbeddings(
37
- model_name="sentence-transformers/all-MiniLM-L6-v2",
38
- device=device
39
- )
40
-
41
- # Define vectorstore paths consistently
42
- VECTORSTORE_DIR = "vectorstore/db_faiss"
43
- vectorstore_path = pathlib.Path(VECTORSTORE_DIR)
44
-
45
- # Create vectorstore directory if it doesn't exist
46
- vectorstore_path.mkdir(parents=True, exist_ok=True)
47
-
48
- if not (vectorstore_path / "index.faiss").exists():
49
- print("Creating new vectorstore...")
50
- # Load and split the PDF
51
- loader = PyPDFLoader("medical.pdf")
52
- documents = loader.load()
53
-
54
- # Split documents into chunks
55
- text_splitter = RecursiveCharacterTextSplitter(
56
- chunk_size=2000,
57
- chunk_overlap=100,
58
- length_function=len,
59
- )
60
- texts = text_splitter.split_documents(documents)
61
-
62
- # Create and save the vectorstore
63
- vectorstore = FAISS.from_documents(texts, embeddings)
64
-
65
- # If CUDA is available, convert index to GPU
66
- if device == "cuda":
67
- res = faiss.StandardGpuResources() # Initialize GPU resources
68
- index = vectorstore.index
69
- gpu_index = faiss.index_cpu_to_gpu(res, 0, index) # Move to GPU
70
- vectorstore.index = gpu_index
71
-
72
- # Save the vectorstore
73
- vectorstore.save_local(VECTORSTORE_DIR)
74
- print("Vectorstore created and saved successfully.")
75
- else:
76
- print("Loading existing vectorstore...")
77
- # Load existing vectorstore
78
- vectorstore = FAISS.load_local(
79
- folder_path=VECTORSTORE_DIR,
80
- embeddings=embeddings,
81
- allow_dangerous_deserialization=True
82
- )
83
-
84
- # If CUDA is available, convert loaded index to GPU
85
- if device == "cuda":
86
- res = faiss.StandardGpuResources() # Initialize GPU resources
87
- index = vectorstore.index
88
- gpu_index = faiss.index_cpu_to_gpu(res, 0, index) # Move to GPU
89
- vectorstore.index = gpu_index
90
- print("Vectorstore loaded successfully.")
91
-
92
- def get_relevant_context(query):
93
- try:
94
- # Search the vector store for relevant documents
95
- docs = vectorstore.similarity_search(query, k=2)
96
-
97
- # Extract and combine the content from retrieved documents
98
- context = "\n".join([doc.page_content for doc in docs])
99
-
100
- return context
101
- except Exception as e:
102
- print(f"Error in similarity search: {e}")
103
- return "Could not retrieve relevant context."
104
-
105
- # Update system prompt to include retrieved context
106
- def get_enhanced_prompt(query, context):
107
- enhanced_prompt = f"""You have to act as a professional doctor, i know you are not but this is for learning purpose.
108
- Use the following medical context to inform your response: {context}
109
- What's in this image?. Do you find anything wrong with it medically?
110
- If you make a differential, suggest some remedies for them. Donot add any numbers or special characters in
111
- your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
112
- Donot say 'In the image I see' but say 'With what I see, I think you have ....'
113
- Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
114
- Keep your answer concise (max 2 sentences). No preamble, start your answer right away please.
115
- Question from patient: {query}"""
116
- return enhanced_prompt
117
 
118
  def process_inputs(audio_filepath, image_filepath):
119
  speech_to_text_output = transcribe_with_groq(GROQ_API_KEY=os.environ.get("GROQ_API_KEY"),
120
  audio_filepath=audio_filepath,
121
  stt_model="whisper-large-v3")
122
 
123
- # Get relevant context from the vector store
124
- context = get_relevant_context(speech_to_text_output)
125
-
126
  # Handle the image input
127
  if image_filepath:
128
- enhanced_prompt = get_enhanced_prompt(speech_to_text_output, context)
129
- doctor_response = analyze_image_with_query(query=enhanced_prompt, encoded_image=encode_image(image_filepath), model="llama-3.2-11b-vision-preview")
130
  else:
131
  doctor_response = "No image provided for me to analyze"
132
 
133
- # Generate audio response and return the filepath
134
- output_filepath = "output_audio.mp3"
135
- voice_of_doctor = text_to_speech_with_elevenlabs(input_text=doctor_response, output_filepath=output_filepath)
136
-
137
- return speech_to_text_output, doctor_response, output_filepath
138
 
 
139
 
140
  # Create the interface
141
  iface = gr.Interface(
 
10
  from doctorvoice import text_to_speech_with_gtts, text_to_speech_with_elevenlabs
11
  from dotenv import load_dotenv
12
  load_dotenv()
13
+ system_prompt="""You have to act as a professional doctor, i know you are not but this is for learning purpose.
14
+ What's in this image?. Do you find anything wrong with it medically?
15
+ If you make a differential, suggest some remedies for them. Donot add any numbers or special characters in
16
+ your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
17
+ Donot say 'In the image I see' but say 'With what I see, I think you have ....'
18
+ Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
19
+ Keep your answer concise (max 5 sentences). No preamble, start your answer right away please"""
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  def process_inputs(audio_filepath, image_filepath):
23
  speech_to_text_output = transcribe_with_groq(GROQ_API_KEY=os.environ.get("GROQ_API_KEY"),
24
  audio_filepath=audio_filepath,
25
  stt_model="whisper-large-v3")
26
 
 
 
 
27
  # Handle the image input
28
  if image_filepath:
29
+ doctor_response = analyze_image_with_query(query=system_prompt+speech_to_text_output, encoded_image=encode_image(image_filepath), model="llama-3.2-11b-vision-preview")
 
30
  else:
31
  doctor_response = "No image provided for me to analyze"
32
 
33
+ voice_of_doctor = text_to_speech_with_elevenlabs(input_text=doctor_response, output_filepath="final.mp3")
 
 
 
 
34
 
35
+ return speech_to_text_output, doctor_response, voice_of_doctor
36
 
37
  # Create the interface
38
  iface = gr.Interface(