Spaces:

gauravgulati619
/

MediVox

Running

App Files Files Community

gauravgulati619 commited on Feb 28

Commit

f78f4fc

verified ·

1 Parent(s): 562ec5a

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -113

app.py CHANGED Viewed

@@ -10,132 +10,29 @@ from patientvoice import record_audio, transcribe_with_groq
 from doctorvoice import text_to_speech_with_gtts, text_to_speech_with_elevenlabs
 from dotenv import load_dotenv
 load_dotenv()
-from langchain_community.vectorstores import FAISS
-from langchain_core.embeddings import Embeddings
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_community.document_loaders import PyPDFLoader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-# Check if CUDA is available
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print(f"Using device: {device}")
-# Initialize embeddings model
-class SentenceTransformerEmbeddings(Embeddings):
-    def __init__(self, model_name: str, device: str = None):
-        self.model = SentenceTransformer(model_name, device=device)
-    def embed_documents(self, texts: list[str]) -> list[list[float]]:
-        embeddings = self.model.encode(texts, convert_to_tensor=False)
-        return embeddings.tolist()
-    def embed_query(self, text: str) -> list[float]:
-        embedding = self.model.encode(text, convert_to_tensor=False)
-        return embedding.tolist()
-embeddings = SentenceTransformerEmbeddings(
-    model_name="sentence-transformers/all-MiniLM-L6-v2",
-    device=device
-)
-# Define vectorstore paths consistently
-VECTORSTORE_DIR = "vectorstore/db_faiss"
-vectorstore_path = pathlib.Path(VECTORSTORE_DIR)
-# Create vectorstore directory if it doesn't exist
-vectorstore_path.mkdir(parents=True, exist_ok=True)
-if not (vectorstore_path / "index.faiss").exists():
-    print("Creating new vectorstore...")
-    # Load and split the PDF
-    loader = PyPDFLoader("medical.pdf")
-    documents = loader.load()
-    # Split documents into chunks
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=2000,
-        chunk_overlap=100,
-        length_function=len,
-    )
-    texts = text_splitter.split_documents(documents)
-    # Create and save the vectorstore
-    vectorstore = FAISS.from_documents(texts, embeddings)
-    # If CUDA is available, convert index to GPU
-    if device == "cuda":
-        res = faiss.StandardGpuResources()  # Initialize GPU resources
-        index = vectorstore.index
-        gpu_index = faiss.index_cpu_to_gpu(res, 0, index)  # Move to GPU
-        vectorstore.index = gpu_index
-    # Save the vectorstore
-    vectorstore.save_local(VECTORSTORE_DIR)
-    print("Vectorstore created and saved successfully.")
-else:
-    print("Loading existing vectorstore...")
-    # Load existing vectorstore
-    vectorstore = FAISS.load_local(
-        folder_path=VECTORSTORE_DIR,
-        embeddings=embeddings,
-        allow_dangerous_deserialization=True
-    )
-    # If CUDA is available, convert loaded index to GPU
-    if device == "cuda":
-        res = faiss.StandardGpuResources()  # Initialize GPU resources
-        index = vectorstore.index
-        gpu_index = faiss.index_cpu_to_gpu(res, 0, index)  # Move to GPU
-        vectorstore.index = gpu_index
-    print("Vectorstore loaded successfully.")
-def get_relevant_context(query):
-    try:
-        # Search the vector store for relevant documents
-        docs = vectorstore.similarity_search(query, k=2)
-        # Extract and combine the content from retrieved documents
-        context = "\n".join([doc.page_content for doc in docs])
-        return context
-    except Exception as e:
-        print(f"Error in similarity search: {e}")
-        return "Could not retrieve relevant context."
-# Update system prompt to include retrieved context
-def get_enhanced_prompt(query, context):
-    enhanced_prompt = f"""You have to act as a professional doctor, i know you are not but this is for learning purpose.
-    Use the following medical context to inform your response: {context}
-    What's in this image?. Do you find anything wrong with it medically?
-    If you make a differential, suggest some remedies for them. Donot add any numbers or special characters in
-    your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
-    Donot say 'In the image I see' but say 'With what I see, I think you have ....'
-    Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
-    Keep your answer concise (max 2 sentences). No preamble, start your answer right away please.
-    Question from patient: {query}"""
-    return enhanced_prompt
 def process_inputs(audio_filepath, image_filepath):
     speech_to_text_output = transcribe_with_groq(GROQ_API_KEY=os.environ.get("GROQ_API_KEY"),
                                                  audio_filepath=audio_filepath,
                                                  stt_model="whisper-large-v3")
-    # Get relevant context from the vector store
-    context = get_relevant_context(speech_to_text_output)
     # Handle the image input
     if image_filepath:
-        enhanced_prompt = get_enhanced_prompt(speech_to_text_output, context)
-        doctor_response = analyze_image_with_query(query=enhanced_prompt, encoded_image=encode_image(image_filepath), model="llama-3.2-11b-vision-preview")
     else:
         doctor_response = "No image provided for me to analyze"
-    # Generate audio response and return the filepath
-    output_filepath = "output_audio.mp3"
-    voice_of_doctor = text_to_speech_with_elevenlabs(input_text=doctor_response, output_filepath=output_filepath)
-    return speech_to_text_output, doctor_response, output_filepath
 # Create the interface
 iface = gr.Interface(

 from doctorvoice import text_to_speech_with_gtts, text_to_speech_with_elevenlabs
 from dotenv import load_dotenv
 load_dotenv()
+system_prompt="""You have to act as a professional doctor, i know you are not but this is for learning purpose.
+            What's in this image?. Do you find anything wrong with it medically?
+            If you make a differential, suggest some remedies for them. Donot add any numbers or special characters in
+            your response. Your response should be in one long paragraph. Also always answer as if you are answering to a real person.
+            Donot say 'In the image I see' but say 'With what I see, I think you have ....'
+            Dont respond as an AI model in markdown, your answer should mimic that of an actual doctor not an AI bot,
+            Keep your answer concise (max 5 sentences). No preamble, start your answer right away please"""
 def process_inputs(audio_filepath, image_filepath):
     speech_to_text_output = transcribe_with_groq(GROQ_API_KEY=os.environ.get("GROQ_API_KEY"),
                                                  audio_filepath=audio_filepath,
                                                  stt_model="whisper-large-v3")
     # Handle the image input
     if image_filepath:
+        doctor_response = analyze_image_with_query(query=system_prompt+speech_to_text_output, encoded_image=encode_image(image_filepath), model="llama-3.2-11b-vision-preview")
     else:
         doctor_response = "No image provided for me to analyze"
+    voice_of_doctor = text_to_speech_with_elevenlabs(input_text=doctor_response, output_filepath="final.mp3")
+    return speech_to_text_output, doctor_response, voice_of_doctor
 # Create the interface
 iface = gr.Interface(