"""Multilingual health and wellness chatbot demo.

Loads a document, splits it into chunks, stores the chunks in a LanceDB table
using Cohere embeddings, and serves a Gradio UI in which questions and answers
are translated with argostranslate.
"""

import logging
import os
import shutil

import argostranslate.package
import argostranslate.translate
import dotenv
import gradio as gr
import lancedb
from langchain.chains import RetrievalQA
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.llms import Cohere
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import LanceDB

# Assume these loaders are implemented based on your specific requirements
from custom_document_loaders import TextLoader, PyPDFLoader, DocxLoader, ImageLoader

# Configuration and Logging
dotenv.load_dotenv(".env")
DB_PATH = "/tmp/lancedb"
COHERE_MODEL_NAME = "multilingual-22-12"
LANGUAGE_ISO_CODES = {"English": "en", "Hindi": "hi", "Turkish": "tr", "French": "fr"}

# Maps a lower-cased file extension to the loader class that handles it.
_LOADERS_BY_EXTENSION = {
    ".txt": TextLoader,
    ".pdf": PyPDFLoader,
    ".doc": DocxLoader,
    ".docx": DocxLoader,
    ".jpg": ImageLoader,
    ".jpeg": ImageLoader,
    ".png": ImageLoader,
}

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize argostranslate
argostranslate.package.update_package_index()


def initialize_documents_and_embeddings(input_file_path):
    """Load a file, split it into chunks, and build the embedding model.

    Args:
        input_file_path: Path of the document to load. The loader is chosen
            from the (case-insensitive) file extension.

    Returns:
        A ``(texts, embeddings)`` tuple: the split document chunks and a
        ``CohereEmbeddings`` instance configured with ``COHERE_MODEL_NAME``.

    Raises:
        ValueError: If the file extension has no registered loader.
    """
    logger.info("Processing file: %s", input_file_path)
    file_extension = os.path.splitext(input_file_path)[1].lower()

    loader_cls = _LOADERS_BY_EXTENSION.get(file_extension)
    if loader_cls is None:
        raise ValueError(
            "Unsupported file type. "
            "Supported files are .txt, .pdf, .docx, and image files."
        )

    documents = loader_cls(input_file_path).load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
    texts = text_splitter.split_documents(documents)
    embeddings = CohereEmbeddings(model=COHERE_MODEL_NAME)
    return texts, embeddings


def initialize_database(texts, embeddings):
    """Recreate the LanceDB store at ``DB_PATH`` and index ``texts`` into it.

    Args:
        texts: Document chunks to index.
        embeddings: Embedding model used both to seed the table and to embed
            ``texts``.

    Returns:
        The LangChain ``LanceDB`` vector store built from ``texts``.
    """
    if os.path.exists(DB_PATH):
        shutil.rmtree(DB_PATH)  # Ensure a fresh start

    db = lancedb.connect(DB_PATH)
    # LanceDB cannot create a table from a name alone: it needs data or a
    # schema. A single embedded seed row lets it infer the vector dimension
    # from the embedding model actually in use.
    table = db.create_table(
        "multiling-rag",
        data=[
            {
                "vector": embeddings.embed_query("Hello World"),
                "text": "Hello World",
                "id": "1",
            }
        ],
        mode="overwrite",
    )
    return LanceDB.from_documents(texts, embeddings, connection=table)


def translate_text(text, from_code, to_code):
    """Translate ``text`` between two ISO language codes via argostranslate.

    Args:
        text: Text to translate.
        from_code: ISO code of the source language.
        to_code: ISO code of the target language.

    Returns:
        The translated text, ``text`` unchanged when both codes are equal, or
        the string ``"Translation error"`` when the required languages or the
        translation between them are not installed.
    """
    if from_code == to_code:
        # Nothing to translate; avoids requiring an installed language pack.
        return text

    installed_languages = argostranslate.translate.get_installed_languages()
    from_lang = next((lang for lang in installed_languages if lang.code == from_code), None)
    to_lang = next((lang for lang in installed_languages if lang.code == to_code), None)
    if not from_lang or not to_lang:
        logger.error("Translation languages not installed.")
        return "Translation error"

    translation = from_lang.get_translation(to_lang)
    if translation is None:
        # Both languages exist but no package links this specific pair.
        logger.error("No translation installed from %s to %s.", from_code, to_code)
        return "Translation error"
    return translation.translate(text)


def answer_question(question, input_language, output_language, db):
    """Answer ``question`` and return the answer in ``output_language``.

    The retrieval step is currently simulated: ``db`` is accepted but not
    queried, and a fixed English response is translated to the output
    language.

    Args:
        question: The user's question, written in ``input_language``.
        input_language: A key of ``LANGUAGE_ISO_CODES``.
        output_language: A key of ``LANGUAGE_ISO_CODES``.
        db: Vector store intended for retrieval (unused by the simulation).

    Returns:
        The (possibly translated) response, or a generic error message if
        anything fails, including an unknown language name.
    """
    try:
        input_lang_code = LANGUAGE_ISO_CODES[input_language]
        output_lang_code = LANGUAGE_ISO_CODES[output_language]

        # TODO: feed the English question into the retrieval chain once the
        # real retrieval logic replaces the simulated response below.
        question_in_english = (
            translate_text(question, input_lang_code, "en")
            if input_language != "English"
            else question
        )

        # Simplified retrieval and response logic for demonstration
        response = "This is a simulated response based on the question."
        result_in_target_language = (
            translate_text(response, "en", output_lang_code)
            if output_language != "English"
            else response
        )
        return result_in_target_language
    except Exception:
        # UI boundary: log the full traceback, show the user a safe message.
        logger.exception("Error in answer_question")
        return "An error occurred while processing your question."


def document_analysis_and_feedback(document_path, feedback):
    """Return a placeholder message; document analysis is not implemented.

    Args:
        document_path: The uploaded document (currently ignored).
        feedback: Optional feedback text (currently ignored).

    Returns:
        A fixed string stating that the feature is not fully implemented.
    """
    # Placeholder for document analysis logic
    response = "Document analysis and feedback processing is not fully implemented."
    return response


def setup_gradio_interface(db):
    """Build the Gradio Blocks UI with question and document-upload tabs.

    Args:
        db: Vector store passed through to ``answer_question``.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """
    language_names = list(LANGUAGE_ISO_CODES.keys())

    with gr.Blocks() as demo:
        gr.Markdown("# Multilingual Health and Wellness Chatbot")

        with gr.Tab("Ask a Question"):
            with gr.Row():
                # Default both dropdowns so the first submit never passes None.
                input_language = gr.Dropdown(
                    language_names, value="English", label="Input Language"
                )
                output_language = gr.Dropdown(
                    language_names, value="English", label="Output Language"
                )
            question = gr.Textbox(label="Your question")
            answer = gr.Textbox(label="Answer")
            question.submit(
                lambda q, i, o: answer_question(q, i, o, db),
                inputs=[question, input_language, output_language],
                outputs=answer,
            )

        with gr.Tab("Upload Document"):
            with gr.Row():
                document = gr.File(label="Upload your health document")
                feedback_box = gr.Textbox(label="Feedback (optional)")
            upload_response = gr.Textbox(label="Analysis Result")
            # gr.File has no `submit` event; `upload` fires when a file is added.
            document.upload(
                document_analysis_and_feedback,
                inputs=[document, feedback_box],
                outputs=upload_response,
            )

    return demo


def main():
    """Index the sample document and launch the Gradio application."""
    INPUT_FILE_PATH = "sample-text.txt"  # Placeholder file path
    texts, embeddings = initialize_documents_and_embeddings(INPUT_FILE_PATH)
    db = initialize_database(texts, embeddings)
    demo = setup_gradio_interface(db)
    demo.launch()


if __name__ == "__main__":
    main()