"""Gradio chatbot that answers questions from retrieved documents.

The app retrieves documents for the user's prompt (ColBERT index, or vector
search followed by cross-encoder re-ranking), renders them into a prompt
template, streams the answer produced by ``generate_hf`` and finally
translates that answer with the Bhashini API into the selected language.
"""

import logging
from functools import lru_cache
from os import getenv
from pathlib import Path
from time import perf_counter

import gradio as gr
import numpy as np
import requests
from huggingface_hub import InferenceClient
from jinja2 import Environment, FileSystemLoader
from ragatouille import RAGPretrainedModel
from sentence_transformers import CrossEncoder

from backend.query_llm import generate_hf, generate_openai
from backend.semantic_search import table, retriever

# Bhashini API credentials, read from the environment.
api_key = getenv('API_KEY')
user_id = getenv('USER_ID')

BHASHINI_PIPELINE_URL = 'https://meity-auth.ulcacontrib.org/ulca/apis/v0/model/getModelsPipeline'
BHASHINI_PIPELINE_ID = "64392f96daac500b55c543cd"
# Without a timeout a stalled Bhashini endpoint would block the event forever.
BHASHINI_TIMEOUT_SECONDS = 30

# Display name -> language code sent to Bhashini. The dropdown choices are
# derived from this mapping so the two can never drift apart.
ISO_LANGUAGE_CODES = {
    "Hindi": "hi",
    "Gom": "gom",
    "Kannada": "kn",
    "Dogri": "doi",
    "Bodo": "brx",
    "Urdu": "ur",
    "Tamil": "ta",
    "Kashmiri": "ks",
    "Assamese": "as",
    "Bengali": "bn",
    "Marathi": "mr",
    "Sindhi": "sd",
    "Maithili": "mai",
    "Punjabi": "pa",
    "Malayalam": "ml",
    "Manipuri": "mni",
    "Telugu": "te",
    "Sanskrit": "sa",
    "Nepali": "ne",
    "Santali": "sat",
    "Gujarati": "gu",
    "Odia": "or",
}

# Retrieval options offered by the "Embeddings" radio button.
COLBERT_CHOICE = '(HIGH ACCURATE) ColBERT'
RERANKER_MODELS = {
    '(FAST) MiniLM-L6v2': 'cross-encoder/ms-marco-MiniLM-L-6-v2',
    '(ACCURATE) BGE reranker': 'BAAI/bge-reranker-base',
}
COLBERT_MODEL_NAME = "colbert-ir/colbertv2.0"
COLBERT_INDEX_PATH = '.ragatouille/colbert/indexes/cbseclass10index'


def _translation_error(status_code: int, message: str) -> dict:
    """Build the result dict returned by ``bhashini_translate`` on failure."""
    return {"status_code": status_code, "message": message, "translated_content": None}


def bhashini_translate(text: str, from_code: str = "en", to_code: str = "hi") -> dict:
    """Translate ``text`` from ``from_code`` to ``to_code`` using the Bhashini API.

    Two requests are made: the first asks the pipeline endpoint which service
    and callback URL to use, the second sends the text to that callback URL.

    Args:
        text: Text to translate. ``None``, empty or whitespace-only text is
            rejected without contacting the API.
        from_code: Source language code.
        to_code: Target language code.

    Returns:
        A dict with ``status_code``, ``message`` and ``translated_content``.
        ``translated_content`` is ``None`` whenever the translation failed.
        A non-200 HTTP response reports that response's status code; 503 is
        used when the service could not be reached and 502 when its response
        did not have the expected structure.
    """
    if not text or not text.strip():
        print('Input text is empty. Please provide valid text for translation.')
        return {"status_code": 400, "message": "Input text is empty", "translated_content": None, "speech_content": None}

    print('Input text - ', text)
    print(f'Starting translation process from {from_code} to {to_code}...')
    gr.Warning(f'Translating to {to_code}...')

    language = {"sourceLanguage": from_code, "targetLanguage": to_code}
    headers = {
        "Content-Type": "application/json",
        "userID": user_id,
        "ulcaApiKey": api_key,
    }
    payload = {
        "pipelineTasks": [{"taskType": "translation", "config": {"language": language}}],
        "pipelineRequestConfig": {"pipelineId": BHASHINI_PIPELINE_ID},
    }

    print('Sending initial request to get the pipeline...')
    try:
        response = requests.post(
            BHASHINI_PIPELINE_URL, json=payload, headers=headers, timeout=BHASHINI_TIMEOUT_SECONDS
        )
    except requests.RequestException as exc:
        print(f'Error in initial request: {exc}')
        return _translation_error(503, "Error in translation request")
    if response.status_code != 200:
        print(f'Error in initial request: {response.status_code}')
        return _translation_error(response.status_code, "Error in translation request")

    print('Initial request successful, processing response...')
    try:
        response_data = response.json()
        service_id = response_data["pipelineResponseConfig"][0]["config"][0]["serviceId"]
        endpoint = response_data["pipelineInferenceAPIEndPoint"]
        callback_url = endpoint["callbackUrl"]
        inference_key = endpoint["inferenceApiKey"]
        # The name of the auth header is dictated by the pipeline response.
        headers2 = {
            "Content-Type": "application/json",
            inference_key["name"]: inference_key["value"],
        }
    except (KeyError, IndexError, TypeError, ValueError) as exc:
        print(f'Unexpected pipeline response: {exc!r}')
        return _translation_error(502, "Error in translation request")
    print(f'Service ID: {service_id}, Callback URL: {callback_url}')

    compute_payload = {
        "pipelineTasks": [{"taskType": "translation", "config": {"language": language, "serviceId": service_id}}],
        "inputData": {"input": [{"source": text}], "audio": [{"audioContent": None}]},
    }

    print(f'Sending translation request with text: "{text}"')
    try:
        compute_response = requests.post(
            callback_url, json=compute_payload, headers=headers2, timeout=BHASHINI_TIMEOUT_SECONDS
        )
    except requests.RequestException as exc:
        print(f'Error in translation request: {exc}')
        return _translation_error(503, "Error in translation")
    if compute_response.status_code != 200:
        print(f'Error in translation request: {compute_response.status_code}')
        return _translation_error(compute_response.status_code, "Error in translation")

    print('Translation request successful, processing translation...')
    try:
        compute_response_data = compute_response.json()
        translated_content = compute_response_data["pipelineResponse"][0]["output"][0]["target"]
    except (KeyError, IndexError, TypeError, ValueError) as exc:
        print(f'Unexpected translation response: {exc!r}')
        return _translation_error(502, "Error in translation")

    print(f'Translation successful. Translated content: "{translated_content}"')
    return {"status_code": 200, "message": "Translation successful", "translated_content": translated_content}


# Existing chatbot functions
VECTOR_COLUMN_NAME = "vector"
TEXT_COLUMN_NAME = "text"
HF_TOKEN = getenv("HUGGING_FACE_HUB_TOKEN")
proj_dir = Path(__file__).parent

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1", token=HF_TOKEN)

env = Environment(loader=FileSystemLoader(proj_dir / 'templates'))
template = env.get_template('template.j2')
template_html = env.get_template('template_html.j2')


def add_text(history, text):
    """Append the user's prompt to the chat history and lock the prompt box.

    Args:
        history: Current chat history (``None`` is treated as empty).
        text: The prompt typed by the user.

    Returns:
        The new history and a cleared, non-interactive textbox. The textbox is
        re-enabled by the last step of the event chain.
    """
    history = [] if history is None else history
    # A list (not a tuple) so that ``bot`` can fill in the answer later.
    history = history + [[text, None]]
    return history, gr.Textbox(value="", interactive=False)


@lru_cache(maxsize=None)
def _load_cross_encoder(model_name):
    """Load a cross-encoder once and reuse it for later queries."""
    return CrossEncoder(model_name)


@lru_cache(maxsize=1)
def _load_colbert_index():
    """Load the ColBERT model and index once and reuse them for later queries."""
    rag = RAGPretrainedModel.from_pretrained(COLBERT_MODEL_NAME)
    return rag.from_index(COLBERT_INDEX_PATH)


def _retrieve_with_colbert(query, top_k):
    """Return the text of the ``top_k`` ColBERT search results for ``query``."""
    results = _load_colbert_index().search(query, k=top_k)
    return [item['content'] for item in results]


def _retrieve_with_reranker(query, model_name, top_rerank, top_k):
    """Vector-search ``top_rerank`` documents and keep the ``top_k`` best re-ranked ones."""
    query_vec = retriever.encode(query)
    rows = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_rerank).to_list()
    documents = [row[TEXT_COLUMN_NAME] for row in rows]

    scores = _load_cross_encoder(model_name).predict([[query, doc] for doc in documents])
    # argsort is ascending; reverse it so the highest score comes first.
    best_first = np.argsort(scores)[::-1][:top_k]
    return [documents[idx] for idx in best_first]


def bot(history, cross_encoder):
    """Retrieve documents for the latest prompt and stream the generated answer.

    Args:
        history: Chat history whose last row holds the prompt to answer.
        cross_encoder: The retrieval option selected in the "Embeddings" radio.

    Yields:
        ``(history, prompt_html)`` after every chunk produced by
        ``generate_hf``; the last row of ``history`` holds the answer so far.

    Raises:
        ValueError: If the prompt is empty or the retrieval option is unknown.
    """
    top_rerank = 25
    top_k_rank = 20
    query = history[-1][0]
    if not query:
        gr.Warning("Please submit a non-empty string as a prompt")
        raise ValueError("Empty string was submitted")

    use_colbert = cross_encoder == COLBERT_CHOICE
    reranker_name = RERANKER_MODELS.get(cross_encoder)
    if not use_colbert and reranker_name is None:
        # Fail before doing any work instead of hitting an unbound variable later.
        raise ValueError(f"Unknown retrieval option: {cross_encoder!r}")

    logger.warning('Retrieving documents...')
    document_start = perf_counter()
    if use_colbert:
        gr.Warning('Retrieving using ColBERT.. First time query will take a minute for model to load..pls wait')
        documents = _retrieve_with_colbert(query, top_k_rank)
    else:
        documents = _retrieve_with_reranker(query, reranker_name, top_rerank, top_k_rank)
    document_time = perf_counter() - document_start
    logger.warning('Finished retrieving %d documents in %.2f seconds...', len(documents), document_time)

    prompt = template.render(documents=documents, query=query)
    prompt_html = template_html.render(documents=documents, query=query)

    # Replace the row rather than assigning into it, so tuple rows work too.
    history[-1] = [query, ""]
    for character in generate_hf(prompt, history[:-1]):
        history[-1][1] = character
        yield history, prompt_html


def translate_text(response_text, selected_language):
    """Translate ``response_text`` into the language chosen in the dropdown.

    Args:
        response_text: English text to translate.
        selected_language: A key of ``ISO_LANGUAGE_CODES``.

    Returns:
        The translated text, or ``None`` if the translation failed.

    Raises:
        KeyError: If ``selected_language`` is not a supported language.
    """
    to_code = ISO_LANGUAGE_CODES[selected_language]
    translation = bhashini_translate(response_text, to_code=to_code)
    return translation['translated_content']


def _last_bot_message(history):
    """Return the answer of the last chat row, or ``None`` if there is none."""
    return history[-1][1] if history else None


# Gradio interface
# NOTE(review): the original indentation of this section was lost; the nesting
# below was reconstructed from the order of the statements - confirm the layout.
with gr.Blocks(theme='NoCrypt/miku') as CHATBOT:
    with gr.Row():
        with gr.Column(scale=10):
            gr.HTML(value="""

ADWITIYA-

Custom Manual Chatbot and Quizbot

""")
            gr.HTML(value="""

Using GenAI for CBIC Capacity Building - A free chat bot developed by National Customs Targeting Center using Open source LLMs for CBIC Officers

""")
            gr.HTML(value="""

Developed by NCTC,Mumbai. Suggestions may be sent to nctc-admin@gov.in.

""")
        with gr.Column(scale=3):
            gr.Image(value='logo.png', height=200, width=200)

    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        avatar_images=('https://aui.atlassian.com/aui/8.8/docs/images/avatar-person.svg',
                       'https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg'),
        bubble_full_width=False,
        show_copy_button=True,
        show_share_button=True,
    )

    with gr.Row():
        txt = gr.Textbox(
            scale=3,
            show_label=False,
            placeholder="Enter text and press enter",
            container=False,
        )
        txt_btn = gr.Button(value="Submit text", scale=1)

    cross_encoder = gr.Radio(
        choices=['(FAST) MiniLM-L6v2', '(ACCURATE) BGE reranker', '(HIGH ACCURATE) ColBERT'],
        value='(ACCURATE) BGE reranker',
        label="Embeddings",
        info="Only First query to Colbert may take little time)",
    )
    language_dropdown = gr.Dropdown(
        choices=list(ISO_LANGUAGE_CODES),
        value="Hindi",  # default to Hindi
        label="Select Language for Translation",
    )

    prompt_html = gr.HTML()
    translated_textbox = gr.Textbox(label="Translated Response")

    # The submit button and the Enter key run the same chain of steps.
    for register_event in (txt_btn.click, txt.submit):
        register_event(
            add_text, [chatbot, txt], [chatbot, txt], queue=False
        ).then(
            bot, [chatbot, cross_encoder], [chatbot, prompt_html]
        ).then(
            # Show the English answer first; it is also the translation input.
            _last_bot_message, [chatbot], translated_textbox
        ).then(
            # Read the answer from translated_textbox: txt was already cleared
            # by add_text, so translating txt would always translate "".
            translate_text, [translated_textbox, language_dropdown], translated_textbox
        ).then(
            # add_text locked the prompt box; unlock it for the next question.
            lambda: gr.Textbox(interactive=True), None, [txt], queue=False
        )

# Launch the Gradio application
CHATBOT.launch(share=True)