"""Gradio front end for similarity queries against a persisted Chroma store."""

import os
import re
from functools import lru_cache

import gradio as gr
import numpy as np
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import Chroma

# Read eagerly so a missing "token" environment variable fails at startup
# (KeyError) rather than in the middle of a request.
HUGGINGFACEHUB_API_TOKEN = os.environ["token"]

# Embedding model and on-disk location of the Chroma collection.
MODEL_ID = "BAAI/bge-large-en-v1.5"
PERSIST_DIRECTORY = "db_book_mmt"


def clean_(l):
    """Return the text after the first ``=`` or newline of a search hit.

    Args:
        l: A search result. Objects exposing ``page_content`` are read through
            that attribute; anything else is treated as an iterable of
            ``(name, value)`` pairs and the value of the first pair is used.

    Returns:
        The text with every newline replaced by ``=``, cut after the first
        ``=`` and stripped of surrounding whitespace. When the text contains
        neither ``=`` nor a newline it is returned stripped but otherwise
        unchanged.
    """
    # Prefer the attribute: relying on the position of the first field breaks
    # whenever the document model lists another field before the text.
    text = getattr(l, "page_content", None)
    if text is None:
        text = list(l)[0][1]
    text = text.replace("\n", "=")
    return text.split("=", 1)[-1].strip()


def similarity_search2(vectordb, query, k, unique="True"):
    """Search ``vectordb`` for ``query`` and format the cleaned hits.

    Args:
        vectordb: Vector store exposing ``similarity_search(query, k=...)``.
        query: Text to search for.
        k: Number of hits to request; coerced to ``int``.
        unique: ``"True"`` (or boolean ``True``) to de-duplicate the hits,
            which also sorts them; any other value keeps every hit in the
            order the store returned it.

    Returns:
        The string form of a NumPy array of the cleaned hits with the
        enclosing brackets removed.
    """
    print(f"\nQuery Key: {query}, \nrows requested:{k}\nUnique values:{unique}")
    # UI widgets may deliver the count as a float; the store needs an integer.
    hits = vectordb.similarity_search(query, k=int(k))
    cleaned = np.array([clean_(hit) for hit in hits])
    if str(unique) == "True":
        cleaned = np.unique(cleaned)
    # str() of an array is "[...]"; drop the brackets for display.
    return str(cleaned)[1:-1]


@lru_cache(maxsize=1)
def _load_vectordb():
    """Build the embedding model and open the Chroma store exactly once."""
    embedding = HuggingFaceBgeEmbeddings(
        model_name=MODEL_ID,
        model_kwargs={"device": "cpu"},
        encode_kwargs={'normalize_embeddings': True},
    )
    return Chroma(
        persist_directory=PERSIST_DIRECTORY,
        embedding_function=embedding,
    )


def mmt_query(query, k, unique):
    """Click handler: run the query against the cached vector store.

    Args:
        query: Text from the query box.
        k: Value of the sample-count slider.
        unique: Selected radio option, or ``None`` when nothing is selected
            (treated as "do not de-duplicate").

    Returns:
        The formatted search result string shown in the output box.
    """
    # Loading the model per request is slow; reuse the cached instance.
    return similarity_search2(_load_vectordb(), query, int(k), unique)


with gr.Blocks() as demo:
    gr.Markdown(
        """

Query Retrieval

"""
    )
    with gr.Row():
        with gr.Column():
            query = gr.Textbox(placeholder="your query", label="Query")
            # The initial value must lie inside [minimum, maximum]; step=1
            # keeps the requested row count a whole number.
            k = gr.Slider(
                minimum=10,
                maximum=100000,
                value=10,
                step=1,
                label="number of samples to check",
            )
            unique = gr.Radio(["True", "False"], label="Return Unique values")
            with gr.Row():
                btn = gr.Button("Submit")
        with gr.Column():
            output = gr.Textbox(scale=10, label="Output")
    btn.click(mmt_query, [query, k, unique], output)

demo.launch()