import os
import tempfile
from io import BytesIO

import faiss
import gradio as gr
import numpy as np
import pandas as pd
from FlagEmbedding import BGEM3FlagModel
|
|
|
|
|
# Embedding model used to encode incoming queries (fp16 requested).
model = BGEM3FlagModel('BAAI/bge-m3', use_fp16=True)

# Records that search hits are resolved against.
df = pd.read_json('White-Stride-Red-68.json')

# Coerce the context column to text.
# NOTE(review): astype(str) runs before fillna(''), so missing values have
# already been turned into non-empty strings and neither fillna nor the
# empty-string filter below affects them. The order is intentionally left
# unchanged here: search_query() resolves FAISS hits by row position, so
# altering which rows survive could misalign df with the stored index.
# Confirm how the index was built before reordering these calls.
df['embeding_context'] = df['embeding_context'].astype(str).fillna('')

# Keep only rows that have some context text.
df = df.loc[df['embeding_context'].ne('')]

# Vector index read from disk; presumably its vector order matches the row
# order of df after the filter above -- TODO confirm against the build script.
index = faiss.read_index('vector_store_bge_m3.index')
|
|
|
|
|
def search_query(query_text, num_records=50):
    """Return the rows of ``df`` most similar to *query_text*.

    The query is encoded with the module-level ``model``, the module-level
    FAISS ``index`` is searched for the nearest vectors, and the hits are
    resolved to rows of ``df`` by position.

    Args:
        query_text: Free-text search query.
        num_records: Maximum number of neighbours to request from the index.

    Returns:
        A DataFrame of matching rows without the ``embeding_context`` column,
        de-duplicated and re-indexed from zero. Empty when the query is blank.

    Raises:
        IndexError: If the index returns a position beyond the end of ``df``,
            which indicates the index and the data file are out of sync.
    """
    # A blank query has nothing meaningful to match; return an empty frame
    # with the same columns a real search would produce.
    if query_text is None or not str(query_text).strip():
        return df.iloc[0:0].drop(columns=['embeding_context']).reset_index(drop=True)

    encoded = model.encode([query_text], batch_size=12, max_length=1024)['dense_vecs']
    query_vectors = np.asarray(encoded, dtype='float32')  # FAISS searches float32 vectors

    _distances, neighbours = index.search(query_vectors, num_records)

    # FAISS pads the result with -1 when fewer than num_records neighbours
    # exist; iloc would interpret -1 as "last row" and return a bogus hit.
    positions = neighbours[0]
    positions = positions[positions >= 0]

    return (
        df.iloc[positions]
        .drop(columns=['embeding_context'])
        .drop_duplicates()
        .reset_index(drop=True)
    )
|
|
|
|
|
def gradio_interface(query_text):
    """Run a search and prepare its results for display and download.

    Args:
        query_text: Text entered in the search box.

    Returns:
        A tuple ``(results, download_update)`` where ``results`` is the
        DataFrame returned by ``search_query`` and ``download_update`` is a
        ``gr.update`` whose value is the path of an Excel export of the same
        results.
    """
    search_results = search_query(query_text)

    # The download component needs a file path rather than raw bytes, so the
    # workbook is written to disk. A fresh directory per call keeps concurrent
    # requests from overwriting each other while still letting the file keep
    # a stable, user-friendly name.
    # NOTE: these directories are not removed here; they live until the
    # system temp location is cleaned.
    export_dir = tempfile.mkdtemp(prefix='search_results_')
    excel_path = os.path.join(export_dir, 'search_results.xlsx')

    with pd.ExcelWriter(excel_path, engine='xlsxwriter') as writer:
        search_results.to_excel(writer, index=False)

    return search_results, gr.update(value=excel_path)
|
|
|
# Build the page: a query box, a search button, a results table and a
# download button that are both refreshed by each search.
with gr.Blocks() as app:
    gr.Markdown("<h1>White Stride Red Search (BEG-M3)</h1>")

    search_input = gr.Textbox(label="Search Query", placeholder="Enter search text", interactive=True)
    search_button = gr.Button("Search")
    search_output = gr.DataFrame(label="Search Results")

    # gr.DownloadButton does not accept a file_name argument; the downloaded
    # file's name is taken from the file path supplied as the button's value.
    download_button = gr.DownloadButton(label="Download Excel")

    # One click updates both the table and the downloadable file.
    search_button.click(
        fn=gradio_interface,
        inputs=search_input,
        outputs=[search_output, download_button],
    )

# Start the web server once the layout has been fully defined.
app.launch()
|
|