|
from datasets import load_from_disk, load_dataset |
|
import pandas as pd |
|
import os |
|
import gradio as gr |
|
|
|
|
|
from sentence_transformers import SentenceTransformer

# Load the "train" split of the dialogue dataset and build a FAISS index over
# its 'L_emb' column so nearest-example lookups can be run against it.
# NOTE(review): 'L_emb' presumably holds precomputed sentence embeddings
# compatible with the encoder below -- confirm against the dataset card.
ds_with_embeddings = load_dataset(
    "svjack/context-dialogue-generate-ds-zh-v1",
    split="train",
)
ds_with_embeddings.add_faiss_index(column='L_emb')

# Sentence encoder used to embed incoming queries before searching the index.
encoder = SentenceTransformer("sentence-transformers/LaBSE")
|
|
|
|
|
def retrieve_search_df(question="今天天气怎么样?", top_k=10):
    """Look up the indexed examples nearest to ``question``.

    The question is embedded with the module-level ``encoder`` and matched
    against the ``'L_emb'`` index of ``ds_with_embeddings``.

    Args:
        question: Query text to embed.
        top_k: Number of nearest examples requested from the index.

    Returns:
        A ``pandas.DataFrame`` built from the retrieved examples, restricted
        to the columns ``sent``, ``dialogue`` and ``scores``.
    """
    query_vector = encoder.encode(question)
    hit_scores, hit_columns = ds_with_embeddings.get_nearest_examples(
        'L_emb', query_vector, k=top_k
    )
    # Attach the scores returned by the index as an extra column.
    hits = pd.DataFrame(hit_columns).assign(scores=hit_scores)
    return hits[["sent", "dialogue", "scores"]]
|
|
|
# Example rows offered in the UI: [prefix text, top-k value].
example_sample = [
    ["有哪些有名的显卡?", 3],
    ["今天天气怎么样?", 3],
]
|
|
|
def demo_func(prefix, max_length):
    """Return the retrieved ``[sent, dialogue]`` pairs for ``prefix``.

    Args:
        prefix: Query text forwarded to ``retrieve_search_df``.
        max_length: Requested number of results (the "Top K" input). It is
            converted with ``int()`` and raised to at least 3. ``None``
            falls back to 10, the default ``top_k`` of
            ``retrieve_search_df``.

    Returns:
        ``{"Dialogue Context": rows}`` where ``rows`` is a list of
        ``[sent, dialogue]`` lists.
    """
    # NOTE(review): a cleared gr.Number field is presumably delivered as
    # None, which int() rejects with TypeError -- confirm against Gradio docs.
    if max_length is None:
        top_k = 10
    else:
        top_k = max(int(max_length), 3)

    matches = retrieve_search_df(prefix, top_k)
    # .values.tolist() always yields a plain list, so no type check is needed.
    rows = matches[["sent", "dialogue"]].values.tolist()
    return {"Dialogue Context": rows}
|
|
|
# Gradio UI: a text query plus a "Top K" number go into demo_func, and the
# result is rendered as JSON.
# NOTE(review): "drive" in the description is presumably a typo for
# "derived"; the string is left untouched here.
demo = gr.Interface(
    fn=demo_func,
    inputs=[
        gr.Text(label="Prefix"),
        gr.Number(label="Top K", value=10),
    ],
    outputs="json",
    title="Chinese Context Dialogue Generator 🐰 sample search demonstration",
    description='This _example_ was **drive** from <br/><b><h4>[https://github.com/svjack/Context2Dialogue](https://github.com/svjack/Context2Dialogue)</h4></b>\n',
    # An empty example list is passed as None.
    examples=example_sample or None,
    cache_examples=False,
)

# Both server settings are passed explicitly as None.
demo.launch(server_name=None, server_port=None)
|
|