Call2Vec / app.py
simonschoe's picture
update app layout
c00a75f
raw
history blame
3.78 kB
import gradio as gr
import numpy as np
import pandas as pd
from datetime import datetime
import os
import re
from huggingface_hub import hf_hub_url, cached_download
from gensim.models.fasttext import load_facebook_model
# Optional secret read from the environment (None when unset). A prompt whose
# first token equals this value makes `semantic_search` return the query log.
ACCESS_KEY = os.environ.get('ACCESS_KEY')

# Setup model: resolve the Hub URL, download the binary once (the call returns
# the local cache path) and load it with gensim. The path is reused instead of
# calling `cached_download` a second time.
url = hf_hub_url(repo_id="simonschoe/call2vec", filename="model.bin")
model_path = cached_download(url)
model = load_facebook_model(model_path)
def semantic_search(_input, n):
    """Perform semantic search for one or more prompts.

    Args:
        _input: Raw text from the prompt box. Several prompts may be given,
            separated by comma, semicolon or newline. Each prompt is stripped,
            lower-cased and has its spaces replaced by underscores.
        n: Number of nearest neighbours to return (converted to ``int``).

    Returns:
        A tuple ``(result, csv_path, normalised_prompts)``:

        * ``result`` is a DataFrame with the columns ``Token``,
          ``Cosine Similarity`` and ``Corpus Frequency``; or, when the first
          prompt equals ``ACCESS_KEY``, the logged prompts with the columns
          ``Time`` and ``Prompt``.
        * ``csv_path`` is ``'result.csv'``, to which ``result`` is written.
        * ``normalised_prompts`` is the cleaned prompts joined by newlines.

    Empty or whitespace-only input yields an empty result frame instead of
    raising ``IndexError``.
    """
    # Strip *before* filtering so whitespace-only fragments (e.g. "a, ,b")
    # do not survive as empty tokens.
    tokens = [s.strip().lower().replace(' ', '_')
              for s in re.split('[,;\n]', _input or '')]
    tokens = [t for t in tokens if t]

    if not tokens:
        result = pd.DataFrame(
            columns=['Token', 'Cosine Similarity', 'Corpus Frequency'])
        result.to_csv('result.csv')
        return result, 'result.csv', ''

    if tokens[0] == ACCESS_KEY:
        # Access-key request: show the prompt log; no model query, no logging.
        rows = []
        try:
            with open('log.txt', 'r') as f:
                for line in f:
                    # Split on the first separator only, so a prompt that
                    # itself contains '+++' still gives exactly two fields.
                    timestamp, sep, prompt = line.strip().partition('+++')
                    if sep:
                        rows.append((timestamp, prompt))
        except FileNotFoundError:
            # Nothing has been logged yet; fall through with an empty log.
            pass
        result = pd.DataFrame(rows, columns=['Time', 'Prompt'])
    else:
        with open('log.txt', 'a') as f:
            f.write(str(datetime.now()) + '+++' + '___'.join(tokens) + '\n')

        if len(tokens) > 1:
            # Several prompts: query with the mean of their vectors.
            query = np.stack([model.wv[w] for w in tokens], axis=0).mean(axis=0)
        else:
            # Single prompt: query by key.
            query = tokens[0]

        nearest_neighbours = model.wv.most_similar(positive=query, topn=int(n))
        result = pd.DataFrame(
            [(token, similarity, model.wv.get_vecattr(token, 'count'))
             for token, similarity in nearest_neighbours],
            columns=['Token', 'Cosine Similarity', 'Corpus Frequency'])

    result.to_csv('result.csv')
    return result, 'result.csv', '\n'.join(tokens)
def save(df):
    """Write ``df`` to ``result.csv`` in the working directory and return that path."""
    target = 'result.csv'
    df.to_csv(target)
    return target
# Markdown snippets for the page, kept at module level so the layout code below
# stays readable and the text is not affected by code indentation.
PROJECT_DESCRIPTION_MD = """
#### Project Description
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
"""

USAGE_MD = """
#### App usage:
Add your input prompts to the text field on the right. To use multiple input prompts at once separate
them by comma, semicolon or a new line
##### Examples
- Climate change
- Financial risk, energy dependency, climate neutrality
"""

# The stray `</center>` closing tag (it had no opening tag) has been removed.
FOOTER_MD = """
<div style='text-align: center;'>Call2Vec by X and Y</div>
![visitors](https://visitor-badge.glitch.me/badge?page_id=simonschoe.call2vec&left_color=green&right_color=red)
"""

demo = gr.Blocks()

# NOTE(review): the nesting below is reconstructed from the order of the
# components (description column on the left, controls and outputs on the
# right, footer underneath) — confirm against the intended layout.
with demo:
    gr.Markdown("# Call2Vec")
    gr.Markdown("## Subtitle")

    with gr.Row():
        # Left column: project description and usage notes.
        with gr.Column():
            gr.Markdown(PROJECT_DESCRIPTION_MD)
            gr.Markdown(USAGE_MD)

        # Right column: prompt input, controls and result widgets.
        with gr.Column():
            text_in = gr.Textbox(lines=1)
            with gr.Row():
                n = gr.Slider(minimum=5, maximum=250, step=5)
                compute_bt = gr.Button("Compute")
            df_out = gr.Dataframe(interactive=False)
            f_out = gr.File(interactive=False)

    gr.Markdown(FOOTER_MD)

    # The third output writes the normalised prompts back into the text box.
    compute_bt.click(semantic_search, inputs=[text_in, n], outputs=[df_out, f_out, text_in])

demo.launch()