simonschoe committed on
Commit
c00a75f
•
1 Parent(s): 64ab470

update app layout

Browse files
Files changed (2) hide show
  1. README.md +3 -3
  2. app.py +36 -30
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Call2vec
3
- emoji: 🌖
4
  colorFrom: blue
5
- colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 3.0.24
8
  app_file: app.py
 
1
  ---
2
+ title: Call2Vec
3
+ emoji: 📲📊
4
  colorFrom: blue
5
+ colorTo: green
6
  sdk: gradio
7
  sdk_version: 3.0.24
8
  app_file: app.py
app.py CHANGED
@@ -10,43 +10,40 @@ from gensim.models.fasttext import load_facebook_model
10
 
11
  ACCESS_KEY = os.environ.get('ACCESS_KEY')
12
 
13
- # download model from huggingface hub
 
14
  url = hf_hub_url(repo_id="simonschoe/call2vec", filename="model.bin")
15
  cached_download(url)
16
-
17
- # load model via gensim
18
  model = load_facebook_model(cached_download(url))
19
 
20
- def process(_input, topn):
 
21
 
22
- # split by delimiting characters
23
  _input = re.split('[,;\n]', _input)
24
-
25
- # convert input to lower, replace whitespaces by underscores
26
  _input = [s.strip().lower().replace(' ', '_') for s in _input if s]
27
 
28
  if _input[0] != ACCESS_KEY:
29
  with open('log.txt', 'a') as f:
30
  f.write(str(datetime.now()) + '+++' + '___'.join(_input) + '\n')
31
 
32
- # apply model
33
- if len(_input)>1:
34
- # compute average seed embedding
35
  avg_input = np.stack([model.wv[w] for w in _input], axis=0).mean(axis=0)
36
- # find similarities
37
- nearest_neighbors = model.wv.most_similar(positive=avg_input, topn=topn)
38
- frequencies = [model.wv.get_vecattr(nn[0], 'count') for nn in nearest_neighbors]
39
  else:
40
- # find similarities
41
- nearest_neighbors = model.wv.most_similar(positive=_input[0], topn=topn)
42
- frequencies = [model.wv.get_vecattr(nn[0], 'count') for nn in nearest_neighbors]
43
-
44
- result = pd.DataFrame([(a[0],a[1],b) for a,b in zip(nearest_neighbors, frequencies)], columns=['Token', 'Cosine Similarity', 'Frequency'])
45
  if _input[0] == ACCESS_KEY:
46
  with open('log.txt', 'r') as f:
47
  prompts = f.readlines()
48
  prompts = [p.strip().split('+++') for p in prompts]
49
- result = pd.DataFrame(prompts, columns=['Time', 'Prompt'])
 
 
 
 
 
50
  result.to_csv('result.csv')
51
  return result, 'result.csv', '\n'.join(_input)
52
 
@@ -58,14 +55,18 @@ demo = gr.Blocks()
58
 
59
  with demo:
60
  gr.Markdown("# Call2Vec")
61
- gr.Markdown("## Earnings call transformation project")
62
  with gr.Row():
63
  with gr.Column():
64
- gr.Markdown("""
 
65
  #### Project Description
66
- Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.""")
 
 
67
  gr.Markdown(
68
- """#### App usage:
 
69
  Add your input prompts to the text field on the right. To use multiple input prompts at once separate
70
  them by comma, semicolon or a new line
71
  ##### Examples
@@ -74,13 +75,18 @@ with demo:
74
  """
75
  )
76
  with gr.Column():
77
- text_input = gr.Textbox(lines=1)
78
  with gr.Row():
79
- n_output = gr.Slider(minimum=5, maximum=50, step=1)
80
- compute_button = gr.Button("Compute")
81
- df_output = gr.Dataframe(interactive=False)
82
- file_out = gr.File(interactive=False)
83
-
84
- compute_button.click(process, inputs=[text_input, n_output], outputs=[df_output, file_out, text_input])
 
 
 
 
 
85
 
86
  demo.launch()
 
10
 
11
  ACCESS_KEY = os.environ.get('ACCESS_KEY')
12
 
13
+
14
+ # Setup model
15
  url = hf_hub_url(repo_id="simonschoe/call2vec", filename="model.bin")
16
  cached_download(url)
 
 
17
  model = load_facebook_model(cached_download(url))
18
 
19
+ def semantic_search(_input, n):
20
+ """ Perform semantic search """
21
 
 
22
  _input = re.split('[,;\n]', _input)
 
 
23
  _input = [s.strip().lower().replace(' ', '_') for s in _input if s]
24
 
25
  if _input[0] != ACCESS_KEY:
26
  with open('log.txt', 'a') as f:
27
  f.write(str(datetime.now()) + '+++' + '___'.join(_input) + '\n')
28
 
29
+ if len(_input) > 1:
 
 
30
  avg_input = np.stack([model.wv[w] for w in _input], axis=0).mean(axis=0)
31
+ nearest_neighbours = model.wv.most_similar(positive=avg_input, topn=n)
32
+ frequencies = [model.wv.get_vecattr(nn[0], 'count') for nn in nearest_neighbours]
 
33
  else:
34
+ nearest_neighbours = model.wv.most_similar(positive=_input[0], topn=n)
35
+ frequencies = [model.wv.get_vecattr(nn[0], 'count') for nn in nearest_neighbours]
36
+
 
 
37
  if _input[0] == ACCESS_KEY:
38
  with open('log.txt', 'r') as f:
39
  prompts = f.readlines()
40
  prompts = [p.strip().split('+++') for p in prompts]
41
+ result = pd.DataFrame(prompts,
42
+ columns=['Time', 'Prompt'])
43
+ else:
44
+ result = pd.DataFrame([(a[0],a[1],b) for a,b in zip(nearest_neighbours, frequencies)],
45
+ columns=['Token', 'Cosine Similarity', 'Corpus Frequency'])
46
+
47
  result.to_csv('result.csv')
48
  return result, 'result.csv', '\n'.join(_input)
49
 
 
55
 
56
  with demo:
57
  gr.Markdown("# Call2Vec")
58
+ gr.Markdown("## Subtitle")
59
  with gr.Row():
60
  with gr.Column():
61
+ gr.Markdown(
62
+ """
63
  #### Project Description
64
+ Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
65
+ """
66
+ )
67
  gr.Markdown(
68
+ """
69
+ #### App usage:
70
  Add your input prompts to the text field on the right. To use multiple input prompts at once separate
71
  them by comma, semicolon or a new line
72
  ##### Examples
 
75
  """
76
  )
77
  with gr.Column():
78
+ text_in = gr.Textbox(lines=1)
79
  with gr.Row():
80
+ n = gr.Slider(minimum=5, maximum=250, step=5)
81
+ compute_bt = gr.Button("Compute")
82
+ df_out = gr.Dataframe(interactive=False)
83
+ f_out = gr.File(interactive=False)
84
+ gr.Markdown(
85
+ """
86
+ <div style='text-align: center;'>Call2Vec by X and Y</center></div>
87
+ ![visitors](https://visitor-badge.glitch.me/badge?page_id=simonschoe.call2vec&left_color=green&right_color=red)
88
+ """
89
+ )
90
+ compute_bt.click(semantic_search, inputs=[text_in, n], outputs=[df_out, f_out, text_in])
91
 
92
  demo.launch()