Abhay Mishra committed on
Commit
b9b440a
·
1 Parent(s): 1960024

add voice based queries

Browse files
Files changed (2) hide show
  1. .gitignore +2 -0
  2. app.py +55 -18
.gitignore CHANGED
@@ -1,3 +1,5 @@
1
  venv/
2
  flagged/
3
  **/__pycache__/**
 
 
 
1
  venv/
2
  flagged/
3
  **/__pycache__/**
4
+ .ipynb_checkpoints/
5
+ .vscode/
app.py CHANGED
@@ -1,16 +1,30 @@
1
  from sentence_transformers import SentenceTransformer
2
  import pickle
3
  import numpy as np
 
4
  import torch
5
  import gradio as gr
6
 
7
- model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  with open("dep_course_title_to_content_embed.pickle", "rb") as handle:
10
  loaded_map = pickle.load(handle)
11
 
12
  dep_name_course_name = list(loaded_map.keys())
13
- deps = list(set([x for (x,y) in dep_name_course_name]))
14
  dep_to_course_name = {}
15
  dep_to_course_embedding = {}
16
 
@@ -21,30 +35,53 @@ for dep in deps:
21
  for (dep_name, course_name), embedding in loaded_map.items():
22
  # print(embedding.shape)
23
  dep_to_course_name[dep_name].append(course_name)
24
- dep_to_course_embedding[dep_name].append(np.array(embedding, dtype = np.float32))
25
 
26
  cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
27
 
28
- def give_best_match(query, Department):
 
29
  if not Department:
30
  Department = deps
31
  course_titles = []
32
  course_content_embeddings = []
33
  for dep in Department:
34
- course_titles += dep_to_course_name[dep]
35
  course_content_embeddings += dep_to_course_embedding[dep]
36
- course_content_embeddings = np.stack(course_content_embeddings)
 
 
 
37
  embed = model.encode(query)
38
- result = cos(torch.from_numpy(course_content_embeddings),torch.from_numpy(embed))
39
  indices = reversed(np.argsort(result))
40
- predictions = {course_titles[i] : float(result[i]) for i in indices}
41
- return predictions
42
-
43
- demo = gr.Interface(fn = give_best_match,
44
- inputs=[
45
- gr.Textbox(label="Describe the course",lines = 5, placeholder = "Type anything related to course/s\n\nTitle, Topics/Sub Topics, Refernce books, Questions asked in exams or some random fun stuff."),
46
- gr.CheckboxGroup(deps, label = "(Optional) Departments"),
47
- ],
48
- outputs=gr.Label(label = "Most Relevant Courses", num_top_classes=5)
49
- )
50
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from sentence_transformers import SentenceTransformer
2
  import pickle
3
  import numpy as np
4
+
5
  import torch
6
  import gradio as gr
7
 
8
import os
import subprocess
import sys

# Whisper is installed at start-up only when it is not already importable.
# The install goes through the running interpreter's own pip
# (``sys.executable -m pip``) so the package lands in the environment this
# process actually uses, and ``check=True`` makes a failed install raise
# immediately instead of surfacing later as an unrelated ImportError.
try:
    import whisper
except ImportError:
    subprocess.run(
        [
            sys.executable,
            "-m",
            "pip",
            "install",
            "git+https://github.com/openai/whisper.git",
        ],
        check=True,
    )
    import whisper


# Speech-to-text model used by ``infer``; loaded once at import time so each
# request reuses the same instance.
infer_model = whisper.load_model("tiny")
14
+
15
+
16
def infer(audio):
    """Transcribe ``audio`` with the module-level Whisper model.

    Args:
        audio: Audio input forwarded unchanged to ``infer_model.transcribe``.

    Returns:
        The ``"text"`` entry of the transcription result.
    """
    transcription = infer_model.transcribe(audio)
    text = transcription["text"]
    return text
19
+
20
+
21
# Sentence encoder used to embed the user's query at request time.
model = SentenceTransformer("paraphrase-MiniLM-L6-v2")

# NOTE(review): pickle.load can execute arbitrary code from the file; this is
# only safe as long as the pickle is a trusted artifact shipped with the app.
with open("dep_course_title_to_content_embed.pickle", "rb") as handle:
    loaded_map = pickle.load(handle)

# Keys of the loaded map are 2-tuples whose first element is the department.
dep_name_course_name = list(loaded_map.keys())

# Sorted so the department list is identical on every start; iterating a
# plain set of strings can yield a different order from one interpreter run
# to the next. Assumes department names are mutually comparable (e.g. all
# strings) -- TODO confirm against the pickle contents.
deps = sorted({dep_name for dep_name, _ in dep_name_course_name})

# Filled in later: department -> course titles / department -> embeddings.
dep_to_course_name = {}
dep_to_course_embedding = {}
30
 
 
# Append each course title and its embedding (as a float32 array) to the
# entries of its department. Both dictionaries must already hold a list for
# every department name before this loop runs.
for key, embedding in loaded_map.items():
    dep_name, course_name = key
    dep_to_course_name[dep_name].append(course_name)
    vector = np.array(embedding, dtype=np.float32)
    dep_to_course_embedding[dep_name].append(vector)

# Cosine similarity taken along dimension 1 of its inputs.
cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
41
 
42
+
43
def give_best_match(query, audio, Department):
    """Rank courses by cosine similarity between a query and course embeddings.

    Args:
        query: Text description of the course. Replaced by the transcription
            when ``audio`` is truthy.
        audio: Audio input passed to ``infer`` when truthy; otherwise ignored.
        Department: Department names to search. A falsy value (``None`` or an
            empty list) means every department in ``deps``.

    Returns:
        A ``(query, predictions)`` tuple: the query text that was actually
        embedded, and a dict mapping course title to similarity score,
        inserted from highest score to lowest.
    """
    if not Department:
        Department = deps

    # Collect the titles and embeddings of the selected departments; the two
    # lists stay index-aligned.
    course_titles = []
    course_content_embeddings = []
    for dep in Department:
        course_titles += dep_to_course_name[dep]
        course_content_embeddings += dep_to_course_embedding[dep]
    course_content_embeddings = np.stack(course_content_embeddings)

    # A voice recording takes precedence over the typed text.
    if audio:
        query = infer(audio)

    embed = model.encode(query)
    result = cos(torch.from_numpy(course_content_embeddings), torch.from_numpy(embed))

    # Convert explicitly instead of relying on NumPy coercing a tensor.
    scores = result.numpy()

    predictions = {}
    for i in np.argsort(scores)[::-1]:
        # Scores are visited best-first. If the same title occurs more than
        # once (e.g. offered by several selected departments), keep the first
        # score seen, i.e. the highest, rather than letting a later, lower
        # score overwrite it.
        predictions.setdefault(course_titles[i], float(scores[i]))
    return query, predictions
60
+
61
+
62
# Input widgets, listed in the positional order of give_best_match's
# parameters: typed description, voice recording, department filter.
description_box = gr.Textbox(
    label="Describe the course",
    lines=5,
    placeholder="Type anything related to course/s\n\nTitle, Topics/Sub Topics, Refernce books, Questions asked in exams or some random fun stuff.",
)
voice_input = gr.Audio(
    source="microphone",
    type="filepath",
    label="Don't want to type, Try Describing using your sweet voice!!",
    interactive=True,
)
department_picker = gr.CheckboxGroup(deps, label="(Optional) Departments")

# Output widgets, matching the (query, predictions) tuple returned by
# give_best_match.
query_box = gr.Textbox(label="Query", lines=2)
ranking_label = gr.Label(label="Most Relevant Courses", num_top_classes=5)

demo = gr.Interface(
    fn=give_best_match,
    inputs=[description_box, voice_input, department_picker],
    outputs=[query_box, ranking_label],
)

# To try transcription on its own, an Interface wrapping `infer` with a
# microphone gr.Audio(type="filepath") input and a "text" output can be
# assigned to `demo` instead.

demo.launch()