tdnathmlenthusiast committed on
Commit
ced12f5
1 Parent(s): 81597a7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -12
app.py CHANGED
@@ -1,27 +1,45 @@
1
  import gradio as gr
2
  import onnxruntime as rt
3
  from transformers import AutoTokenizer
4
- import torch, json
 
5
 
 
6
  tokenizer = AutoTokenizer.from_pretrained("distilroberta-base")
7
 
8
- with open("genre_types_encoded.json", "r") as fp:
9
- encode_genre_types = json.load(fp)
 
 
 
 
 
10
 
 
11
  genres = list(encode_genre_types.keys())
12
 
13
- inf_session = rt.InferenceSession('udemy-classifier-quantized.onnx')
14
- input_name = inf_session.get_inputs()[0].name
15
- output_name = inf_session.get_outputs()[0].name
 
 
 
 
 
16
 
 
17
  def classify_courses_genre(description):
18
- input_ids = tokenizer(description)['input_ids'][:512]
19
- logits = inf_session.run([output_name], {input_name: [input_ids]})[0]
20
- logits = torch.FloatTensor(logits)
21
- probs = torch.sigmoid(logits)[0]
22
- return dict(zip(genres, map(float, probs)))
23
 
 
24
  label = gr.outputs.Label(num_top_classes=5)
 
 
25
  iface = gr.Interface(fn=classify_courses_genre, inputs="text", outputs=label)
 
 
26
  iface.launch(inline=False)
27
-
 
1
  import gradio as gr
2
  import onnxruntime as rt
3
  from transformers import AutoTokenizer
4
+ import torch
5
+ import json
6
 
7
+ # Initialize the tokenizer
8
  tokenizer = AutoTokenizer.from_pretrained("distilroberta-base")
9
 
10
+ # Load genre types from a JSON file
11
+ try:
12
+ with open("genre_types_encoded.json", "r") as fp:
13
+ encode_genre_types = json.load(fp)
14
+ except FileNotFoundError:
15
+ print("Error: 'genre_types_encoded.json' not found. Make sure the file exists.")
16
+ exit(1)
17
 
18
+ # Extract genres from the loaded data
19
  genres = list(encode_genre_types.keys())
20
 
21
+ # Load the ONNX inference session
22
+ try:
23
+ inf_session = rt.InferenceSession('udemy-classifier-quantized.onnx')
24
+ input_name = inf_session.get_inputs()[0].name
25
+ output_name = inf_session.get_outputs()[0].name
26
+ except FileNotFoundError:
27
+ print("Error: 'udemy-classifier-quantized.onnx' not found. Make sure the file exists.")
28
+ exit(1)
29
 
30
+ # Define the function for classifying courses' genres
31
  def classify_courses_genre(description):
32
+ input_ids = tokenizer(description, truncation=True, padding=True, return_tensors="pt")['input_ids'][:,:512]
33
+ logits = inf_session.run([output_name], {input_name: input_ids.cpu().numpy()})[0]
34
+ logits = torch.FloatTensor(logits)
35
+ probs = torch.sigmoid(logits)[0]
36
+ return dict(zip(genres, map(float, probs)))
37
 
38
+ # Define the output label with the top 5 classes
39
  label = gr.outputs.Label(num_top_classes=5)
40
+
41
+ # Create the Gradio interface
42
  iface = gr.Interface(fn=classify_courses_genre, inputs="text", outputs=label)
43
+
44
+ # Launch the Gradio interface
45
  iface.launch(inline=False)