import os

# Install Whisper from source at startup. Shelling out to pip like this is a
# common workaround on hosted platforms (e.g. Hugging Face Spaces); for local
# use, install the package once instead.
os.system("pip install git+https://github.com/openai/whisper.git")
import gradio as gr  # only used by the commented-out Gradio code at the bottom
import whisper
from flask import Flask, jsonify, request
import requests
import streamlit as st  # only used by the commented-out API-key check below
import time


# Load the Whisper "small" checkpoint once at startup so every request reuses it.
model = whisper.load_model("small")
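# Other standard openai/whisper checkpoints include "tiny", "base", "medium",
# and "large" (larger is more accurate but slower). A hedged sketch of making
# the size configurable; WHISPER_MODEL is an assumed variable name, not part
# of the original code:
#
#   model = whisper.load_model(os.environ.get("WHISPER_MODEL", "small"))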

app = Flask(__name__)

# Simple health-check endpoint.
@app.route("/")
def indexApi():
    return jsonify({"output": "okay"})

@app.route("/run", methods=['POST'])
def runApi():
    start_time = time.time()

    audio_url = request.form.get("audio_url")
    # key = request.form.get("key")
    # modelSelection = request.form.get("model")
    # print(audio_url)

    # if (modelSelection == None):
    #     modelSelection = "small"
    # model = whisper.load_model(modelSelection)
    # print(model)

    # # reject if key does not match the configured secret
    # apiKey = st.secrets["Api-Key"]
    # if apiKey != key:
    #     return jsonify({
    #         "audio_url": audio_url,
    #         "model": model,
    #         "result": "Invalid API Key",
    #     }), 400


    # Download the audio and save it to disk for Whisper to read.
    response = requests.get(audio_url)

    if response.status_code == requests.codes.ok:
        with open("audio.mp3", "wb") as f:
            f.write(response.content)
    else:
        # f-prefix added so the status code is actually interpolated.
        return jsonify({
            "result": f"Unable to save file, status code: {response.status_code}",
        }), 400
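    # Note: requests.get blocks with no timeout by default; a hardened version
    # would likely pass something like requests.get(audio_url, timeout=30) so
    # an unresponsive host cannot hang the request handler indefinitely.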

    # arr = np.asarray(bytearray(response.content), dtype=np.uint8)
    # result = model.transcribe("audio.mp3")
    # Run the low-level Whisper pipeline on the downloaded file.
    audio = whisper.load_audio("audio.mp3")
    audio = whisper.pad_or_trim(audio)  # pad/trim to Whisper's 30-second window

    # Log-Mel spectrogram on the same device as the model.
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Detect the spoken language; probs maps language codes to probabilities.
    # The result is currently unused (decode() auto-detects language anyway).
    _, probs = model.detect_language(mel)

    # fp16=False keeps decoding working on CPU-only hosts.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
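    # Note: pad_or_trim means only the first 30 seconds are transcribed. For
    # full-length audio, Whisper's high-level API chunks the file itself; a
    # minimal sketch using the same model and file:
    #
    #   result = model.transcribe("audio.mp3", fp16=False)
    #   text = result["text"]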


    end_time = time.time()
    total_time = end_time - start_time

    return jsonify({
        "audio_url": audio_url,
        # "model": model,
        "result": result.text,
        "exec_time_sec": total_time
    })

if __name__ == "__main__":
    # 7860 is the default port expected by Hugging Face Spaces.
    app.run(host="0.0.0.0", port=7860)
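# Example request against a locally running server (the audio URL below is a
# placeholder, not part of the original code):
#
#   curl -X POST http://localhost:7860/run \
#        -F "audio_url=https://example.com/sample.mp3"
#
# which returns JSON shaped like:
#   {"audio_url": "...", "result": "<transcript>", "exec_time_sec": 1.23}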
        
# def inference(audio):
#     audio = whisper.load_audio(audio)
#     audio = whisper.pad_or_trim(audio)
    
#     mel = whisper.log_mel_spectrogram(audio).to(model.device)
    
#     _, probs = model.detect_language(mel)
    
#     options = whisper.DecodingOptions(fp16 = False)
#     result = whisper.decode(model, mel, options)
    
#     # print(result.text)
#     return result.text, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
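
# A minimal sketch of wiring the commented-out inference function into a
# Gradio UI. The component names and layout here are assumptions, not the
# original app's; the three gr.update values suggest controls revealed after
# a successful run:
#
#   with gr.Blocks() as demo:
#       audio_in = gr.Audio(type="filepath")
#       run_btn = gr.Button("Transcribe")
#       text_out = gr.Textbox(label="Transcript")
#       btn1 = gr.Button("Copy", visible=False)
#       btn2 = gr.Button("Share", visible=False)
#       btn3 = gr.Button("Clear", visible=False)
#       run_btn.click(inference, inputs=audio_in,
#                     outputs=[text_out, btn1, btn2, btn3])
#   demo.launch()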