"""Flask service exposing a Wav2Vec2 CTC speech-to-text model over HTTP."""

from flask import Flask, jsonify, request
import numpy as np
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

app = Flask(__name__)

# Loaded once at import time so every request reuses the same objects.
processor = Wav2Vec2Processor.from_pretrained("oyqiz/uzbek_stt")
model = Wav2Vec2ForCTC.from_pretrained("oyqiz/uzbek_stt")

# Sampling rate (Hz) reported to the processor for every request.
SAMPLE_RATE = 16000

# Element type used to interpret the raw request body.
_PCM_DTYPE = np.dtype(np.int16)


def _pcm16_to_float(raw: bytes) -> np.ndarray:
    """Convert a raw 16-bit PCM byte string to a float32 sample array.

    Samples are divided by the int16 maximum so full-scale input maps to
    roughly [-1.0, 1.0].

    Args:
        raw: Request payload, interpreted as native-endian int16 samples.
            NOTE(review): presumably mono audio at SAMPLE_RATE; confirm
            against the clients that call this service.

    Returns:
        One-dimensional float32 array with one element per input sample.

    Raises:
        ValueError: If ``raw`` is empty or its length is not a whole number
            of 16-bit samples.
    """
    if not raw:
        raise ValueError("request body is empty; expected 16-bit PCM audio")
    # np.frombuffer raises an opaque ValueError on a trailing partial sample;
    # check up front so the caller gets an actionable message.
    if len(raw) % _PCM_DTYPE.itemsize:
        raise ValueError(
            "request body length must be a multiple of "
            f"{_PCM_DTYPE.itemsize} bytes (16-bit PCM samples)"
        )
    samples = np.frombuffer(raw, dtype=_PCM_DTYPE)
    # Convert before dividing so the result stays float32 instead of being
    # promoted to float64.
    return samples.astype(np.float32) / np.iinfo(np.int16).max


@app.route('/', methods=['GET'])
def index():
    """Return a JSON welcome message."""
    return jsonify({"message": "Welcome to whisper uz!"})


@app.route('/transcribe', methods=['POST'])
def transcribe():
    """Transcribe the raw 16-bit PCM audio sent as the request body.

    Returns:
        The decoded transcription string on success, or a JSON object with
        an ``error`` key and HTTP status 400 when the body is empty or is
        not a whole number of 16-bit samples.
    """
    try:
        audio_np = _pcm16_to_float(request.data)
    except ValueError as exc:
        # Malformed payloads are a client error, not a server failure.
        return jsonify({"error": str(exc)}), 400

    inputs = processor(audio_np, sampling_rate=SAMPLE_RATE, return_tensors="pt")
    # Some feature-extractor configurations do not return an attention mask;
    # .get() yields None in that case instead of raising on attribute access.
    attention_mask = inputs.get("attention_mask")

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(inputs.input_values, attention_mask=attention_mask).logits

    # Most likely token id at each time step, then decode the single
    # sequence in the batch.
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.decode(predicted_ids[0])


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)