Delete audio2text
- audio2text/a2t.py +0 -43
- audio2text/init.py +0 -16
audio2text/a2t.py
DELETED
@@ -1,43 +0,0 @@
-import numpy as np
-import librosa
-import io
-
-from .init import pipe
-
-TASK = "transcribe"
-BATCH_SIZE = 8
-
-class A2T:
-    def __init__(self, mic):
-        self.mic = mic
-
-    def __generate_text(self, inputs, task: str = None):
-        if inputs is None:
-            raise Exception("Inputs is None")
-
-        transcribed_text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
-        return transcribed_text
-
-    def __preprocces(self, raw: bytes):
-        print(f"Raw type : {type(raw)}")
-        chunk = io.BytesIO(raw)
-        audio, sample_rate = librosa.load(chunk, sr=16000)
-        print(f"Sample rate : {sample_rate}")
-        return audio
-
-    def predict(self):
-        try:
-            if self.mic is not None:
-                raw = self.mic
-                audio = self.__preprocces(raw=raw)
-                print(f"audio type : {type(audio)} \n shape : {audio.shape} \n audio max value : {np.max(audio)}")
-            else:
-                raise Exception("please provide audio")
-
-            if isinstance(audio , np.ndarray):
-                return self.__generate_text(inputs=audio, task=TASK)
-            else:
-                raise Exception("Audio is not np array")
-
-        except Exception as e:
-            return f"Oops some kinda error : {e}"
audio2text/init.py
DELETED
@@ -1,16 +0,0 @@
-import torch
-
-from transformers import pipeline
-
-# ArticMonkey:19.03.24:1700 example of version name in plaintext will be convert into hex using this site -> https://magictool.ai/tool/text-to-hex-converter/
-# Here ArticMonkey is name of version and rest of all is data and time
-
-device = 0 if torch.cuda.is_available() else "cpu"
-
-checkpoint = "openai/whisper-medium"
-pipe = pipeline(
-    "automatic-speech-recognition",
-    model=checkpoint,
-    device=device,
-    chunk_length_s=30,
-)
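The deleted init.py constructed the module-level transformers ASR pipeline that a2t.py imported: openai/whisper-medium, GPU device 0 when CUDA is available (CPU otherwise), and 30-second chunking for long audio. Below is a standalone sketch of the equivalent call path, with a silent dummy waveform standing in for real microphone audio; it mirrors the deleted code's parameters (batch_size=8, task="transcribe", return_timestamps=True) but is an illustration, not the removed module itself.

    import numpy as np
    import torch
    from transformers import pipeline

    device = 0 if torch.cuda.is_available() else "cpu"
    pipe = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-medium",
        device=device,
        chunk_length_s=30,
    )

    # One second of 16 kHz silence as a stand-in for the decoded microphone audio.
    audio = np.zeros(16000, dtype=np.float32)
    result = pipe(audio, batch_size=8, generate_kwargs={"task": "transcribe"}, return_timestamps=True)
    print(result["text"])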