CineAI committed on
Commit
aa955a3
·
verified ·
1 Parent(s): 4d6b36a

Delete audio2text

Browse files
Files changed (2) hide show
  1. audio2text/a2t.py +0 -43
  2. audio2text/init.py +0 -16
audio2text/a2t.py DELETED
@@ -1,43 +0,0 @@
1
- import numpy as np
2
- import librosa
3
- import io
4
-
5
- from .init import pipe
6
-
7
- TASK = "transcribe"
8
- BATCH_SIZE = 8
9
-
10
- class A2T:
11
- def __init__(self, mic):
12
- self.mic = mic
13
-
14
- def __generate_text(self, inputs, task: str = None):
15
- if inputs is None:
16
- raise Exception("Inputs is None")
17
-
18
- transcribed_text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
19
- return transcribed_text
20
-
21
- def __preprocces(self, raw: bytes):
22
- print(f"Raw type : {type(raw)}")
23
- chunk = io.BytesIO(raw)
24
- audio, sample_rate = librosa.load(chunk, sr=16000)
25
- print(f"Sample rate : {sample_rate}")
26
- return audio
27
-
28
- def predict(self):
29
- try:
30
- if self.mic is not None:
31
- raw = self.mic
32
- audio = self.__preprocces(raw=raw)
33
- print(f"audio type : {type(audio)} \n shape : {audio.shape} \n audio max value : {np.max(audio)}")
34
- else:
35
- raise Exception("please provide audio")
36
-
37
- if isinstance(audio , np.ndarray):
38
- return self.__generate_text(inputs=audio, task=TASK)
39
- else:
40
- raise Exception("Audio is not np array")
41
-
42
- except Exception as e:
43
- return f"Oops some kinda error : {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
audio2text/init.py DELETED
@@ -1,16 +0,0 @@
1
- import torch
2
-
3
- from transformers import pipeline
4
-
5
- # ArticMonkey:19.03.24:1700 is an example of a version name in plaintext; it will be converted into hex using this site -> https://magictool.ai/tool/text-to-hex-converter/
6
- # Here ArticMonkey is the name of the version and the rest is the date and time
7
-
8
- device = 0 if torch.cuda.is_available() else "cpu"
9
-
10
- checkpoint = "openai/whisper-medium"
11
- pipe = pipeline(
12
- "automatic-speech-recognition",
13
- model=checkpoint,
14
- device=device,
15
- chunk_length_s=30,
16
- )