apenasissso committed on
Commit
1c732f7
·
1 Parent(s): be86be9

break down audio into chunk files and process each chunk

Browse files
Files changed (4) hide show
  1. .gitignore +5 -0
  2. handler.py +62 -13
  3. requirements.txt +2 -0
  4. test_handler.py +15 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ venv
2
+ __pycache__
3
+ .vscode
4
+ pretrained_models
5
+ *.mp3
handler.py CHANGED
@@ -1,24 +1,73 @@
1
  import logging
2
  from speechbrain.pretrained import EncoderClassifier
3
  from typing import Dict, List, Any
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
 
6
  class EndpointHandler:
7
  def __init__(self, path=""):
8
- self.model = EncoderClassifier.from_hparams("speechbrain/lang-id-voxlingua107-ecapa")
9
- print('model loaded')
10
- logging.info('model loaded')
 
 
 
 
 
 
 
 
11
 
12
- def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
13
- inputs = data.pop("inputs",data)
14
 
15
- print('audio_url', inputs)
16
- logging.info(f'audio_url {inputs}')
 
 
17
 
18
- # run normal prediction
19
- output = self.model.classify_file(inputs)
20
- return {
21
- "prediction": float(output[1].exp()[0]),
22
- "language": output[3][0],
23
- }
 
24
 
 
 
1
  import logging
2
  from speechbrain.pretrained import EncoderClassifier
3
  from typing import Dict, List, Any
4
+ import requests
5
+ from pydub import AudioSegment
6
+ from io import BytesIO
7
+ import tempfile
8
+ import os
9
+
10
+
11
def save_chunks_to_temp_files(url, chunk_length=10000, timeout=30):
    """Download audio from *url*, split it into chunks, and save each chunk as a temp MP3.

    Args:
        url: Address of the audio file to download.
        chunk_length: Length of each chunk in milliseconds; must be positive.
        timeout: Seconds to wait on the server before giving up, so a stalled
            download cannot block the caller forever.

    Returns:
        Paths of the temporary MP3 files, in playback order. The files are not
        deleted automatically; the caller is responsible for removing them.

    Raises:
        ValueError: If ``chunk_length`` is not positive, or if the response's
            Content-Type header is missing or does not mention "audio".
        requests.HTTPError: If the server answers with an error status.
    """
    # Validate before any network traffic so bad arguments fail fast.
    if chunk_length <= 0:
        raise ValueError("chunk_length must be a positive number of milliseconds")

    # Download the audio file from the URL
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # Ensure the content type is audio; a missing header is treated as "not audio"
    # instead of surfacing as a KeyError.
    content_type = response.headers.get("Content-Type", "")
    if "audio" not in content_type.lower():
        raise ValueError("URL does not seem to be an audio file")

    # Load the downloaded bytes into an AudioSegment via a file-like object
    audio_segment = AudioSegment.from_file(BytesIO(response.content))

    # Save each chunk_length-millisecond slice to its own temporary file
    temp_files = []
    try:
        for idx, start in enumerate(range(0, len(audio_segment), chunk_length)):
            # mkstemp + close: the path must not be held open while the exporter
            # writes to it (an open handle blocks re-opening on some platforms).
            fd, chunk_path = tempfile.mkstemp(suffix=f"_chunk{idx}.mp3")
            os.close(fd)
            temp_files.append(chunk_path)

            chunk = audio_segment[start : start + chunk_length]
            # export() hands back the output file handle; close it so handles
            # do not pile up across many chunks.
            chunk.export(chunk_path, format="mp3").close()
    except Exception:
        # Do not leave partially written chunk files behind on failure.
        for leftover in temp_files:
            try:
                os.remove(leftover)
            except OSError:
                pass
        raise

    return temp_files
42
 
43
 
44
class EndpointHandler:
    """Endpoint handler that runs language identification on an audio URL, chunk by chunk."""

    def __init__(self, path=""):
        """Load the language-identification model.

        Args:
            path: Accepted for interface compatibility; not used by this handler.
        """
        self.model = EncoderClassifier.from_hparams(
            "speechbrain/lang-id-voxlingua107-ecapa"
        )
        print("model loaded")
        logging.info("model loaded")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Classify the language of every chunk of the audio referenced by the request.

        Args:
            data: Request payload. The value under ``"inputs"`` is used as the
                audio URL and is removed from ``data``; when the key is absent,
                ``data`` itself is passed on as the URL.

        Returns:
            One dict per audio chunk, in order, each with a float
            ``"prediction"`` and a ``"language"`` entry.
        """
        url = data.pop("inputs", data)

        print("audio_url", url)
        logging.info(f"audio_url {url}")

        response = []

        temp_filepaths = save_chunks_to_temp_files(url)
        try:
            for i, path in enumerate(temp_filepaths):
                logging.info(f"processing chunk {i} / {len(temp_filepaths)}")
                output = self.model.classify_file(path)

                # NOTE(review): indices assume classify_file returns
                # (out_prob, score, index, text_lab) with log-domain scores —
                # confirm against the installed SpeechBrain version.
                response.append(
                    {
                        "prediction": float(output[1].exp()[0]),
                        "language": output[3][0],
                    }
                )
        finally:
            # Remove every chunk file, including those never reached because
            # classification failed part-way through.
            for path in temp_filepaths:
                try:
                    os.remove(path)
                except OSError:
                    pass

        return response
requirements.txt CHANGED
@@ -1 +1,3 @@
1
  speechbrain
 
 
 
1
  speechbrain
2
+ pydub
3
+ requests
test_handler.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from handler import EndpointHandler
2
+
3
def main():
    """Run the handler once against a sample audio URL and print the result."""
    # init handler
    my_handler = EndpointHandler()

    # prepare sample payload
    holiday_payload = {
        "inputs": "https://pl-bots-public-media.s3.amazonaws.com/5511976170855_daa87950-5e1b-49e0-9daf-ba73d568a291.mp3"
    }

    # test the handler; keep the response in its own name instead of
    # overwriting the request payload
    predictions = my_handler(holiday_payload)

    # show results
    print("holiday_payload", predictions)


# Guard the entry point so importing this module (e.g. during test collection)
# does not load the model or hit the network.
if __name__ == "__main__":
    main()