commit files to HF hub
Browse files
- .gitattributes +1 -0
- README.md +12 -0
- args_2021-07-30-22-56-56.yaml +26 -0
- best-states-dev.ckpt +3 -0
- config_2021-07-30-22-56-56.yaml +34 -0
- events.out.tfevents.1627678623.vorace +3 -0
- hub_repo +1 -0
- model.ckpt +3 -0
- model.py +39 -0
.gitattributes
CHANGED
@@ -15,3 +15,4 @@
|
|
15 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
16 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
17 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
15 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
16 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
17 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: superb
|
3 |
+
benchmark: superb
|
4 |
+
task: sd
|
5 |
+
datasets:
|
6 |
+
- superb
|
7 |
+
tags:
|
8 |
+
- speaker-diarization
|
9 |
+
- osanseviero/hubert_base
|
10 |
+
---
|
11 |
+
|
12 |
+
# Fine-tuned s3prl model for SD
|
args_2021-07-30-22-56-56.yaml
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
auto_resume: false
|
2 |
+
backend: nccl
|
3 |
+
cache_dir: null
|
4 |
+
config: ./downstream/diarization/config.yaml
|
5 |
+
device: cuda
|
6 |
+
downstream: diarization
|
7 |
+
downstream_variant: null
|
8 |
+
evaluate_split: test
|
9 |
+
expdir: result/downstream/sd-push-to-hub
|
10 |
+
expname: sd-push-to-hub
|
11 |
+
from_hf_hub: true
|
12 |
+
hub: huggingface
|
13 |
+
init_ckpt: null
|
14 |
+
local_rank: null
|
15 |
+
mode: train
|
16 |
+
override: config.downstream_expert.loaderrc.train_dir='/data/lewis/superb/Libri2Mix/train',,config.downstream_expert.loaderrc.dev_dir='/data/lewis/superb/Libri2Mix/dev',,config.downstream_expert.loaderrc.test_dir='/data/lewis/superb/Libri2Mix/test',,config.runner.total_steps=2000
|
17 |
+
past_exp: null
|
18 |
+
seed: 1337
|
19 |
+
upstream: osanseviero/hubert_base
|
20 |
+
upstream_ckpt: null
|
21 |
+
upstream_feature_selection: hidden_states
|
22 |
+
upstream_model_config: null
|
23 |
+
upstream_model_name: model.pt
|
24 |
+
upstream_refresh: false
|
25 |
+
upstream_trainable: false
|
26 |
+
verbose: false
|
best-states-dev.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6dd37726e49314bea4ef308eb079de842b4400d803777b6c5c47fb9dda3975d3
|
3 |
+
size 31526636
|
config_2021-07-30-22-56-56.yaml
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
downstream_expert:
|
2 |
+
datarc:
|
3 |
+
chunk_size: 2000
|
4 |
+
frame_shift: 160
|
5 |
+
label_delay: 0
|
6 |
+
num_speakers: 2
|
7 |
+
rate: 16000
|
8 |
+
subsampling: 1
|
9 |
+
loaderrc:
|
10 |
+
dev_dir: /data/lewis/superb/Libri2Mix/dev
|
11 |
+
eval_batchsize: 1
|
12 |
+
num_workers: 8
|
13 |
+
test_dir: /data/lewis/superb/Libri2Mix/test
|
14 |
+
train_batchsize: 8
|
15 |
+
train_dir: /data/lewis/superb/Libri2Mix/train
|
16 |
+
modelrc:
|
17 |
+
hidden_size: 512
|
18 |
+
rnn_layers: 1
|
19 |
+
scorerc:
|
20 |
+
save_predictions: true
|
21 |
+
optimizer:
|
22 |
+
lr: 0.0001
|
23 |
+
name: TorchOptim
|
24 |
+
torch_optim_name: Adam
|
25 |
+
runner:
|
26 |
+
eval_dataloaders:
|
27 |
+
- dev
|
28 |
+
eval_step: 500
|
29 |
+
gradient_accumulate_steps: 4
|
30 |
+
gradient_clipping: 1
|
31 |
+
log_step: 500
|
32 |
+
max_keep: 1
|
33 |
+
save_step: 500
|
34 |
+
total_steps: 2000
|
events.out.tfevents.1627678623.vorace
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6fb8837a8aa7b3e84a8af54ef3adab01f3b9a8ea91982bc931f8834f931fc8ea
|
3 |
+
size 748
|
hub_repo
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Subproject commit 6adc9cb4f0b6f1dc428b178979c526a94f353494
|
model.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6dd37726e49314bea4ef308eb079de842b4400d803777b6c5c47fb9dda3975d3
|
3 |
+
size 31526636
|
model.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from s3prl.downstream.runner import Runner
|
2 |
+
from typing import Dict
|
3 |
+
import torch
|
4 |
+
import os
|
5 |
+
|
6 |
+
|
7 |
+
class PreTrainedModel(Runner):
    """CPU inference wrapper around an s3prl downstream ``Runner``.

    The runner is rebuilt from the ``Args`` and ``Config`` objects stored
    inside ``model.ckpt``; calling the instance runs a single waveform
    through the upstream, featurizer and downstream models.
    """

    def __init__(self, path=""):
        """Restore the runner from ``<path>/model.ckpt``.

        Args:
            path: Directory that contains ``model.ckpt``. Defaults to the
                current working directory (empty string).
        """
        checkpoint_path = os.path.join(path, "model.ckpt")
        # NOTE(review): torch.load unpickles arbitrary objects; only point
        # this at checkpoints from a trusted source.
        checkpoint = torch.load(checkpoint_path, map_location='cpu')

        # Override the training-time arguments saved in the checkpoint so
        # the runner initialises from this file, in inference mode, on CPU.
        run_args = checkpoint["Args"]
        run_args.init_ckpt = checkpoint_path
        run_args.mode = "inference"
        run_args.device = "cpu"

        super().__init__(run_args, checkpoint["Config"])

    def __call__(self, inputs) -> Dict[str, str]:
        """Run the model on one raw waveform.

        Args:
            inputs (:obj:`np.array`):
                The raw waveform of audio received. By default at 16KHz.

        Return:
            A :obj:`dict` of the form ``{"frames": ...}`` holding the first
            prediction returned by the downstream model, i.e. the frames
            where one, both, or none of the speakers are speaking.
        """
        # Switch every registered model to evaluation mode.
        # (``all_entries`` is presumably populated by Runner.__init__ -
        # confirm against s3prl.)
        for entry in self.all_entries:
            entry.model.eval()

        # The models are called with a list of waveforms; wrap the single
        # input as a one-element batch.
        wavs = [torch.FloatTensor(inputs)]

        with torch.no_grad():
            hidden = self.upstream.model(wavs)
            feats = self.featurizer.model(wavs, hidden)
            preds = self.downstream.model.inference(feats, [])
        return {"frames": preds[0]}
|