leo19941227 committed
Commit • fa98f1c
Parent(s): b7f22bb
Submission: update-template
{{cookiecutter.repo_name}}/cli.py
CHANGED
@@ -1,31 +1,16 @@
-import datetime
-import re
+import typer
+import torch
 import subprocess
 from pathlib import Path
 
-
-import typer
+from expert import UpstreamExpert
 
 SUBMISSION_FILES = ["README.md", "expert.py", "model.pt"]
+SAMPLE_RATE = 16000
+SECONDS = [2, 1.8, 3.7]
 
 app = typer.Typer()
 
-def _update_submission_name(submission_name: str):
-    replacement = ""
-    with open("README.md", "r") as f:
-        lines = f.readlines()
-
-    for line in lines:
-        if line.startswith("submission_name:"):
-            changes = re.sub(r"submission_name:.+", f"submission_name: {submission_name}", line)
-            replacement += changes
-        else:
-            replacement += line
-
-    with open("README.md", "w") as f:
-        f.write(replacement)
-
-
 @app.command()
 def validate():
     # Check that all the expected files exist
@@ -33,24 +18,42 @@ def validate():
         if not Path(file).is_file():
            raise ValueError(f"File {file} not found! Please include {file} in your submission")
 
-
-
+    try:
+        upstream = UpstreamExpert(ckpt="model.pt")
+        wavs = [torch.rand(round(SAMPLE_RATE * sec)) for sec in SECONDS]
+        results = upstream(wavs)
+
+        assert isinstance(results, dict)
+        tasks = ["PR", "SID", "ER", "ASR", "ASV", "SD", "QbE", "ST", "SS", "SE", "secret"]
+        for task in tasks:
+            hidden_states = results.get(task, "hidden_states")
+            assert isinstance(hidden_states, list)
+
+            for state in hidden_states:
+                assert isinstance(state, torch.Tensor)
+                assert state.dim() == 3, "(batch_size, max_sequence_length_of_batch, hidden_size)"
+                assert state.shape == hidden_states[0].shape
+
+        for task in tasks:
+            downsample_rate = upstream.get_downsample_rates(task)
+            assert isinstance(downsample_rate, int)
+            print(f"The upstream's representation for {task}"
+                  f" has the downsample rate of {downsample_rate}.")
+    except:
+        print("Please check the Upstream Specification on https://superbbenchmark.org/challenge")
+        raise
+
+    typer.echo("All submission files validated!")
+    typer.echo("Now you can make a submission.")
 
 
 @app.command()
-def submit(submission_name: str
+def submit(submission_name: str):
     subprocess.call("git pull origin main".split())
-
-    subprocess.call(["git", "add", "model.pt", "README.md"])
+    subprocess.call(["git", "add", "."])
     subprocess.call(["git", "commit", "-m", f"Submission: {submission_name} "])
     subprocess.call(["git", "push"])
-
-    today = datetime.date.today()
-    # MON = 0, SUN = 6 -> SUN = 0 .. SAT = 6
-    idx = (today.weekday() + 1) % 7
-    sun = today + datetime.timedelta(7 - idx)
-    typer.echo("Submission successful! 🎉 🥳 🎉")
-    typer.echo(f"Your submission will be evaulated on {sun:%A %d %B %Y} ⏳")
+    typer.echo("Submission successful!")
 
 
 if __name__ == "__main__":
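For reference, a minimal sketch (not part of this commit) of how the updated commands could be exercised from a rendered copy of the template. It assumes the generated repository is the current working directory (so cli.py, expert.py and model.pt are all present) and uses Typer's bundled CliRunner test helper:

# Hypothetical local check, assuming a rendered template with cli.py,
# expert.py and model.pt in the current working directory.
from typer.testing import CliRunner

from cli import app

runner = CliRunner()

# Runs the same offline checks as `python cli.py validate`.
result = runner.invoke(app, ["validate"])
print(result.output)

# `submit` runs git pull/add/commit/push, so it is normally invoked directly:
#   python cli.py submit my-submission-name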
{{cookiecutter.repo_name}}/expert.py
CHANGED
@@ -1,50 +1,54 @@
 from collections import OrderedDict
 from typing import List, Union, Dict
 
+import torch
 import torch.nn as nn
 from torch import Tensor
 from torch.nn.utils.rnn import pad_sequence
 
 HIDDEN_DIM = 8
 
+class Model(nn.Module):
+    def __init__(self):
+        super().__init__()
+        # The model needs to be a nn.Module for finetuning, not required for representation extraction
+        self.model1 = nn.Linear(1, HIDDEN_DIM)
+        self.model2 = nn.Linear(HIDDEN_DIM, HIDDEN_DIM)
+
+    def forward(self, wavs):
+        hidden = self.model1(wavs)
+        # hidden: (batch_size, max_len, hidden_dim)
+
+        feature = self.model2(hidden)
+        # feature: (batch_size, max_len, hidden_dim)
+
+        return [hidden, feature]
 
 class UpstreamExpert(nn.Module):
-    def __init__(self, ckpt: str =
+    def __init__(self, ckpt: str = "model.pt", **kwargs):
         """
         Args:
             ckpt:
                 The checkpoint path for loading your pretrained weights.
-
-
-            model_config:
-                The config path for constructing your model.
-                Might not needed if you also save that in your checkpoint file.
-                Can be assigned by the -g option in run_downstream.py
+                Should be fixed as model.pt for SUPERB Challenge.
         """
         super().__init__()
         self.name = "[Example UpstreamExpert]"
 
-        print(
-            f"{self.name} - You can use model_config to construct your customized model: {model_config}"
-        )
         print(f"{self.name} - You can use ckpt to load your pretrained weights: {ckpt}")
-
-
-
-        )
-
-        # The model needs to be a nn.Module for finetuning, not required for representation extraction
-        self.model1 = nn.Linear(1, HIDDEN_DIM)
-        self.model2 = nn.Linear(HIDDEN_DIM, HIDDEN_DIM)
+        ckpt = torch.load(ckpt, map_location="cpu")
+        self.model = Model()
+        self.model.load_state_dict(ckpt)
 
     def get_downsample_rates(self, key: str) -> int:
         """
         Since we do not do any downsampling in this example upstream
         All keys' corresponding representations have downsample rate of 1
+        Eg. 10ms stride representation has the downsample rate 160 (input wavs are all in 16kHz)
         """
         return 1
 
-    def forward(self, wavs: List[Tensor]) -> Dict[str,
+    def forward(self, wavs: List[Tensor]) -> Dict[str, List[Tensor]]:
         """
         When the returning Dict contains the List with more than one Tensor,
         those Tensors should be in the same shape to train a weighted-sum on them.
@@ -53,25 +57,21 @@ class UpstreamExpert(nn.Module):
         wavs = pad_sequence(wavs, batch_first=True).unsqueeze(-1)
         # wavs: (batch_size, max_len, 1)
 
-        hidden = self.model1(wavs)
-        # hidden: (batch_size, max_len, hidden_dim)
-
-        feature = self.model2(hidden)
-        # feature: (batch_size, max_len, hidden_dim)
+        hidden_states = self.model(wavs)
 
         # The "hidden_states" key will be used as default in many cases
         # Others keys in this example are presented for SUPERB Challenge
         return {
-            "hidden_states": [hidden, feature],
-            "PR": [hidden, feature],
-            "SID": [hidden, feature],
-            "ER": [hidden, feature],
-            "ASR": [hidden, feature],
-            "QbE": [hidden, feature],
-            "ASV": [hidden, feature],
-            "SD": [hidden, feature],
-            "ST": [hidden, feature],
-            "SE": [hidden, feature],
-            "SS": [hidden, feature],
-            "secret": [hidden, feature],
-        }
+            "hidden_states": hidden_states,
+            "PR": hidden_states,
+            "SID": hidden_states,
+            "ER": hidden_states,
+            "ASR": hidden_states,
+            "QbE": hidden_states,
+            "ASV": hidden_states,
+            "SD": hidden_states,
+            "ST": hidden_states,
+            "SE": hidden_states,
+            "SS": hidden_states,
+            "secret": hidden_states,
+        }
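For reference, a minimal sketch (not part of this commit) of how a compatible model.pt checkpoint could be produced and then loaded back through the updated UpstreamExpert. It assumes the new expert.py above is importable and simply saves the example Model's randomly initialised weights:

# Hypothetical helper, assuming the updated expert.py is on the import path.
import torch

from expert import Model, UpstreamExpert

# Save a state_dict checkpoint in the format UpstreamExpert.__init__ expects.
torch.save(Model().state_dict(), "model.pt")

# Reload it the same way cli.py's validate command does.
upstream = UpstreamExpert(ckpt="model.pt")
wavs = [torch.rand(32000), torch.rand(16000)]
hidden_states = upstream(wavs)["hidden_states"]
print([tuple(h.shape) for h in hidden_states])  # two tensors of shape (2, 32000, 8)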
{{cookiecutter.repo_name}}/requirements.txt
CHANGED
@@ -1 +1,2 @@
-typer
+typer
+torch