File size: 4,257 Bytes
38548f2
 
 
 
 
 
 
1d37aeb
38548f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d37aeb
 
 
 
 
 
 
 
38548f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8fdcaa3
 
38548f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d37aeb
 
 
 
 
 
 
 
 
 
 
 
38548f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d37aeb
 
 
 
 
 
38548f2
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import os
import shutil
import gradio as gr
import zipfile
import tempfile
from infer.modules.train.preprocess import PreProcess
from typing import Tuple
from huggingface_hub import snapshot_download


def extract_audio_files(zip_file: str, target_dir: str) -> list[str]:
    """Unpack a zip archive and list the audio files at its top level.

    Args:
        zip_file: Path of the zip archive to extract.
        target_dir: Existing directory that receives the archive contents.

    Returns:
        Paths (joined onto ``target_dir``) of the regular files located
        directly in ``target_dir`` whose extension is ``.wav``, ``.mp3`` or
        ``.ogg``, sorted by file name. Extensions are compared
        case-insensitively, so ``TAKE1.WAV`` is accepted.

    Raises:
        gr.Error: If no audio file is found at the top level.
    """
    audio_extensions = (".wav", ".mp3", ".ogg")

    with zipfile.ZipFile(zip_file, "r") as zip_ref:
        zip_ref.extractall(target_dir)

    audio_files = []
    # Sort the listing: os.listdir returns entries in arbitrary order.
    for name in sorted(os.listdir(target_dir)):
        path = os.path.join(target_dir, name)
        # Skip directories that merely happen to be named like an audio file.
        if name.lower().endswith(audio_extensions) and os.path.isfile(path):
            audio_files.append(path)

    if not audio_files:
        raise gr.Error("No audio files found at the top level of the zip file")

    return audio_files


def create_new_expdir(zip_file: str) -> Tuple[str, str]:
    """Create a fresh experiment directory from an uploaded audio zip.

    A new temporary directory is created, the archive is extracted into its
    ``_data`` sub-directory, and the ``PreProcess`` pipeline is run over it.

    Args:
        zip_file: Path of the uploaded zip archive containing audio files.

    Returns:
        A ``(exp_dir, message)`` tuple: the path of the new experiment
        directory and a summary line followed by the preprocessing log.

    Raises:
        gr.Error: If no archive was provided, or if the archive has no audio
            files at its top level (raised by ``extract_audio_files``).
    """
    if not zip_file:
        # The button can be clicked before anything has been uploaded.
        raise gr.Error("Please upload a zip file containing audio files")

    temp_dir = tempfile.mkdtemp()
    print(f"Using exp dir: {temp_dir}")

    try:
        data_dir = os.path.join(temp_dir, "_data")
        os.makedirs(data_dir)
        audio_files = extract_audio_files(zip_file, data_dir)

        # NOTE(review): 40000 is presumably the target sample rate and 4 the
        # number of worker processes -- confirm against PreProcess.
        pp = PreProcess(40000, temp_dir, 3.0, False)
        pp.pipeline_mp_inp_dir(data_dir, 4)

        # Rewind the log before reading so the whole content is returned.
        pp.logfile.seek(0)
        log = pp.logfile.read()
    except Exception:
        # mkdtemp never cleans up after itself; do not leave a half-built
        # experiment directory behind when setup fails.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    return temp_dir, f"Preprocessed {len(audio_files)} audio files.\n{log}"


def restore_expdir(zip: str) -> str:
    """Restore an experiment by unpacking an archive into a new temp directory.

    Args:
        zip: Path of the uploaded experiment archive. The archive format is
            inferred by ``shutil.unpack_archive`` from the file extension.

    Returns:
        Path of the newly created directory holding the unpacked contents.

    Raises:
        gr.Error: If no archive was provided.
        shutil.ReadError: If the archive cannot be unpacked (propagated
            unchanged after the temporary directory has been removed).
    """
    if not zip:
        # The button can be clicked before anything has been uploaded.
        raise gr.Error("Please upload an experiment directory zip file")

    exp_dir = tempfile.mkdtemp()
    try:
        shutil.unpack_archive(zip, exp_dir)
    except Exception:
        # Do not leak the empty/partial directory when unpacking fails.
        shutil.rmtree(exp_dir, ignore_errors=True)
        raise
    return exp_dir


def restore_from_huggingface(repo: str, token: str) -> str:
    """Download a Hugging Face repo snapshot into a new experiment directory.

    Args:
        repo: Repository id, e.g. ``username/repo``. Surrounding whitespace
            is ignored.
        token: Optional access token. It is forwarded only when it starts
            with ``hf_``; anything else (including an empty value) results
            in ``token=None`` being passed to ``snapshot_download``.

    Returns:
        Path of the directory the snapshot was downloaded into: a fresh
        temporary directory joined with the lower-cased repo id.

    Raises:
        gr.Error: If ``repo`` is empty.
    """
    # Values pasted into the textboxes often carry stray whitespace, and an
    # untouched field may arrive empty; normalise both before use.
    repo = (repo or "").strip()
    token = (token or "").strip()
    if not repo:
        raise gr.Error("Please enter a Hugging Face repo (username/repo)")

    exp_dir = os.path.join(tempfile.mkdtemp(), repo.lower())
    snapshot_download(
        repo, local_dir=exp_dir, token=token if token.startswith("hf_") else None
    )
    return exp_dir


def set_dir(dir_val: str) -> str:
    """Resolve a user-supplied experiment directory, confined to ``/tmp``.

    Args:
        dir_val: Either a path inside ``/tmp`` or a name relative to ``/tmp``.

    Returns:
        The normalised absolute path of the existing directory.

    Raises:
        gr.Error: If the path resolves outside ``/tmp`` or is not an
            existing directory.
    """
    root = "/tmp"
    # os.path.join keeps an absolute dir_val as-is, and ".." segments would
    # slip past a plain prefix test, so normalise first and check afterwards.
    candidate = os.path.normpath(os.path.join(root, dir_val))
    if candidate != root and not candidate.startswith(root + "/"):
        raise gr.Error("Directory must be inside /tmp")
    if not os.path.isdir(candidate):
        raise gr.Error("Directory does not exist")

    return candidate


class SetupTab:
    def __init__(self):
        pass

    def ui(self):
        gr.Markdown("# Setup Experiment")
        gr.Markdown(
            "You can upload a zip file containing audio files to start a new experiment, or upload an experiment directory zip file to restore an existing experiment.\n"
            "The suggested dataset size is > 5 minutes of audio."
        )

        with gr.Row():
            with gr.Column():
                self.zip_file = gr.File(
                    label="Upload a zip file containing audio files for training",
                    file_types=["zip"],
                )
                self.preprocess_log = gr.Textbox(label="Log", lines=5)

            self.preprocess_btn = gr.Button(
                value="Start New Experiment", variant="primary"
            )

        with gr.Row():
            self.restore_zip_file = gr.File(
                label="Upload the experiment directory zip file",
                file_types=["zip"],
            )
            self.restore_btn = gr.Button(value="Restore Experiment", variant="primary")

        gr.Markdown("You can also restore from a Hugging Face repo.")
        with gr.Row():
            self.hf_repo = gr.Textbox(
                label="Restore from Hugging Face repo",
                placeholder="username/repo",
            )
            self.hf_token = gr.Textbox(
                label="Hugging Face token (optional)",
                placeholder="hf_...",
            )
            self.restore_hf_btn = gr.Button(value="Restore from Hugging Face")

        with gr.Row():
            self.dir_val = gr.Textbox(
                label="Manually set the experiment directory (don't touch it unless you know what you are doing)",
                placeholder="/tmp/...",
            )
            self.set_dir_btn = gr.Button(value="Set Directory")

    def build(self, exp_dir: gr.Textbox):
        self.preprocess_btn.click(
            fn=create_new_expdir,
            inputs=[self.zip_file],
            outputs=[exp_dir, self.preprocess_log],
        )

        self.restore_btn.click(
            fn=restore_expdir,
            inputs=[self.restore_zip_file],
            outputs=[exp_dir],
        )

        self.restore_hf_btn.click(
            fn=restore_from_huggingface,
            inputs=[self.hf_repo, self.hf_token],
            outputs=[exp_dir],
        )

        self.set_dir_btn.click(
            fn=set_dir,
            inputs=[self.dir_val],
            outputs=[exp_dir],
        )