Spaces:

Wataru
/

Miipher

Build error

App Files Files Community

Wataru committed on Sep 30, 2023

Commit

aeef433

1 Parent(s): 34e77e5

added basic files

Browse files

Files changed (4) hide show

.gitignore +1 -0
README.md +0 -1
app.py +48 -0
requirements.txt +181 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .venv/

README.md CHANGED Viewed

@@ -10,4 +10,3 @@ pinned: false
 license: cc-by-nc-2.0
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference


10	license: cc-by-nc-2.0
11	---
12

app.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import gradio as gr
+from miipher.dataset.preprocess_for_infer import PreprocessForInfer
+from miipher.lightning_module import MiipherLightningModule
+from lightning_vocoders.models.hifigan.xvector_lightning_module import HiFiGANXvectorLightningModule
+import torch
+import torchaudio
+import hydra
+import tempfile
+# Checkpoints are loaded once at import time and mapped onto the CPU.
+miipher_path = "miipher.ckpt"
+miipher = MiipherLightningModule.load_from_checkpoint(miipher_path,map_location='cpu')
+vocoder = HiFiGANXvectorLightningModule.load_from_checkpoint("vocoder_finetuned.ckpt",map_location='cpu')
+# The x-vector model is built from the config stored in the vocoder checkpoint.
+xvector_model = hydra.utils.instantiate(vocoder.cfg.data.xvector.model)
+xvector_model = xvector_model.to('cpu')
+# The preprocessor is configured from the Miipher checkpoint's config.
+preprocessor = PreprocessForInfer(miipher.cfg)
+@torch.inference_mode()
+def main(wav_path,transcript,lang_code):
+    wav,sr =torchaudio.load(wav_path)
+    wav = wav[0].unsqueeze(0)
+    batch = preprocessor.process(
+        'test',
+        (torch.tensor(wav),sr),
+        word_segmented_text=transcript,
+        lang_code=lang_code
+    )
+    miipher.feature_extractor(batch)
+    (
+        phone_feature,
+        speaker_feature,
+        degraded_ssl_feature,
+        _,
+    ) = miipher.feature_extractor(batch)
+    cleaned_ssl_feature, _ = miipher(phone_feature,speaker_feature,degraded_ssl_feature)
+    vocoder_xvector = xvector_model.encode_batch(batch['degraded_wav_16k'].view(1,-1).cpu()).squeeze(1)
+    cleaned_wav = vocoder.generator_forward({"input_feature": cleaned_ssl_feature, "xvector": vocoder_xvector})[0].T
+    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as fp:
+        torchaudio.save(fp,cleaned_wav.view(1,-1), sample_rate=22050,format='wav')
+        return fp.name
+inputs = [gr.Audio(label="noisy audio",type='filepath'),gr.Textbox(label="Transcript", value="Your transcript here", max_lines=1),
+            gr.Radio(label="Language", choices=["eng-us", "jpn"], value="eng-us")]
+outputs = gr.Audio(label="Output")
+demo = gr.Interface(fn=main, inputs=inputs, outputs=outputs)
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,181 @@

+absl-py==2.0.0
+aiofiles==23.2.1
+aiohttp==3.8.5
+aiosignal==1.3.1
+altair==5.1.1
+annotated-types==0.5.0
+antlr4-python3-runtime==4.9.3
+anyio==3.7.1
+appdirs==1.4.4
+arrow==1.2.3
+async-timeout==4.0.3
+attrs==23.1.0
+Babel==2.12.1
+backoff==2.2.1
+beautifulsoup4==4.12.2
+blessed==1.20.0
+braceexpand==0.1.7
+cachetools==5.3.1
+certifi==2023.7.22
+cffi==1.16.0
+charset-normalizer==3.3.0
+click==8.1.7
+clldutils==3.20.0
+cmake==3.27.5
+colorama==0.4.6
+colorlog==6.7.0
+contourpy==1.1.1
+croniter==1.4.1
+csvw==3.1.3
+cycler==0.12.0
+Cython==3.0.2
+dateutils==0.6.12
+deepdiff==6.5.0
+dill==0.3.7
+docker-pycreds==0.4.0
+exceptiongroup==1.1.3
+fastapi==0.103.2
+ffmpy==0.3.1
+filelock==3.12.4
+fonttools==4.43.0
+frozenlist==1.4.0
+fsspec==2023.9.2
+gitdb==4.0.10
+GitPython==3.1.37
+google-auth==2.23.2
+google-auth-oauthlib==1.0.0
+gradio==3.45.2
+gradio_client==0.5.3
+grpcio==1.59.0
+h11==0.14.0
+httpcore==0.18.0
+httpx==0.25.0
+huggingface-hub==0.17.3
+hydra-core==1.3.2
+HyperPyYAML==1.2.2
+idna==3.4
+importlib-resources==6.1.0
+inquirer==3.1.3
+isodate==0.6.1
+itsdangerous==2.1.2
+Jinja2==3.1.2
+joblib==1.3.2
+jsonschema==4.19.1
+jsonschema-specifications==2023.7.1
+kiwisolver==1.4.5
+language-tags==1.2.0
+lightning==2.0.9.post0
+lightning-cloud==0.5.39
+lightning-utilities==0.9.0
+lightning-vocoders @ git+https://github.com/Wataru-Nakata/ssl-vocoders@8a628630a45fa2c034d464db7db98901eb1091e4
+lit==17.0.1
+llvmlite==0.40.1
+lxml==4.9.3
+Markdown==3.4.4
+markdown-it-py==3.0.0
+MarkupSafe==2.1.3
+matplotlib==3.7.3
+mdurl==0.1.2
+mecab-python3==1.0.8
+miipher @ git+https://github.com/Wataru-Nakata/miipher/@5a326adb732e0c5ba11b5232f0644f0f19b696be
+mpmath==1.3.0
+multidict==6.0.4
+networkx==3.1
+numpy==1.26.0
+nvidia-cublas-cu11==11.10.3.66
+nvidia-cuda-cupti-cu11==11.7.101
+nvidia-cuda-nvrtc-cu11==11.7.99
+nvidia-cuda-runtime-cu11==11.7.99
+nvidia-cudnn-cu11==8.5.0.96
+nvidia-cufft-cu11==10.9.0.58
+nvidia-curand-cu11==10.2.10.91
+nvidia-cusolver-cu11==11.4.0.1
+nvidia-cusparse-cu11==11.7.4.91
+nvidia-nccl-cu11==2.14.3
+nvidia-nvtx-cu11==11.7.91
+oauthlib==3.2.2
+omegaconf==2.3.0
+ordered-set==4.1.0
+orjson==3.9.7
+packaging==23.1
+pandarallel==1.6.5
+pandas==2.1.1
+pathtools==0.1.2
+Pillow==10.0.1
+plac==1.4.0
+protobuf==4.24.3
+psutil==5.9.5
+pyasn1==0.5.0
+pyasn1-modules==0.3.0
+pybind11==2.11.1
+pycparser==2.21
+pydantic==2.1.1
+pydantic_core==2.4.0
+pydub==0.25.1
+Pygments==2.16.1
+PyJWT==2.8.0
+pylatexenc==2.10
+pyparsing==3.1.1
+pyroomacoustics==0.7.3
+pyrootutils==1.0.4
+python-dateutil==2.8.2
+python-dotenv==1.0.0
+python-editor==1.0.4
+python-multipart==0.0.6
+pytorch-lightning==2.0.9.post0
+pytz==2023.3.post1
+PyYAML==6.0.1
+rdflib==7.0.0
+readchar==4.0.5
+referencing==0.30.2
+regex==2023.8.8
+requests==2.31.0
+requests-oauthlib==1.3.1
+rfc3986==1.5.0
+rich==13.6.0
+rpds-py==0.10.3
+rsa==4.9
+ruamel.yaml==0.17.33
+ruamel.yaml.clib==0.2.7
+scipy==1.11.3
+segments==2.2.1
+semantic-version==2.10.0
+sentencepiece==0.1.99
+sentry-sdk==1.31.0
+setproctitle==1.3.2
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.0
+soundfile==0.12.1
+soupsieve==2.5
+speechbrain==0.5.15
+starlette==0.27.0
+starsessions==1.3.0
+sympy==1.12
+tabulate==0.9.0
+tensorboard==2.13.0
+tensorboard-data-server==0.7.1
+text2phonemesequence==0.1.4
+tokenizers==0.13.3
+toolz==0.12.0
+torch==2.0.1
+torchaudio==2.0.2
+torchmetrics==1.2.0
+tqdm==4.66.1
+traitlets==5.10.1
+transformers==4.29.2
+triton==2.0.0
+typing_extensions==4.8.0
+tzdata==2023.3
+unidic==1.1.0
+uritemplate==4.1.1
+urllib3==2.0.5
+uvicorn==0.23.2
+wandb==0.15.11
+wasabi==0.10.1
+wcwidth==0.2.8
+webdataset==0.2.57
+websocket-client==1.6.3
+websockets==11.0.3
+Werkzeug==3.0.0
+yarl==1.9.2