johann22 committed on
Commit
c7cebb0
•
1 Parent(s): 7c5664b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -39
app.py CHANGED
@@ -1,43 +1,18 @@
1
- import outetts
 
 
 
 
2
 
3
- # Configure the model
4
- model_config = outetts.HFModelConfig_v1(
5
- model_path="OuteAI/OuteTTS-0.2-500M",
6
- language="en", # Supported languages in v0.2: en, zh, ja, ko
7
- )
8
 
9
- # Initialize the interface
10
- interface = outetts.InterfaceHF(model_version="0.2", cfg=model_config)
11
 
12
- # Optional: Create a speaker profile (use a 10-15 second audio clip)
13
- # speaker = interface.create_speaker(
14
- # audio_path="path/to/audio/file",
15
- # transcript="Transcription of the audio file."
16
- # )
17
 
18
- # Optional: Save and load speaker profiles
19
- # interface.save_speaker(speaker, "speaker.json")
20
- # speaker = interface.load_speaker("speaker.json")
21
-
22
- # Optional: Load speaker from default presets
23
- interface.print_default_speakers()
24
- speaker = interface.load_default_speaker(name="male_1")
25
-
26
- output = interface.generate(
27
- text="Speech synthesis is the artificial production of human speech. A computer system used for this purpose is called a speech synthesizer, and it can be implemented in software or hardware products.",
28
- # Lower temperature values may result in a more stable tone,
29
- # while higher values can introduce varied and expressive speech
30
- temperature=0.1,
31
- repetition_penalty=1.1,
32
- max_length=4096,
33
-
34
- # Optional: Use a speaker profile for consistent voice characteristics
35
- # Without a speaker profile, the model will generate a voice with random characteristics
36
- speaker=speaker,
37
- )
38
-
39
- # Save the synthesized speech to a file
40
- output.save("output.wav")
41
-
42
- # Optional: Play the synthesized speech
43
- output.play()
 
1
+ import torch
2
+ from diffusers import MochiPipeline
3
+ from diffusers.utils import export_to_video
4
+ import gradio as gr
5
+ pipe = MochiPipeline.from_pretrained("genmo/mochi-1-preview", variant="bf16", torch_dtype=torch.bfloat16)
6
 
7
+ # Enable memory savings
8
+ pipe.enable_model_cpu_offload()
9
+ pipe.enable_vae_tiling()
 
 
10
 
11
+ prompt = "Close-up of a chameleon's eye, with its scaly skin changing color. Ultra high resolution 4k."
12
+ frames = pipe(prompt, num_frames=84).frames[0]
13
 
14
+ export_to_video(frames, "mochi.mp4", fps=30)
 
 
 
 
15
 
16
+ with gr.Blocks() as b:
17
+ vid=gr.Video("mochi.mp4")
18
+ b.launch()