supermy fffiloni committed on
Commit
c4e2725
0 Parent(s):

Duplicate from fffiloni/speech-to-image

Browse files

Co-authored-by: Sylvain Filoni <fffiloni@users.noreply.huggingface.co>

Files changed (4) hide show
  1. .gitattributes +33 -0
  2. README.md +13 -0
  3. app.py +60 -0
  4. requirements.txt +7 -0
.gitattributes ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.npy filter=lfs diff=lfs merge=lfs -text
14
+ *.npz filter=lfs diff=lfs merge=lfs -text
15
+ *.onnx filter=lfs diff=lfs merge=lfs -text
16
+ *.ot filter=lfs diff=lfs merge=lfs -text
17
+ *.parquet filter=lfs diff=lfs merge=lfs -text
18
+ *.pb filter=lfs diff=lfs merge=lfs -text
19
+ *.pickle filter=lfs diff=lfs merge=lfs -text
20
+ *.pkl filter=lfs diff=lfs merge=lfs -text
21
+ *.pt filter=lfs diff=lfs merge=lfs -text
22
+ *.pth filter=lfs diff=lfs merge=lfs -text
23
+ *.rar filter=lfs diff=lfs merge=lfs -text
24
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
25
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
26
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
27
+ *.tflite filter=lfs diff=lfs merge=lfs -text
28
+ *.tgz filter=lfs diff=lfs merge=lfs -text
29
+ *.wasm filter=lfs diff=lfs merge=lfs -text
30
+ *.xz filter=lfs diff=lfs merge=lfs -text
31
+ *.zip filter=lfs diff=lfs merge=lfs -text
32
+ *.zst filter=lfs diff=lfs merge=lfs -text
33
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Speech To Image • Community pipeline
3
+ emoji: 🎙🖼🧩
4
+ colorFrom: yellow
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 3.6
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: fffiloni/speech-to-image
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os

import gradio as gr
import torch
import whisper
from diffusers import DiffusionPipeline
from transformers import (
    WhisperForConditionalGeneration,
    WhisperProcessor,
)

# Hugging Face access token used to download the gated Stable Diffusion weights.
MY_SECRET_TOKEN = os.environ.get('HF_TOKEN_SD')

# Prefer GPU when available; all models below are moved to this device.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Whisper speech-recognition model + processor, injected into the
# "speech_to_image_diffusion" community pipeline below.
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small").to(device)
processor = WhisperProcessor.from_pretrained("openai/whisper-small")

# fp16 weights are only usable on CUDA; on CPU, half-precision ops are
# unsupported/unreliable, so fall back to the default full-precision weights.
if device == "cuda":
    _precision_kwargs = {"revision": "fp16", "torch_dtype": torch.float16}
else:
    _precision_kwargs = {}

diffuser_pipeline = DiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    custom_pipeline="speech_to_image_diffusion",
    speech_model=model,
    speech_processor=processor,
    use_auth_token=MY_SECRET_TOKEN,
    **_precision_kwargs,
)

# Trade a little speed for a lower peak memory footprint during generation.
diffuser_pipeline.enable_attention_slicing()
diffuser_pipeline = diffuser_pipeline.to(device)
#————————————————————————————————————————————
# GRADIO SETUP

# Input/output widgets: record from the microphone, return a generated image.
audio_input = gr.Audio(source="microphone", type="filepath")
image_output = gr.Image()

# Page title shown at the top of the Space.
title = "Speech to Diffusion • Community Pipeline"

# HTML blurb rendered under the title.
description = """
<p style='text-align: center;'>This demo can generate an image from an audio sample using pre-trained OpenAI whisper-small and Stable Diffusion.<br />
Community examples consist of both inference and training examples that have been added by the community.<br />
<a href='https://github.com/huggingface/diffusers/tree/main/examples/community#speech-to-image' target='_blank'> Click here for more information about community pipelines </a>
</p>
"""

# HTML credits rendered below the interface.
article = """
<p style='text-align: center;'>Community pipeline by Mikail Duzenli • Gradio demo by Sylvain Filoni & Ahsen Khaliq<p>
"""
def speech_to_text(audio_sample):
    """Turn a recorded audio clip into a generated image.

    audio_sample: path to the recorded audio file (Gradio ``type="filepath"``).
    Returns the first image produced by the speech-to-image diffusion pipeline,
    which transcribes the audio with Whisper and uses the text as the prompt.
    """
    waveform = whisper.load_audio(audio_sample)
    result = diffuser_pipeline(waveform)

    # Debug trace of the raw pipeline output in the Space logs.
    print(f"""
    ————————
    output: {result}
    ————————
    """)

    return result.images[0]
# Assemble the single-input/single-output app and start the Gradio server.
demo = gr.Interface(
    fn=speech_to_text,
    inputs=audio_input,
    outputs=image_output,
    title=title,
    description=description,
    article=article,
)
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ --extra-index-url https://download.pytorch.org/whl/cu113
2
+ torch
3
+ scipy
4
+ ftfy
5
+ git+https://github.com/huggingface/transformers
6
+ git+https://github.com/huggingface/diffusers
7
+ git+https://github.com/openai/whisper.git