Ffftdtd5dtft committed on
Commit
d3b8501
verified
1 Parent(s): 44349b8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +409 -0
app.py ADDED
@@ -0,0 +1,409 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import (
3
+ AutoTokenizer,
4
+ AutoModelForCausalLM,
5
+ AutoModelForSeq2SeqLM,
6
+ AutoProcessor,
7
+ AutoModelForSpeechSeq2Seq,
8
+ AutoModelForTextToWaveform
9
+ )
10
+ from diffusers import DiffusionPipeline
11
+ import time
12
+ import os
13
+ from dotenv import load_dotenv
14
+ from huggingface_hub import HfApi, HfFolder, Repository
15
+ import gradio as gr
16
+
17
+ load_dotenv()
18
+
19
def prune_model(model, amount=0.5):
    """Apply L1 unstructured pruning to every Linear and Conv2d weight.

    Args:
        model: Module whose sub-modules are scanned.
        amount: Pruning amount forwarded to ``prune.l1_unstructured``.

    Returns:
        The same ``model`` object, pruned in place.
    """
    from torch.nn.utils import prune as pruning

    prunable_types = (torch.nn.Linear, torch.nn.Conv2d)
    targets = [layer for layer in model.modules() if isinstance(layer, prunable_types)]
    for layer in targets:
        pruning.l1_unstructured(layer, name='weight', amount=amount)
        # Fold the mask into the weight so no pruning re-parametrisation remains.
        pruning.remove(layer, 'weight')
    return model
26
+
27
def quantize_to_q1_with_min(tensor, min_value=-1):
    """Return the element-wise sign of ``tensor`` with a lower floor applied.

    Args:
        tensor: Input tensor; it is not modified.
        min_value: Entries of the sign tensor below this value are set to it.

    Returns:
        A new tensor holding the floored signs.
    """
    signs = torch.sign(tensor)
    below_floor = signs < min_value
    signs[below_floor] = min_value
    return signs
31
+
32
def quantize_model_to_q1_with_min(model, min_value=-1):
    """Overwrite every float32/float16 parameter with its floored sign, in place.

    Args:
        model: Module whose parameters are rewritten.
        min_value: Floor forwarded to ``quantize_to_q1_with_min``.
    """
    quantizable = (torch.float32, torch.float16)
    with torch.no_grad():
        for _, weight in model.named_parameters():
            if weight.dtype not in quantizable:
                continue
            weight.copy_(quantize_to_q1_with_min(weight.data, min_value))
37
+
38
def disable_unnecessary_components(model):
    """Zero the drop probability of Dropout layers and put BatchNorm1d layers in eval mode.

    Args:
        model: Module whose sub-modules are adjusted in place.
    """
    for layer in model.modules():
        if isinstance(layer, torch.nn.Dropout):
            layer.p = 0.0
            continue
        if isinstance(layer, torch.nn.BatchNorm1d):
            layer.eval()
44
+
45
def ultra_max_compress(model):
    """Prune, sign-quantize, freeze and (best effort) TorchScript a model.

    Steps, in order: 80% L1 pruning, sign quantization with a -0.05 floor,
    dropout/batch-norm adjustment, freezing of trainable parameters (clamped
    to [-1, 1] and cast to half precision), an attempt at ``torch.jit.script``,
    a second pruning pass at 90%, ``eval()``, and removal of empty buffers.

    Args:
        model: Module to compress; it is modified in place.

    Returns:
        The scripted module when scripting succeeds, otherwise ``model``.
    """
    import logging

    model = prune_model(model, amount=0.8)
    quantize_model_to_q1_with_min(model, min_value=-0.05)
    disable_unnecessary_components(model)

    with torch.no_grad():
        for _, param in model.named_parameters():
            if param.requires_grad:
                param.requires_grad = False
                param.data = torch.nn.functional.hardtanh(param.data, min_val=-1.0, max_val=1.0)
                param.data = param.data.half()

    try:
        model = torch.jit.script(model)
    except Exception as exc:
        # Scripting is best effort: keep the eager model, but record why it failed.
        logging.getLogger(__name__).warning("torch.jit.script failed; keeping eager model: %s", exc)

    prune_model(model, amount=0.9)
    model.eval()

    # Buffers live on the sub-module that registered them, and named_buffers()
    # yields dotted names, so cleanup has to go through each owner's own
    # registry. Names are collected first so no dict is mutated mid-iteration.
    if not isinstance(model, torch.jit.ScriptModule):
        for owner in model.modules():
            empty_names = [
                buffer_name
                for buffer_name, buffer in owner._buffers.items()
                if buffer is not None and buffer.numel() == 0
            ]
            for buffer_name in empty_names:
                del owner._buffers[buffer_name]
    return model
65
+
66
def optimize_model_resources(model):
    """Switch a model to a frozen, half-precision inference configuration.

    Disables autograd globally, puts the model in eval mode, freezes every
    parameter and casts float32 parameters to float16. If the model has a
    ``config``, ``max_position_embeddings`` is capped at 512 and
    ``hidden_size`` at 768 (only the config values are changed).

    Args:
        model: Module to adjust in place.

    Returns:
        The result of ``torch.jit.optimize_for_inference`` when ``model`` is
        a ScriptModule, otherwise ``model`` itself.
    """
    # NOTE: this is a process-wide switch, not scoped to this model.
    torch.set_grad_enabled(False)
    model.eval()
    for _, param in model.named_parameters():
        param.requires_grad = False
        if param.dtype == torch.float32:
            param.data = param.data.half()

    config = getattr(model, 'config', None)
    if config is not None:
        if hasattr(config, 'max_position_embeddings'):
            config.max_position_embeddings = min(config.max_position_embeddings, 512)
        if hasattr(config, 'hidden_size'):
            config.hidden_size = min(config.hidden_size, 768)

    # optimize_for_inference only accepts ScriptModules and raises otherwise,
    # so eager models are returned as they are.
    if isinstance(model, torch.jit.ScriptModule):
        model = torch.jit.optimize_for_inference(model)
    return model
80
+
81
def generate_random_responses(model, tokenizer, prompt, num_responses=5, max_length=50):
    """Sample ``num_responses`` completions of ``prompt`` with top-k sampling.

    Args:
        model: Object exposing ``generate``.
        tokenizer: Object exposing ``encode`` and ``decode``.
        prompt: Text to complete.
        num_responses: Number of samples to draw.
        max_length: ``max_length`` forwarded to ``generate``.

    Returns:
        List of decoded strings, one per sample.
    """
    def sample_once():
        encoded = tokenizer.encode(prompt, return_tensors="pt")
        generated = model.generate(encoded, max_length=max_length, do_sample=True, top_k=50)
        return tokenizer.decode(generated[0], skip_special_tokens=True)

    return [sample_once() for _ in range(num_responses)]
89
+
90
def patched_distilbert_forward(
    self,
    input_ids=None,
    attention_mask=None,
    head_mask=None,
    inputs_embeds=None,
    output_attentions=None,
    output_hidden_states=None,
    return_dict=None,
):
    """Run ``DistilBertModel.forward`` and optionally flatten the result to a tuple.

    Args:
        self: Model instance passed through to ``DistilBertModel.forward``.
        input_ids, attention_mask, head_mask, inputs_embeds, output_attentions,
            output_hidden_states: Forwarded unchanged.
        return_dict: When ``None``, falls back to ``self.config.use_return_dict``.

    Returns:
        The structured output when ``return_dict`` is truthy; otherwise a tuple
        of the non-``None`` values among ``last_hidden_state``,
        ``hidden_states`` and ``attentions``.
    """
    # Imported here because the file's top-level transformers import does not
    # bring in DistilBertModel.
    # NOTE(review): if this function is ever assigned to
    # DistilBertModel.forward itself, the call below recurses — confirm usage.
    from transformers import DistilBertModel

    return_dict = return_dict if return_dict is not None else self.config.use_return_dict
    # Always request the structured output so the named fields read below exist.
    outputs = DistilBertModel.forward(
        self,
        input_ids,
        attention_mask=attention_mask,
        head_mask=head_mask,
        inputs_embeds=inputs_embeds,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=True,
    )
    if return_dict:
        return outputs
    fields = (outputs.last_hidden_state, outputs.hidden_states, outputs.attentions)
    return tuple(value for value in fields if value is not None)
100
+
101
def patched_forward(
    self,
    input_ids=None,
    attention_mask=None,
    head_mask=None,
    inputs_embeds=None,
    labels=None,
    output_attentions=None,
    output_hidden_states=None,
    return_dict=None,
):
    """Compute classification logits from ``self.distilbert``'s first-position state.

    ``labels`` is accepted but not used. The first-position hidden state goes
    through ``pre_classifier``, ``dropout`` and ``classifier``.

    Returns:
        The logits alone when ``return_dict`` is truthy, otherwise a tuple of
        the logits followed by the encoder outputs after the first one.
    """
    if return_dict is None:
        return_dict = self.config.use_return_dict
    encoder_outputs = self.distilbert(
        input_ids,
        attention_mask=attention_mask,
        head_mask=head_mask,
        inputs_embeds=inputs_embeds,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )
    first_position = encoder_outputs[0][:, 0]
    logits = self.classifier(self.dropout(self.pre_classifier(first_position)))
    if return_dict:
        return logits
    return (logits,) + encoder_outputs[1:]
112
+
113
def patched_roberta_forward(
    self,
    input_ids=None,
    attention_mask=None,
    head_mask=None,
    inputs_embeds=None,
    labels=None,
    output_attentions=None,
    output_hidden_states=None,
    return_dict=None,
):
    """Compute classification logits from ``self.roberta``'s first-position state.

    ``labels`` is accepted but not used. The first-position hidden state goes
    through ``dropout`` and ``classifier``.

    Returns:
        The logits alone when ``return_dict`` is truthy, otherwise a tuple of
        the logits followed by the encoder outputs after the first one.
    """
    if return_dict is None:
        return_dict = self.config.use_return_dict
    encoder_outputs = self.roberta(
        input_ids,
        attention_mask=attention_mask,
        head_mask=head_mask,
        inputs_embeds=inputs_embeds,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
    )
    first_position = encoder_outputs[0][:, 0]
    logits = self.classifier(self.dropout(first_position))
    if return_dict:
        return logits
    return (logits,) + encoder_outputs[1:]
124
+
125
def optimize_for_low_resources(model):
    """Compress a model and set its config limits to 256 positions / 384 hidden.

    Runs ``ultra_max_compress`` then ``optimize_model_resources`` and, when the
    resulting model has a ``config``, overwrites ``max_position_embeddings``
    and ``hidden_size`` on it (only the config values are changed).

    Args:
        model: Module to compress.

    Returns:
        The compressed model.
    """
    model = optimize_model_resources(ultra_max_compress(model))
    # The compression steps may hand back a module without a `config`
    # (e.g. a scripted one); skip the config tweak instead of crashing.
    config = getattr(model, 'config', None)
    if config is not None:
        config.max_position_embeddings = 256
        config.hidden_size = 384
    return model
131
+
132
def optimize_for_very_low_resources(model):
    """Compress a model and set its config limits to 128 positions / 256 hidden.

    Runs ``ultra_max_compress`` then ``optimize_model_resources`` and, when the
    resulting model has a ``config``, overwrites ``max_position_embeddings``
    and ``hidden_size`` on it (only the config values are changed).

    Args:
        model: Module to compress.

    Returns:
        The compressed model.
    """
    model = optimize_model_resources(ultra_max_compress(model))
    # The compression steps may hand back a module without a `config`
    # (e.g. a scripted one); skip the config tweak instead of crashing.
    config = getattr(model, 'config', None)
    if config is not None:
        config.max_position_embeddings = 128
        config.hidden_size = 256
    return model
138
+
139
def remove_unused_model_components(model):
    """Remove every zero-element parameter from ``model`` and its sub-modules.

    Args:
        model: Module to clean in place.

    Returns:
        The same ``model`` object.
    """
    # Parameters are registered on the sub-module that owns them, and
    # named_parameters() yields dotted names, so removal must go through each
    # owner's own registry. Names are collected first so no dict is mutated
    # while it is being iterated.
    for owner in model.modules():
        empty_names = [
            name
            for name, param in owner._parameters.items()
            if param is not None and param.numel() == 0
        ]
        for name in empty_names:
            del owner._parameters[name]
    return model
144
+
145
def auto_train_model(model, train_data, epochs=3):
    """Train ``model`` with Adam (lr=1e-5) over ``train_data`` for ``epochs`` passes.

    Args:
        model: Module called as ``model(**inputs, labels=labels)``; the result
            must expose a ``loss`` attribute.
        train_data: Iterable of ``(inputs, labels)`` pairs, where ``inputs`` is
            a mapping of keyword arguments. It is iterated once per epoch.
        epochs: Number of passes over ``train_data``.

    Returns:
        The same ``model`` object, left in training mode.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
    model.train()
    # optimize_model_resources() disables autograd process-wide; re-enable it
    # locally so backward() works regardless of call order.
    with torch.enable_grad():
        for _ in range(epochs):
            for inputs, labels in train_data:
                optimizer.zero_grad()
                loss = model(**inputs, labels=labels).loss
                loss.backward()
                optimizer.step()
    return model
157
+
158
def apply_extreme_filters(model):
    """Run the full compression chain with the most aggressive settings.

    Order: ``ultra_max_compress``, ``optimize_model_resources``, config limits
    set to 128 positions / 256 hidden (when a ``config`` exists), inference
    optimisation (ScriptModules only), 95% pruning, and sign quantization with
    a -0.1 floor.

    Args:
        model: Module to compress.

    Returns:
        The compressed model.
    """
    model = ultra_max_compress(model)
    model = optimize_model_resources(model)

    # Earlier steps may return a module without `config`; skip rather than crash.
    config = getattr(model, 'config', None)
    if config is not None:
        config.max_position_embeddings = 128
        config.hidden_size = 256

    # optimize_for_inference raises on anything that is not a ScriptModule.
    if isinstance(model, torch.jit.ScriptModule):
        model = torch.jit.optimize_for_inference(model)

    model = prune_model(model, amount=0.95)
    quantize_model_to_q1_with_min(model, min_value=-0.1)
    return model
167
+
168
def reduce_latency(model, tokenizer, prompt, num_responses=5, max_length=50):
    """Sample completions of ``prompt`` and report the mean time per sample.

    Args:
        model: Object exposing ``generate``.
        tokenizer: Object exposing ``encode`` and ``decode``.
        prompt: Text to complete.
        num_responses: Number of samples to draw.
        max_length: ``max_length`` forwarded to ``generate``.

    Returns:
        ``(responses, latency)`` where ``responses`` is the list of decoded
        strings and ``latency`` is the mean milliseconds per sample
        (``0.0`` when no samples were requested).
    """
    responses = []
    # perf_counter is monotonic, unlike wall-clock time.time().
    started = time.perf_counter()
    for _ in range(num_responses):
        input_ids = tokenizer.encode(prompt, return_tensors="pt")
        output = model.generate(input_ids, max_length=max_length, do_sample=True, top_k=50)
        responses.append(tokenizer.decode(output[0], skip_special_tokens=True))
    elapsed = time.perf_counter() - started
    # Avoid dividing by zero when no responses were requested.
    latency = elapsed / num_responses * 1000 if num_responses > 0 else 0.0
    return responses, latency
179
+
180
def create_gpt_distill_model():
    """Load the pretrained ``gpt2`` model and its tokenizer.

    Returns:
        ``(model, tokenizer)`` for the ``gpt2`` checkpoint.
    """
    # Imported here because the file's top-level transformers import does not
    # bring in GPT2LMHeadModel.
    from transformers import GPT2LMHeadModel

    checkpoint = "gpt2"
    gpt_model = GPT2LMHeadModel.from_pretrained(checkpoint)
    gpt_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return gpt_model, gpt_tokenizer
184
+
185
def create_gemma_distill_model():
    """Load the pretrained ``google/gemma-2-9b`` model and its tokenizer.

    Returns:
        ``(model, tokenizer)`` for the ``google/gemma-2-9b`` checkpoint.
    """
    checkpoint = "google/gemma-2-9b"
    model = AutoModelForCausalLM.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    return model, tokenizer
189
+
190
def measure_performance(model, tokenizer, sequence_length=20, num_tokens=100):
    """Time repeated ``model.generate`` calls on a fixed synthetic prompt.

    The prompt is the letter "A" repeated ``sequence_length`` times, and
    ``generate`` is called ``num_tokens`` times.

    Args:
        model: Object exposing ``generate``.
        tokenizer: Callable returning keyword arguments for ``generate``.
        sequence_length: Number of characters in the synthetic prompt.
        num_tokens: Number of ``generate`` calls to time.

    Returns:
        ``(latency, tokens_per_second)``: mean milliseconds per call and calls
        per second. Either value is ``0.0`` when it would divide by zero.
    """
    inputs = tokenizer("A" * sequence_length, return_tensors="pt")
    # perf_counter is monotonic, unlike wall-clock time.time().
    started = time.perf_counter()
    for _ in range(num_tokens):
        model.generate(**inputs)
    elapsed = time.perf_counter() - started
    latency = elapsed / num_tokens * 1000 if num_tokens > 0 else 0.0
    tokens_per_second = num_tokens / elapsed if elapsed > 0 else 0.0
    return latency, tokens_per_second
199
+
200
def apply_diffusion_pipeline(prompt):
    """Generate images for ``prompt`` with ``black-forest-labs/FLUX.1-schnell``.

    The pipeline is loaded on every call.

    Returns:
        The ``images`` attribute of the pipeline result.
    """
    flux = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell")
    return flux(prompt).images
204
+
205
def generate_responses_with_diffusion(prompt, use_diffusion):
    """Generate images when diffusion is enabled and the prompt asks to imagine.

    Args:
        prompt: User text; matched case-insensitively against "imagina" and
            "imagine".
        use_diffusion: When falsy, no image is generated.

    Returns:
        The generated images, or ``None`` when diffusion is disabled or the
        prompt contains neither keyword.
    """
    # Honour the flag: it was previously accepted but never consulted.
    if not use_diffusion:
        return None
    lowered = prompt.lower()
    if "imagina" in lowered or "imagine" in lowered:
        return apply_diffusion_pipeline(prompt)
    return None
210
+
211
def generate_summary_with_bart(prompt):
    """Summarise ``prompt`` with ``facebook/bart-large-cnn`` using beam search.

    The tokenizer and model are loaded on every call.

    Args:
        prompt: Text to summarise.

    Returns:
        The decoded summary string.
    """
    checkpoint = "facebook/bart-large-cnn"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    # Truncate to the tokenizer's maximum length so long prompts do not
    # overflow the encoder's position limit.
    inputs = tokenizer.encode(prompt, return_tensors="pt", truncation=True)
    summary_ids = model.generate(
        inputs,
        max_length=130,
        min_length=30,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
218
+
219
def generate_responses_with_bart(prompt):
    """Summarise ``prompt`` when it contains "resumir" or "resumime".

    Returns:
        The summary, or ``None`` when neither keyword is present.
    """
    lowered = prompt.lower()
    if any(keyword in lowered for keyword in ("resumir", "resumime")):
        return generate_summary_with_bart(prompt)
    return None
224
+
225
def apply_whisper_pipeline(prompt):
    """Run ``openai/whisper-small`` on ``prompt`` and decode the output.

    The processor and model are loaded on every call.

    Returns:
        The list produced by ``processor.batch_decode``.
    """
    checkpoint = "openai/whisper-small"
    processor = AutoProcessor.from_pretrained(checkpoint)
    model = AutoModelForSpeechSeq2Seq.from_pretrained(checkpoint)
    # NOTE(review): callers pass the user's text prompt here, while a speech
    # model presumably expects audio samples — confirm the intended input.
    features = processor(prompt, return_tensors="pt")
    generated = model.generate(**features)
    return processor.batch_decode(generated, skip_special_tokens=True)
232
+
233
def generate_transcription_with_whisper(prompt):
    """Transcribe when ``prompt`` contains "transcribe" or "transcribime".

    Returns:
        The transcription, or ``None`` when neither keyword is present.
    """
    lowered = prompt.lower()
    if any(keyword in lowered for keyword in ("transcribe", "transcribime")):
        return apply_whisper_pipeline(prompt)
    return None
238
+
239
def apply_translation_pipeline(prompt):
    """Run ``google-t5/t5-base`` generation on ``prompt`` (up to 50 tokens).

    The tokenizer and model are loaded on every call. The prompt is passed
    to the model as given.

    Returns:
        The decoded output string.
    """
    checkpoint = "google-t5/t5-base"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    encoded = tokenizer.encode(prompt, return_tensors="pt")
    generated = model.generate(encoded, max_length=50)
    return tokenizer.decode(generated[0], skip_special_tokens=True)
246
+
247
def generate_translation_with_t5(prompt):
    """Translate when ``prompt`` contains "traducir" or "traducime".

    Returns:
        The translation, or ``None`` when neither keyword is present.
    """
    lowered = prompt.lower()
    if any(keyword in lowered for keyword in ("traducir", "traducime")):
        return apply_translation_pipeline(prompt)
    return None
252
+
253
def apply_musicgen_pipeline(prompt):
    """Generate audio for ``prompt`` with ``facebook/musicgen-small``.

    The tokenizer and model are loaded on every call.

    Returns:
        The value returned by ``model.generate``.
    """
    checkpoint = "facebook/musicgen-small"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForTextToWaveform.from_pretrained(checkpoint)
    inputs = tokenizer(prompt, return_tensors="pt")
    # The tokenizer returns a mapping of named tensors; unpack it so they are
    # passed as keyword arguments, as the other generate() calls in this file do.
    return model.generate(**inputs)
259
+
260
def generate_music_with_musicgen(prompt):
    """Generate music when ``prompt`` contains "música" or "canción".

    Matching is case-insensitive.

    Returns:
        The generated audio, or ``None`` when neither keyword is present.
    """
    lowered = prompt.lower()
    # The keywords were stored as mis-decoded text and could never match a
    # correctly encoded Spanish prompt.
    if "música" in lowered or "canción" in lowered:
        return apply_musicgen_pipeline(prompt)
    return None
265
+
266
def apply_musicgen_melody_pipeline(prompt):
    """Generate audio for ``prompt`` with ``facebook/musicgen-melody``.

    The tokenizer and model are loaded on every call.

    Returns:
        The value returned by ``model.generate``.
    """
    checkpoint = "facebook/musicgen-melody"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForTextToWaveform.from_pretrained(checkpoint)
    inputs = tokenizer(prompt, return_tensors="pt")
    # The tokenizer returns a mapping of named tensors; unpack it so they are
    # passed as keyword arguments, as the other generate() calls in this file do.
    return model.generate(**inputs)
272
+
273
def generate_music_with_musicgen_melody(prompt):
    """Generate melody-conditioned music when ``prompt`` contains "melodía" or "melodia".

    Matching is case-insensitive.

    Returns:
        The generated audio, or ``None`` when neither keyword is present.
    """
    lowered = prompt.lower()
    # The accented keyword was stored as mis-decoded text and could never
    # match a correctly encoded Spanish prompt.
    if "melodía" in lowered or "melodia" in lowered:
        return apply_musicgen_melody_pipeline(prompt)
    return None
278
+
279
def apply_stable_diffusion_pipeline(prompt):
    """Generate images for ``prompt`` with ``stabilityai/stable-diffusion-2-1``.

    The pipeline is loaded on every call.

    Returns:
        The ``images`` attribute of the pipeline result.
    """
    stable_diffusion = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1")
    return stable_diffusion(prompt).images
283
+
284
def generate_responses_with_stable_diffusion(prompt):
    """Generate images when ``prompt`` contains "imagen" or "image".

    Returns:
        The generated images, or ``None`` when neither keyword is present.
    """
    lowered = prompt.lower()
    if any(keyword in lowered for keyword in ("imagen", "image")):
        return apply_stable_diffusion_pipeline(prompt)
    return None
289
+
290
def unify_models(*models):
    """Collect the given modules into a single ``torch.nn.ModuleList``.

    Args:
        *models: Modules to hold, in order.

    Returns:
        A ``ModuleList`` containing ``models``.
    """
    container = torch.nn.ModuleList()
    container.extend(models)
    return container
293
+
294
def combined_filter(model):
    """Run every compression step in sequence with the most aggressive settings.

    Order: ``ultra_max_compress``, ``optimize_model_resources``, config limits
    set to 128 positions / 256 hidden (when a ``config`` exists), inference
    optimisation (ScriptModules only), 95% pruning, and sign quantization with
    a -0.1 floor.

    Args:
        model: Module to compress.

    Returns:
        The compressed model.
    """
    model = ultra_max_compress(model)
    model = optimize_model_resources(model)

    # Earlier steps may return a module without `config`; skip rather than crash.
    config = getattr(model, 'config', None)
    if config is not None:
        config.max_position_embeddings = 128
        config.hidden_size = 256

    # optimize_for_inference raises on anything that is not a ScriptModule.
    if isinstance(model, torch.jit.ScriptModule):
        model = torch.jit.optimize_for_inference(model)

    model = prune_model(model, amount=0.95)
    quantize_model_to_q1_with_min(model, min_value=-0.1)
    return model
303
+
304
def apply_filters_and_unify(model):
    """Apply ``combined_filter`` and then ``remove_unused_model_components``.

    Returns:
        The filtered model.
    """
    return remove_unused_model_components(combined_filter(model))
308
+
309
def upload_to_huggingface(model, repo_name, tokenizer=None):
    """Save a model (and tokenizer) locally and upload the folder to the Hub.

    Args:
        model: Object exposing ``save_pretrained``.
        repo_name: Hub repository id; also used as the local output directory.
        tokenizer: Optional object exposing ``save_pretrained``. When omitted,
            a module-level ``tokenizer`` variable is used if one exists, which
            matches what this function previously relied on implicitly.
    """
    api = HfApi()
    # exist_ok tolerates an already-created repo without hiding other
    # failures such as authentication errors.
    api.create_repo(repo_id=repo_name, repo_type="model", exist_ok=True)

    model.save_pretrained(repo_name)
    if tokenizer is None:
        tokenizer = globals().get("tokenizer")
    if tokenizer is not None:
        tokenizer.save_pretrained(repo_name)

    # The directory written by save_pretrained is not a git clone, so upload
    # it through the HTTP API instead of a git-based Repository.
    api.upload_folder(folder_path=repo_name, repo_id=repo_name, repo_type="model")
319
+
320
def apply_extreme_filters_and_upload(model, repo_name):
    """Compress ``model`` with ``apply_extreme_filters`` and upload the result.

    Args:
        model: Module to compress.
        repo_name: Repository id forwarded to ``upload_to_huggingface``.
    """
    compressed = apply_extreme_filters(model)
    upload_to_huggingface(compressed, repo_name)
323
+
324
def start_gradio_interface():
    """Build and launch the Gradio UI that routes a prompt to every generator."""

    def first_image(images):
        # The image generators return a collection, while each gr.Image
        # output shows a single image.
        if images is None or len(images) == 0:
            return None
        return images[0]

    def process_prompt(prompt):
        # Gradio maps a returned sequence onto `outputs` by position, so the
        # order here must match the component list below. A dict keyed by
        # plain strings is not accepted for multiple outputs.
        # NOTE(review): the two audio values are passed through exactly as the
        # MusicGen helpers return them — confirm gr.Audio accepts that format.
        return (
            generate_responses_with_bart(prompt),
            generate_transcription_with_whisper(prompt),
            generate_translation_with_t5(prompt),
            generate_music_with_musicgen(prompt),
            generate_music_with_musicgen_melody(prompt),
            first_image(generate_responses_with_stable_diffusion(prompt)),
            first_image(generate_responses_with_diffusion(prompt, True)),
        )

    interface = gr.Interface(
        fn=process_prompt,
        inputs=gr.Textbox(label="Enter Prompt"),
        outputs=[
            gr.Textbox(label="Summary"),
            gr.Textbox(label="Transcription"),
            gr.Textbox(label="Translation"),
            gr.Audio(label="Music"),
            gr.Audio(label="Melody Music"),
            gr.Image(label="Image"),
            gr.Image(label="Diffusion"),
        ],
        title="Multi-Function AI Model",
        description="Generate summaries, transcriptions, translations, music, melodies, images, and diffusion responses.",
    )
    interface.launch()
346
+
347
# NOTE(review): `launch()` typically blocks when run as a script, so the
# statements below would only run after the server stops — confirm the
# intended ordering.
start_gradio_interface()

# AutoModelForCausalLM is used for gpt2 because GPT2LMHeadModel is not
# imported anywhere in this file.
model_infos = [
    {"model_name": "gpt2", "class": AutoModelForCausalLM},
    {"model_name": "google/gemma-2-9b", "class": AutoModelForCausalLM},
]

# NOTE(review): `optimize_model_with_all_optimizations`, `optimize_for_1gb_ram`,
# `optimize_for_old_cpu` and `optimize_for_old_gpu` are not defined in this
# file; the calls below raise NameError until they are provided.
for model_info in model_infos:
    model = model_info["class"].from_pretrained(model_info["model_name"])
    tokenizer = AutoTokenizer.from_pretrained(model_info["model_name"])
    optimized_model, responses, latency = optimize_model_with_all_optimizations(model, tokenizer, "Sample prompt for optimization.")
    print(f"Model: {model_info['model_name']}")
    print(f"Latency: {latency:.2f} ms")
    print(f"Sample Responses: {responses}")

gpt_model, gpt_tokenizer = create_gpt_distill_model()
gemma_model, gemma_tokenizer = create_gemma_distill_model()

optimized_gpt_model, gpt_responses, gpt_latency = optimize_model_with_all_optimizations(gpt_model, gpt_tokenizer, "Sample prompt for GPT optimization.")
optimized_gemma_model, gemma_responses, gemma_latency = optimize_model_with_all_optimizations(gemma_model, gemma_tokenizer, "Sample prompt for Gemma optimization.")

combined_model = unify_models(optimized_gpt_model, optimized_gemma_model)

optimized_gpt_model_1gb = optimize_for_1gb_ram(optimized_gpt_model)
optimized_gemma_model_1gb = optimize_for_1gb_ram(optimized_gemma_model)
optimized_gpt_model_low = optimize_for_very_low_resources(optimized_gpt_model)
optimized_gemma_model_low = optimize_for_very_low_resources(optimized_gemma_model)
optimized_gpt_model_cpu = optimize_for_old_cpu(optimized_gpt_model)
optimized_gemma_model_cpu = optimize_for_old_cpu(optimized_gemma_model)
optimized_gpt_model_gpu = optimize_for_old_gpu(optimized_gpt_model)
optimized_gemma_model_gpu = optimize_for_old_gpu(optimized_gemma_model)

print("Models optimized for various resource constraints.")

diffusion_response = generate_responses_with_diffusion("Imagine a serene landscape", True)
if diffusion_response:
    print("Diffusion response generated.")

# The helpers below take only the prompt; the extra positional `True` that
# used to be passed raised TypeError.
summary_response = generate_responses_with_bart("Resumir este texto para obtener un resumen efectivo.")
if summary_response:
    print("Summary response generated.")

transcription_response = generate_transcription_with_whisper("Transcribe this audio file.")
if transcription_response:
    print("Transcription response generated.")

translation_response = generate_translation_with_t5("Traducir este texto al inglés.")
if translation_response:
    print("Translation response generated.")

# `is not None` is used for the audio results: they come straight from
# `model.generate`, and the truth value of a multi-element tensor is ambiguous.
music_response = generate_music_with_musicgen("Música para una tarde tranquila.")
if music_response is not None:
    print("Music response generated.")

melody_music_response = generate_music_with_musicgen_melody("Melodía para relajación.")
if melody_music_response is not None:
    print("Melody music response generated.")

image_response = generate_responses_with_stable_diffusion("Imagen de un paisaje sereno.")
if image_response:
    print("Image response generated.")

# NOTE(review): `combined_model` is a torch.nn.ModuleList, which has no
# `save_pretrained`; confirm what should actually be uploaded here.
upload_to_huggingface(combined_model, "Ffftdtd5dtft/my_model")