amosyou committed on
Commit
e042f04
•
1 Parent(s): 51c3022

add requirements + gradio demo

Browse files
Files changed (2) hide show
  1. app.py +128 -0
  2. requirements.txt +5 -10
app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from huggingface_hub import hf_hub_download
3
+
4
+ import os
5
+ import pickle
6
+ import torch
7
+
8
+ from argparse import Namespace
9
+ from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
10
+ from io import BytesIO
11
+ from model import get_model
12
+ from src.utils.output_utils import prepare_output
13
+ from torchvision import transforms
14
+
15
+
16
# Hugging Face Hub repository that stores the vocabularies and the checkpoint.
REPO_ID = "Launchpad/inversecooking"
# Access token taken from the environment; None when HF_TOKEN is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

use_gpu = True
device = torch.device('cuda' if torch.cuda.is_available() and use_gpu else 'cpu')
# None keeps tensors on the device they were saved from; 'cpu' remaps them
# when no GPU is available (or GPU use is disabled).
map_loc = None if torch.cuda.is_available() and use_gpu else 'cpu'


def _fetch_from_hub(filename):
    """Download ``filename`` from ``REPO_ID`` and return the local file path.

    The token is passed by keyword: ``hf_hub_download`` does not take the
    token as its third positional argument.
    """
    return hf_hub_download(REPO_ID, filename, token=HF_TOKEN)


def _load_pickle_from_hub(filename):
    """Download ``filename`` from ``REPO_ID`` and return the unpickled object.

    ``pickle.load`` needs an open binary file object, not a path, so the
    downloaded file is opened explicitly and closed again by the ``with``.
    """
    # NOTE(security): unpickling can execute arbitrary code. This is only
    # acceptable as long as REPO_ID is a repository we control and trust.
    with open(_fetch_from_hub(filename), 'rb') as fh:
        return pickle.load(fh)


# Inverse Cooking
ingrs_vocab = _load_pickle_from_hub('data/ingr_vocab.pkl')
vocab = _load_pickle_from_hub('data/instr_vocab.pkl')

ingr_vocab_size = len(ingrs_vocab)
instrs_vocab_size = len(vocab)

# Hardcoded args
args = Namespace(
    aux_data_dir='../data', batch_size=128, beam=-1, crop_size=224,
    decay_lr=True, dropout_decoder_i=0.3, dropout_decoder_r=0.3,
    dropout_encoder=0.3, embed_size=512, es_metric='loss',
    eval_split='val', finetune_after=-1, get_perplexity=False,
    greedy=False, image_model='resnet50', image_size=256,
    ingrs_only=True, label_smoothing_ingr=0.1, learning_rate=0.001,
    log_step=10, log_term=False, loss_weight=[1.0, 0.0, 0.0, 0.0],
    lr_decay_every=1, lr_decay_rate=0.99, max_eval=4096, maxnumims=5,
    maxnuminstrs=10, maxnumlabels=20, maxseqlen=15, model_name='model',
    n_att=8, n_att_ingrs=4, num_epochs=400, num_workers=8, numgens=3,
    patience=50, project_name='inversecooking',
    recipe1m_dir='path/to/recipe1m', recipe_only=False, resume=False,
    save_dir='path/to/save/models', scale_learning_rate_cnn=0.01,
    suff='', temperature=1.0, tensorboard=True, transf_layers=16,
    transf_layers_ingrs=4, transfer_from='', use_lmdb=True,
    use_true_ingrs=False, weight_decay=0.0
)
# Explicit overrides applied after construction; ingrs_only flips the
# value given in the Namespace above from True to False.
args.maxseqlen = 15
args.ingrs_only = False

# Load the trained model parameters
model = get_model(args, ingr_vocab_size, instrs_vocab_size)
model.load_state_dict(
    torch.load(_fetch_from_hub('data/modelbest.ckpt'), map_location=map_loc)
)
model = model.to(device)
model.eval()
model.ingrs_only = False
model.recipe_only = False

# Preprocessing applied to every input image before it is fed to the model.
transform_list = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    # Per-channel mean/std; presumably the usual ImageNet statistics the
    # image encoder was trained with -- confirm against the training code.
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
]
transform = transforms.Compose(transform_list)


# Sampling settings, indexed by generation number inside generate_image.
greedy = [True, False, False, False]
beam = [-1, -1, -1, -1]
temperature = 1.0
numgens = 1

# StableDiffusion
# Use the computed device rather than a hard-coded 'cuda' so the app still
# starts on hosts without a GPU.
pipe = StableDiffusionPipeline.from_pretrained('CompVis/stable-diffusion-v1-4').to(device)
81
+
82
def generate_image(input_img):
    """Generate a plated-food image from a photo of a dish.

    The photo is preprocessed with the module-level ``transform``, passed to
    ``model.sample`` to predict a recipe title and ingredient list, and the
    resulting text prompt is rendered with the Stable Diffusion ``pipe``.

    Args:
        input_img: The uploaded picture, either a NumPy array (what
            ``gr.Image()`` delivers by default) or an image object that
            ``transform`` already accepts.

    Returns:
        The first image produced by ``pipe`` for the generated prompt.

    Raises:
        gr.Error: If no image was provided.
    """
    import numpy as np

    if input_img is None:
        raise gr.Error("Please provide an input image.")

    # torchvision's Resize/CenterCrop work on PIL images or tensors, not on
    # raw arrays, so convert first. convert('RGB') guarantees the three
    # channels that the Normalize step expects.
    if isinstance(input_img, np.ndarray):
        input_img = transforms.ToPILImage()(input_img).convert('RGB')

    # Inverse Cooking
    image_tensor = transform(input_img).unsqueeze(0).to(device)

    # Only the outputs of the last generation are used below.
    with torch.no_grad():
        for i in range(numgens):
            outputs = model.sample(image_tensor, greedy=greedy[i],
                                   temperature=temperature, beam=beam[i],
                                   true_ingrs=None)

    ingr_ids = outputs['ingr_ids'].cpu().numpy()
    recipe_ids = outputs['recipe_ids'].cpu().numpy()

    # The second return value (validity info) is not used by this demo.
    outs, _ = prepare_output(recipe_ids[0], ingr_ids[0], ingrs_vocab, vocab)

    recipe_name = outs['title']
    ingredients = ', '.join(outs['ingrs'])  # ingredient list as one string

    # Create hardcoded StableDiffusion prompt
    prompt = f"Fancy food plating of {recipe_name} with ingredients {ingredients}"
    print(prompt)

    # StableDiffusion
    return pipe(prompt).images[0]
110
+
111
# Static content shown in the header row of the demo page.
_LOGO_URL = "https://www.ocf.berkeley.edu/~launchpad/media/uploads/project_logos/414478903_2298162417059609_260250523028403756_n_yt9pGFm.png"
_ABOUT_MD = """Lunchpad is a [Launchpad](https://launchpad.studentorg.berkeley.edu/) project (Spring 2023) that transforms pictures of food to fancy plated versions through a novel transformer architecture and latent diffusion models.
<br/><br/>
**Model**: [Inverse Cooking](https://arxiv.org/abs/1812.06164), [Stable-Diffusion-v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4)
<br/>
**Developed by**: Sebastian Zhao, Annabelle Park, Nikhil Pitta, Tanush Talati, Rahul Vijay, Jade Wang, Tony Xin
"""

# Page layout: a header row (logo + project description) followed by a row
# holding the image-to-image interface backed by generate_image.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1):
            gr.Image(
                _LOGO_URL,
                elem_id="logo-img",
                show_label=False,
                show_share_button=False,
                show_download_button=False,
            )

        with gr.Column(scale=3):
            gr.Markdown(_ABOUT_MD)
    with gr.Row():
        gr.Interface(generate_image, gr.Image(), "image")

if __name__ == '__main__':
    demo.launch()
requirements.txt CHANGED
@@ -1,11 +1,6 @@
1
  numpy
2
- scipy
3
- matplotlib
4
- torch==0.4.1
5
- torchvision==0.2.1
6
- nltk
7
- Pillow
8
- tqdm
9
- lmdb
10
- tensorflow
11
- tensorboardX
 
1
  numpy
2
+ torch
3
+ torchvision
4
+ diffusers
5
+ transformers
6
+ tokenizers