# Gradio demo for the "Handwriting Transformer" model: loads a pretrained
# TRGAN generator once at start-up and serves a small web UI that renders
# user-supplied text in the handwriting style of a set of word images.
#
# NOTE: the import sequence below is intentionally kept in its original order
# (including repeats) because `from params import *` sits in the middle of it
# and may rebind names; reordering could change which binding wins.
import gradio as gr
from PIL import Image
import numpy as np
from io import BytesIO
import glob
import os
import time
from data.dataset import load_itw_samples, crop_
import torch
import cv2
import os
import numpy as np
from models.model import TRGAN
from params import *
from torch import nn
from data.dataset import get_transform
import pickle
from PIL import Image
import tqdm
import shutil

wellcomingMessage = """
🚀 This app is a demo for the ICCV'21 paper "Handwriting Transformer". Visit our github paper for more information - https://github.com/ankanbhunia/Handwriting-Transformers
🚀 You can either choose from an existing style gallery or upload your own handwriting. If you choose to upload, please ensure that you provide a sufficient number of (~15) cropped handwritten word images for the model to work effectively. The demo is made available for research purposes, and any other use is not intended.
"""

# Default prompt shown in the text box (kept identical to the original value,
# including the embedded line break).
_DEFAULT_TEXT = (
    "In the quiet hum of everyday life, the dance of existence unfolds. \n"
    "Time, an ever-flowing river, carries the stories of triumph and heartache. "
    "Each fleeting moment is a brushstroke on the canvas of our memories."
)

# Pixel value used for every padded region of the output images.
_WHITE = 255
# Number of blank rows added above both output images.
_TOP_MARGIN_ROWS = 45

model_path = 'files/iam_model.pth'
batch_size = 1

print('(1) Loading model...')
model = TRGAN(batch_size=batch_size)
# Weights are deserialized onto the CPU and then copied into the generator.
model.netG.load_state_dict(
    torch.load(model_path, map_location=torch.device('cpu'))
)
print(model_path + ' : Model loaded Successfully')
model.eval()


def _pad_right_white(image, width):
    """Return *image* extended on the right with white columns up to *width*."""
    filler = np.ones((image.shape[0], width - image.shape[1])) * _WHITE
    return np.concatenate([image, filler], 1)


def generate_image(text, folder, _ch3, images):
    """Render *text* in the handwriting style given by the style images.

    Args:
        text: Text to synthesize. It is split on any whitespace into words;
            each word is encoded to bytes and handed to
            ``model.netconverter.encode``.
        folder: Value of the style dropdown; passed to ``load_itw_samples``
            when no files were uploaded.
        _ch3: Value of the "### OR" Markdown component that sits in the
            interface's input list. Unused; it only keeps the positional
            arguments aligned with the input components.
        images: Uploaded files from the ``gr.File`` component. When truthy
            they take precedence over ``folder``.

    Returns:
        A ``(style_image, generated_image)`` tuple of RGB PIL images, both
        padded to the same width with a white margin on top. ``(None, None)``
        is returned when there is no style source or no text to render, so
        that the handler always yields one value per output component.
    """
    # Uploaded images win over the gallery selection.
    style_source = images or folder
    # Split on any whitespace: newlines/tabs act as word separators instead of
    # gluing neighbouring words together, and repeated or trailing spaces no
    # longer produce empty "words" for the encoder.
    words = (text or "").split()
    if not style_source or not words:
        return None, None

    style_inputs, width_length = load_itw_samples(style_source)
    text_encode = [word.encode() for word in words]

    # Pure inference: no autograd graph is needed.
    # DEVICE is presumably provided by `from params import *` - confirm.
    with torch.no_grad():
        eval_text_encode, eval_len_text = model.netconverter.encode(text_encode)
        eval_text_encode = eval_text_encode.to(DEVICE).repeat(batch_size, 1, 1)
        input_styles, page_val = model._generate_page(
            style_inputs.to(DEVICE).clone(),
            width_length,
            eval_text_encode,
            eval_len_text,
            no_concat=True,
        )

    # Only the first item of each result is used; values are scaled by 255
    # before cropping (crop_ presumably trims blank borders - confirm).
    page_val = crop_(page_val[0] * _WHITE)
    input_styles = crop_(input_styles[0] * _WHITE)

    # Bring both images to a common width so they line up in the UI.
    max_width = max(page_val.shape[1], input_styles.shape[1])
    page_val = _pad_right_white(page_val, max_width)
    input_styles = _pad_right_white(input_styles, max_width)

    # Same white margin on top of both images.
    upper_pad = np.ones((_TOP_MARGIN_ROWS, max_width)) * _WHITE
    input_styles = np.concatenate([upper_pad, input_styles], 0)
    page_val = np.concatenate([upper_pad, page_val], 0)

    page_val = Image.fromarray(page_val).convert('RGB')
    input_styles = Image.fromarray(input_styles).convert('RGB')
    return input_styles, page_val


# Define Gradio Interface
iface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Textbox(value=_DEFAULT_TEXT, label="Input text"),
        gr.Dropdown(
            value="files/example_data/style-30",
            # Sorted so the dropdown order is stable across runs/platforms.
            choices=sorted(glob.glob('files/example_data/*')),
            label="Choose from provided writer styles",
        ),
        # Purely visual separator; its value arrives as `_ch3` in the handler.
        gr.Markdown("### OR"),
        gr.File(label="Upload multiple word images", file_count="multiple"),
    ],
    outputs=[
        gr.Image(type="pil", label="Style Image"),
        gr.Image(type="pil", label="Generated Image"),
    ],
    description=wellcomingMessage,
    # This headline was previously passed as `thumbnail`, which expects an
    # image path/URL rather than text; `title` is the parameter that shows it.
    title="Handwriting Synthesis - Mimic anyone's handwriting!",
)

# Launch the Gradio Interface
iface.launch(debug=True, share=True)