"""Helpers for transforming, encoding and plotting pen-stroke sequences.

Stroke arrays are 2-D numpy arrays whose first two columns are x and y and
whose third column is an end-of-stroke flag (``1`` on the last point of a
stroke).  "coords" hold absolute positions, "offsets" hold point-to-point
deltas.
"""
from __future__ import print_function

from collections import defaultdict

import numpy as np
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Only draw() needs matplotlib; keep the numeric helpers importable
    # without it.  draw() raises a clear ImportError in that case.
    plt = None


alphabet = [
    '\x00', ' ', '!', '"', '#', "'", '(', ')', ',', '-', '.',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';',
    '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
    'L', 'M', 'N', 'O', 'P', 'R', 'S', 'T', 'U', 'V', 'W', 'Y',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
    'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
    'y', 'z'
]
alphabet_ord = [ord(char) for char in alphabet]

# char -> index in ``alphabet``; characters outside the alphabet map to 0
# (the index of '\x00') through the defaultdict's int() factory.
alpha_to_num = defaultdict(
    int, ((char, index) for index, char in enumerate(alphabet))
)
# index in ``alphabet`` -> code point (an int, not a one-character string)
num_to_alpha = dict(enumerate(alphabet_ord))

MAX_STROKE_LEN = 1200
MAX_CHAR_LEN = 75


def align(coords):
    """
    corrects for global slant/offset in handwriting strokes

    Fits the line ``y = offset + slope * x`` by ordinary least squares,
    rotates the points by ``arctan(slope)`` and then subtracts ``offset``
    from both of the first two columns.

    coords: array with x in column 0 and y in column 1; not modified.
    returns: a transformed copy of ``coords``.
    raises: numpy.linalg.LinAlgError if the normal-equation matrix is
        singular (e.g. all x values are identical).
    """
    coords = np.copy(coords)
    X, Y = coords[:, 0].reshape(-1, 1), coords[:, 1].reshape(-1, 1)
    # design matrix [1, x] so the solution is (intercept, slope)
    X = np.concatenate([np.ones([X.shape[0], 1]), X], axis=1)
    offset, slope = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(Y).squeeze()
    theta = np.arctan(slope)
    rotation_matrix = np.array(
        [[np.cos(theta), -np.sin(theta)],
         [np.sin(theta), np.cos(theta)]]
    )
    coords[:, :2] = np.dot(coords[:, :2], rotation_matrix) - offset
    return coords


def skew(coords, degrees):
    """
    skews strokes by given degrees

    coords: array with x, y in the first two columns; not modified.
    degrees: skew angle in degrees.
    returns: a skewed copy of ``coords``.
    """
    coords = np.copy(coords)
    theta = degrees * np.pi/180
    A = np.array([[np.cos(-theta), 0], [np.sin(-theta), 1]])
    coords[:, :2] = np.dot(coords[:, :2], A)
    return coords


def stretch(coords, x_factor, y_factor):
    """
    stretches strokes along x and y axis

    returns: a copy of ``coords`` with column 0 multiplied by ``x_factor``
        and column 1 multiplied by ``y_factor``.
    """
    coords = np.copy(coords)
    coords[:, :2] *= np.array([x_factor, y_factor])
    return coords


def add_noise(coords, scale):
    """
    adds gaussian noise to strokes

    Zero-mean noise with standard deviation ``scale`` is added to the x/y
    columns of every row except the first.  Returns a copy.
    """
    coords = np.copy(coords)
    coords[1:, :2] += np.random.normal(
        loc=0.0, scale=scale, size=coords[1:, :2].shape
    )
    return coords


def encode_ascii(ascii_string):
    """
    encodes ascii string to array of ints

    Each character is replaced by its index in ``alphabet`` (0 for
    characters that are not in the alphabet) and a trailing 0 is appended.
    """
    return np.array([alpha_to_num[char] for char in ascii_string] + [0])


def _split_at_eos(coords):
    """Split a stroke array after every row whose third column equals 1."""
    return np.split(coords, np.where(coords[:, 2] == 1)[0] + 1, axis=0)


def denoise(coords):
    """
    smoothing filter to mitigate some artifacts of the data collection

    Applies a Savitzky-Golay filter (window 7, polynomial order 3,
    ``mode='nearest'``) to x and y of every stroke independently; the
    end-of-stroke column is carried over unchanged.
    """
    new_coords = []
    for stroke in _split_at_eos(coords):
        if len(stroke) == 0:
            # np.split leaves an empty tail when the last row ends a stroke
            continue
        x_new = savgol_filter(stroke[:, 0], 7, 3, mode='nearest')
        y_new = savgol_filter(stroke[:, 1], 7, 3, mode='nearest')
        new_coords.append(np.column_stack([x_new, y_new, stroke[:, 2]]))
    return np.vstack(new_coords)


def interpolate(coords, factor=2):
    """
    interpolates strokes using cubic spline

    Strokes with more than 3 points are resampled to ``factor`` times as
    many points; shorter strokes keep their points.  The end-of-stroke
    column is rebuilt so that only the last point of every stroke is 1.
    """
    new_coords = []
    for stroke in _split_at_eos(coords):
        if len(stroke) == 0:
            continue

        xy_coords = stroke[:, :2]

        if len(stroke) > 3:
            # the point index serves as the spline parameter
            index = np.arange(len(stroke))
            f_x = interp1d(index, stroke[:, 0], kind='cubic')
            f_y = interp1d(index, stroke[:, 1], kind='cubic')
            samples = np.linspace(0, len(stroke) - 1, factor*(len(stroke)))
            xy_coords = np.column_stack([f_x(samples), f_y(samples)])

        stroke_eos = np.zeros([len(xy_coords), 1])
        stroke_eos[-1] = 1.0
        new_coords.append(np.concatenate([xy_coords, stroke_eos], axis=1))

    return np.vstack(new_coords)


def normalize(offsets):
    """
    normalizes strokes to median unit norm

    Divides the x/y columns of a copy of ``offsets`` by the median Euclidean
    length of its rows.  No guard is applied when that median is 0.
    """
    offsets = np.copy(offsets)
    offsets[:, :2] /= np.median(np.linalg.norm(offsets[:, :2], axis=1))
    return offsets


def coords_to_offsets(coords):
    """
    convert from coordinates to offsets

    The result has the same number of rows as ``coords``: a leading
    ``[0, 0, 1]`` row followed by the differences between consecutive
    points, each paired with the end-of-stroke flag of the later point.
    """
    offsets = np.concatenate(
        [coords[1:, :2] - coords[:-1, :2], coords[1:, 2:3]], axis=1
    )
    offsets = np.concatenate([np.array([[0, 0, 1]]), offsets], axis=0)
    return offsets


def offsets_to_coords(offsets):
    """
    convert from offsets to coordinates

    Cumulatively sums the x/y columns; the third column is passed through.
    """
    return np.concatenate(
        [np.cumsum(offsets[:, :2], axis=0), offsets[:, 2:3]], axis=1
    )


def split_strokes(strokes):
    """
    groups (x, y, eos) rows into one ``(xs, ys)`` pair of lists per stroke

    A stroke is closed after every row whose eos value equals 1; trailing
    rows without a closing flag form a final stroke.
    """
    segments = []
    xs, ys = [], []
    for x, y, eos in strokes:
        xs.append(x)
        ys.append(y)
        if eos == 1:
            segments.append((xs, ys))
            xs, ys = [], []
    if xs:
        segments.append((xs, ys))
    return segments


def draw(
        offsets,
        ascii_seq=None,
        align_strokes=True,
        denoise_strokes=True,
        interpolation_factor=None,
        save_file=None
):
    """
    plots a sequence of stroke offsets

    offsets: offset array (dx, dy, eos), converted with offsets_to_coords.
    ascii_seq: optional title, either a str or an iterable of code points.
    align_strokes: apply align() to the x/y columns before plotting.
    denoise_strokes: apply denoise() before plotting.
    interpolation_factor: if not None, passed to interpolate() as factor.
    save_file: if not None the figure is saved there, otherwise it is shown.

    raises: ImportError if matplotlib is not installed.
    """
    if plt is None:
        raise ImportError('matplotlib is required for draw()')

    strokes = offsets_to_coords(offsets)

    if denoise_strokes:
        strokes = denoise(strokes)

    if interpolation_factor is not None:
        strokes = interpolate(strokes, factor=interpolation_factor)

    if align_strokes:
        strokes[:, :2] = align(strokes[:, :2])

    fig, ax = plt.subplots(figsize=(12, 3))

    # Lists rather than zip(): a zip object cannot be indexed on Python 3.
    for xs, ys in split_strokes(strokes):
        ax.plot(xs, ys, 'k')

    ax.set_xlim(-50, 600)
    ax.set_ylim(-40, 40)

    ax.set_aspect('equal')
    # Booleans, not the strings 'off': non-empty strings are truthy, so
    # current matplotlib would leave the ticks and labels visible.
    plt.tick_params(
        axis='both',
        left=False,
        top=False,
        right=False,
        bottom=False,
        labelleft=False,
        labeltop=False,
        labelright=False,
        labelbottom=False
    )

    if ascii_seq is not None:
        if not isinstance(ascii_seq, str):
            ascii_seq = ''.join(map(chr, ascii_seq))
        plt.title(ascii_seq)

    if save_file is not None:
        plt.savefig(save_file)
        print('saved to {}'.format(save_file))
    else:
        plt.show()
    # NOTE: closes every open pyplot figure, not only the one created here.
    plt.close('all')