def get_subregion_name(id, region_map):
    """Returns the name of the sub-region identified by `id`.

    Args:
        id: lookup key into `region_map['sub']['ids_inv']`. The name shadows
            the builtin `id`; it is kept so keyword callers keep working.
        region_map: mapping with a 'sub' entry that holds the `ids_inv` and
            `names_inv` lookups (presumably the `region_map` returned by
            `load_checkpoint` -- confirm against the caller).

    Returns:
        The entry of `names_inv` selected by `ids_inv[id]`.
    """
    sub_regions = region_map['sub']
    # Two-step lookup: id -> intermediate index -> name.
    return sub_regions['names_inv'][sub_regions['ids_inv'][id]]


def load_checkpoint(path):
    """Loads a checkpoint pickle.

    Args:
        path: path to checkpoint pickle

    Returns:
        a model config dictionary (arguments to the model's constructor), a
        dict of dicts containing region mapping information, a GreekAlphabet
        instance with indices and words populated from the checkpoint, a dict
        of Jax arrays `params`, and a `forward` function.
    """
    # Pickled checkpoint dict containing params and various config.
    # SECURITY: unpickling can execute arbitrary code, so only load
    # checkpoints obtained from a trusted source.
    with open(path, 'rb') as f:
        checkpoint = pickle.load(f)

    # We reconstruct the model using the same arguments as during training,
    # which are saved as a dict in the "model_config" key, and construct a
    # `forward` function of the form required by attribute() and restore().
    model_config = checkpoint['model_config']
    params = jax.device_put(checkpoint['params'])
    model = Model(**model_config)
    forward = functools.partial(model.apply, params)

    # Contains the mapping between region IDs and names:
    region_map = checkpoint['region_map']

    # Use vocabulary mapping from the checkpoint, the rest of the values in
    # the class are fixed and constant e.g. the padding symbol
    alphabet = GreekAlphabet()
    alphabet.idx2word = checkpoint['alphabet']['idx2word']
    alphabet.word2idx = checkpoint['alphabet']['word2idx']

    return model_config, region_map, alphabet, params, forward
Input text: | {% for char in restoration_results.input_text -%} {%- if loop.index0 in prediction_idx -%} {{char}} {%- else -%} {{char}} {%- endif -%} {%- endfor %} | |
Hypothesis {{ loop.index }}: | {{ "%.1f%%"|format(100 * pred.score) }} | {% for char in pred.text -%} {%- if loop.index0 in prediction_idx -%} {{char}} {%- else -%} {{char}} {%- endif -%} {%- endfor %} |