johnlockejrr commited on
Commit
140a773
1 Parent(s): 1e236cb

Add application file

Browse files
Files changed (4) hide show
  1. README.md +6 -4
  2. app.py +128 -0
  3. examples/default.jpg +0 -0
  4. requirements.txt +1 -0
README.md CHANGED
@@ -1,13 +1,15 @@
1
  ---
2
- title: PyLaia-mcdonald V2
3
- emoji: 👁
4
  colorFrom: green
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 4.36.1
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: PyLaia
3
+ emoji: 🐢
4
  colorFrom: green
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 4.13.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
+ models:
12
+ - Teklia/pylaia-rimes
13
  ---
14
 
15
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from uuid import uuid4
2
+ import gradio as gr
3
+ from laia.scripts.htr.decode_ctc import run as decode
4
+ from laia.common.arguments import CommonArgs, DataArgs, TrainerArgs, DecodeArgs
5
+ import sys
6
+ from tempfile import NamedTemporaryFile, mkdtemp
7
+ from pathlib import Path
8
+ from contextlib import redirect_stdout
9
+ import re
10
+ from huggingface_hub import snapshot_download
11
+
12
# Scratch directory where each uploaded line image is written before decoding.
images = Path(mkdtemp())

# PyLaia prints one prediction per line as: "<image_id> <confidence> <text>".
# The image id is a UUID4 string (36 chars of hex digits and dashes).
IMAGE_ID_PATTERN = r"(?P<image_id>[-a-z0-9]{36})"
CONFIDENCE_PATTERN = r"(?P<confidence>[0-9.]+)"  # For line
TEXT_PATTERN = r"\s*(?P<text>.*)\s*"
LINE_PREDICTION = re.compile(rf"{IMAGE_ID_PATTERN} {CONFIDENCE_PATTERN} {TEXT_PATTERN}")
# Hub repositories offered in the UI dropdown.
models_name = ["johnlockejrr/pylaia-mcdonald_v2"]
# Cache: model repo id -> local snapshot path (filled lazily by load_model).
MODELS = {}
# Every input image is resized to this height before decoding.
DEFAULT_HEIGHT = 128
21
+
22
+
23
def get_width(image, height=DEFAULT_HEIGHT):
    """Return the width that preserves *image*'s aspect ratio at *height* pixels."""
    return height * (image.width / image.height)
26
+
27
+
28
def load_model(model_name):
    """Download *model_name* from the Hugging Face Hub once; return its local path.

    Subsequent calls for the same repo id hit the in-memory MODELS cache and
    perform no network access.
    """
    try:
        return MODELS[model_name]
    except KeyError:
        path = Path(snapshot_download(model_name))
        MODELS[model_name] = path
        return path
32
+
33
+
34
def predict(model_name, input_img):
    """Decode the transcription of a single text-line image with a PyLaia model.

    Parameters
    ----------
    model_name : str
        Hugging Face Hub repository id of the PyLaia model to use.
    input_img : PIL.Image.Image
        Image containing one line of handwritten/printed text.

    Returns
    -------
    tuple
        ``(resized_image, {"text": transcription, "score": line_confidence})``.

    Raises
    ------
    RuntimeError
        If PyLaia does not emit exactly one prediction line.
    """
    model_dir = load_model(model_name)

    # Decoding hyper-parameters: softmax temperature and batch size.
    temperature = 2.0
    batch_size = 1

    weights_path = model_dir / "weights.ckpt"
    syms_path = model_dir / "syms.txt"
    language_model_params = {"language_model_weight": 1.0}
    # The presence of tokens.txt signals that the snapshot ships an n-gram
    # language model (arpa + lexicon) to rescore the CTC output.
    use_language_model = (model_dir / "tokens.txt").exists()
    if use_language_model:
        language_model_params.update(
            {
                "language_model_path": str(model_dir / "language_model.arpa.gz"),
                "lexicon_path": str(model_dir / "lexicon.txt"),
                "tokens_path": str(model_dir / "tokens.txt"),
            }
        )

    common_args = CommonArgs(
        checkpoint=str(weights_path.relative_to(model_dir)),
        train_path=str(model_dir),
        experiment_dirname="",
    )
    data_args = DataArgs(batch_size=batch_size, color_mode="L")
    trainer_args = TrainerArgs(
        # Disable progress bar else it messes with frontend display
        progress_bar_refresh_rate=0
    )
    decode_args = DecodeArgs(
        include_img_ids=True,
        join_string="",
        convert_spaces=True,
        print_line_confidence_scores=True,
        print_word_confidence_scores=False,
        temperature=temperature,
        use_language_model=use_language_model,
        **language_model_params,
    )

    with NamedTemporaryFile() as pred_stdout, NamedTemporaryFile() as img_list:
        image_id = uuid4()
        # Resize image to 128 if bigger/smaller
        input_img = input_img.resize((int(get_width(input_img)), DEFAULT_HEIGHT))
        input_img.save(str(images / f"{image_id}.jpg"))
        # Export image list
        Path(img_list.name).write_text("\n".join([str(image_id)]))

        # Capture stdout as that's where PyLaia outputs predictions.
        # FIX: open the capture file in a `with` block so it is flushed and
        # closed before we read it back — previously the handle leaked and
        # buffered output could be lost.
        with open(pred_stdout.name, mode="w") as capture, redirect_stdout(capture):
            decode(
                syms=str(syms_path),
                img_list=img_list.name,
                img_dirs=[str(images)],
                common=common_args,
                data=data_args,
                trainer=trainer_args,
                decode=decode_args,
                num_workers=1,
            )
            # Flush stdout to avoid output buffering
            sys.stdout.flush()
        predictions = Path(pred_stdout.name).read_text().strip().splitlines()
        # One image was submitted, so exactly one prediction line is expected.
        # Raise (not assert) so the check survives `python -O`.
        if len(predictions) != 1:
            raise RuntimeError(
                f"Expected exactly 1 prediction line, got {len(predictions)}"
            )
        _, score, text = LINE_PREDICTION.match(predictions[0]).groups()
        return input_img, {"text": text, "score": score}
100
+
101
+
102
# Gradio UI: pick a model, upload a single line image, get back the resized
# image plus the decoded text and its confidence score (from predict above).
gradio_app = gr.Interface(
    predict,
    inputs=[
        gr.Dropdown(models_name, value=models_name[0], label="Models"),
        gr.Image(
            label="Upload an image of a line",
            sources=["upload", "clipboard"],
            type="pil",
            height=DEFAULT_HEIGHT,
            width=2000,
            # Grayscale, matching DataArgs(color_mode="L") used at decode time.
            image_mode="L",
        ),
    ],
    outputs=[
        gr.Image(label="Processed Image"),
        gr.JSON(label="Decoded text"),
    ],
    # Every file in examples/ becomes a pre-cached demo input for the first model.
    examples=[
        ["johnlockejrr/pylaia-mcdonald_v2", str(filename)]
        for filename in Path("examples").iterdir()
    ],
    title="Decode the transcription of an image using a PyLaia model",
    cache_examples=True,
)

if __name__ == "__main__":
    gradio_app.launch()
examples/default.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ pylaia==1.1.0