Update app.py
Browse files — Added new shortened prompt provided by Gavin
app.py
CHANGED
@@ -9,7 +9,7 @@ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
|
9 |
# Define the correction function
|
10 |
def correct_transcription(input_text):
|
11 |
# Add task instruction to the input
|
12 |
-
prompt = "Correct the following Yiddish
|
13 |
input_ids = tokenizer(prompt + input_text, return_tensors="pt", truncation=True).input_ids
|
14 |
output_ids = model.generate(input_ids, max_length=512)
|
15 |
corrected_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
|
|
9 |
# Define the correction function
|
10 |
def correct_transcription(input_text):
|
11 |
# Add task instruction to the input
|
12 |
+
prompt = "Correct the following raw Yiddish HTR output from Transkribus into authentic Yiddish correspondence. Keep period-specific spellings, regional variations, and Hebrew-origin words as they were historically used. Fix only clear OCR errors, such as broken words or nonsensical combinations. Retain proper nouns, place names, abbreviations, and informal language. Maintain original line breaks and formatting:\n"
|
13 |
input_ids = tokenizer(prompt + input_text, return_tensors="pt", truncation=True).input_ids
|
14 |
output_ids = model.generate(input_ids, max_length=512)
|
15 |
corrected_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|