Addaci committed on
Commit
bdbc86a
·
verified ·
1 Parent(s): 5ba8131

Update app.py

Browse files

Added new shortened prompt provided by Gavin

Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -9,7 +9,7 @@ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
9
  # Define the correction function
10
  def correct_transcription(input_text):
11
  # Add task instruction to the input
12
- prompt = "Correct the following Yiddish transcription to groundtruth Yiddish in Hebrew script:\n"
13
  input_ids = tokenizer(prompt + input_text, return_tensors="pt", truncation=True).input_ids
14
  output_ids = model.generate(input_ids, max_length=512)
15
  corrected_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
9
  # Define the correction function
10
  def correct_transcription(input_text):
11
  # Add task instruction to the input
12
+ prompt = "Correct the following raw Yiddish HTR output from Transkribus into authentic Yiddish correspondence. Keep period-specific spellings, regional variations, and Hebrew-origin words as they were historically used. Fix only clear OCR errors, such as broken words or nonsensical combinations. Retain proper nouns, place names, abbreviations, and informal language. Maintain original line breaks and formatting:\n"
13
  input_ids = tokenizer(prompt + input_text, return_tensors="pt", truncation=True).input_ids
14
  output_ids = model.generate(input_ids, max_length=512)
15
  corrected_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)