# Spaces:
# Running
# Running
# app.py
# MIT License
#
# Copyright (c) 2024 englissi
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import html
import os
import subprocess
import sys
from difflib import SequenceMatcher

import gradio as gr
from nltk.tokenize import sent_tokenize
from transformers import T5Tokenizer, T5ForConditionalGeneration
# Ensure the necessary NLTK data is downloaded.
# The helper script is run with the interpreter that is executing this app
# (sys.executable) instead of whatever "python" resolves to on PATH, and
# without going through a shell. "python" is kept only as a fallback for the
# rare case where sys.executable is empty.
download_result = subprocess.run([sys.executable or "python", "download.py"], check=False)
if download_result.returncode != 0:
    # Stay best-effort (the app still starts), but do not hide the failure:
    # a missing tokenizer resource would otherwise only surface on first use.
    print(
        f"warning: download.py exited with status {download_result.returncode}",
        file=sys.stderr,
    )

# Load a pre-trained T5 model specifically fine-tuned for grammar correction.
# The checkpoint name is defined once so tokenizer and model cannot drift apart.
MODEL_NAME = "prithivida/grammar_error_correcter_v1"
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME)
# Function to underline and color revised parts
def underline_and_color_revisions(original, corrected):
    """Return *corrected* as an HTML fragment with its revised words highlighted.

    Both strings are split on whitespace and compared word by word with
    ``difflib.SequenceMatcher``. Words that were inserted or replaced in
    *corrected* are wrapped in a red ``<u>`` element; unchanged words are
    emitted as plain text. All words are HTML-escaped before being placed in
    the fragment, so markup typed by the user (or produced by the model) is
    displayed literally instead of being interpreted by the browser.

    Args:
        original: The sentence as written by the user.
        corrected: The corrected version of the same sentence.

    Returns:
        The words of the result joined by single spaces, as an HTML string.
    """
    # Split once; the opcode indices below refer to these word lists.
    original_words = original.split()
    corrected_words = corrected.split()
    matcher = SequenceMatcher(None, original_words, corrected_words)

    pieces = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag in ("insert", "replace"):
            revised = html.escape(" ".join(corrected_words[j1:j2]))
            pieces.append(f"<u style='color:red;'>{revised}</u>")
        elif tag == "equal":
            pieces.append(html.escape(" ".join(original_words[i1:i2])))
        # 'delete' opcodes (words present only in the original) produce no
        # output, so dropped words simply disappear from the rendered text.
    return " ".join(pieces)


# Function to perform grammar correction
def grammar_check(text):
    """Correct *text* sentence by sentence and return the result as HTML.

    The text is split into sentences with ``sent_tokenize``; each sentence is
    sent to the module-level ``model`` with a ``"gec: "`` prefix, and the
    decoded output is diffed against the input sentence by
    ``underline_and_color_revisions``.

    Args:
        text: The raw text entered by the user.

    Returns:
        An HTML string containing the corrected sentences separated by single
        spaces, with revised words underlined in red. Empty or whitespace-only
        input yields an empty string.
    """
    # Nothing to correct: skip tokenization and model inference entirely.
    if not text or not text.strip():
        return ""

    sentences = sent_tokenize(text)
    corrected_sentences = []
    for sentence in sentences:
        input_ids = tokenizer.encode(
            f"gec: {sentence}",
            return_tensors="pt",
            max_length=512,
            truncation=True,
        )
        outputs = model.generate(input_ids, max_length=512, num_beams=4, early_stopping=True)
        corrected_sentences.append(tokenizer.decode(outputs[0], skip_special_tokens=True))

    return " ".join(
        underline_and_color_revisions(orig, corr)
        for orig, corr in zip(sentences, corrected_sentences)
    )
# Text shown under the title: a usage hint, the writing prompt, and a sample
# student answer that users are invited to copy and paste into the input box.
APP_DESCRIPTION = "".join(
    [
        "Enter text to check for grammar mistakes.\n\n",
        "Writing Prompt:\n",
        "In the story, Alex and his friends discovered an ancient treasure in Whispering Hollow and decided to donate the artifacts to the local museum.\n\n",
        "In the past, did you have a similar experience where you found something valuable or interesting? Tell the story. Describe what you found, what you did with it, and how you felt about your decision.\n\n",
        "Remember to use past tense in your writing.\n\n",
        "<b>A student's sample answer:</b>\n",
        "<blockquote>When I was 10, I find an old coin in my backyard. I kept it for a while and shows it to my friends. They was impressed and say it might be valuable. Later, I take it to a local antique shop, and the owner told me it was very old. I decided to give it to the museum in my town. The museum was happy and put it on display. I feel proud of my decision.<br><br><i>Copy and paste to try.</i></blockquote>",
    ]
)

# Build the Gradio UI: plain-text input, HTML output produced by grammar_check.
interface = gr.Interface(
    fn=grammar_check,
    inputs="text",
    outputs="html",  # grammar_check returns an HTML fragment
    title="Grammar Checker",
    description=APP_DESCRIPTION,
)

# Start serving the interface.
interface.launch()