Spaces:

harir
/

Review-Toxicity-Checker

File size: 7,009 Bytes

import parser
import requests
import re

def zephyr_score(sentence):
    """Build the 'J'/'V' classification prompt in the <|user|>/<|assistant|> layout.

    The prompt describes the two labels ('J' for positive, encouraging,
    neutral or professional text; 'V' for overly blunt, excessively negative,
    accusatory, sweeping or personally attacking text) and asks the model to
    answer with the single letter only.

    Args:
        sentence: The text to classify. It is placed between double quotes
            after ``Text: `` and formatted the same way an f-string would.

    Returns:
        The complete prompt string, ending with ``<|assistant|>`` and a newline.
    """
    # Assembled one line per literal so the trailing spaces that belong to the
    # prompt are visible and cannot be lost to whitespace-stripping editors.
    return (
        "<|user|> \n"
        "You are an assistant helping with paper reviews. \n"
        "You will be tasked to classify sentences as 'J' or 'V'\n"
        "\n"
        "'J' is positive or 'J' is encouraging. \n"
        "'J' has a neutral tone or 'J' is professional. \n"
        "'V' is overly blunt or 'V' contains excessive negativity and no constructive feedback.\n"
        "'V' contains an accusatory tone or 'V' contains sweeping generalizations or 'V' contains personal attacks. \n"
        "\n"
        f'Text: "{sentence}"\n'
        "\n"
        "Please classify this text as either 'J' or 'V'. Only output 'J' or 'V' with no additional explanation.<|endoftext|>\n"
        "<|assistant|>\n"
    )

def zephyr_revise(sentence):
    """Build the rewrite prompt in the <|user|>/<|assistant|> layout.

    The prompt tells the model the quoted text was classified as 'toxic' and
    asks for a friendly but professional rewording that keeps the criticism
    and changes as little as possible.

    Args:
        sentence: The text to revise. It is placed between double quotes
            after ``Text: ``.

    Returns:
        The complete prompt string, ending with ``<|assistant|>`` and a newline.
    """
    # NOTE(review): "critism" below is a typo for "criticism". It is kept
    # verbatim because this text is sent to the model as-is.
    return (
        "<|user|> \n"
        "You are an assistant that helps users revise Paper Reviews.\n"
        "Paper reviews exist to provide authors of academic research papers constructive critism.\n"
        "\n"
        "This is text found in a review.\n"
        "This text was classified as 'toxic':\n"
        "\n"
        f'Text: "{sentence}"\n'
        "\n"
        "Please revise this text such that it maintains the criticism in the original text and delivers it in a friendly but professional manner. Make minimal changes to the original text.<|endoftext|>\n"
        "<|assistant|>\n"
    )

def mistral_score(sentence):
    """Build the 'J'/'V' classification prompt wrapped in ``<s>[INST] ... [/INST]``.

    The instruction text describes the two labels and asks the model to
    answer with the single letter 'J' or 'V' only.

    Args:
        sentence: The text to classify. It is placed between double quotes
            after ``Text: ``.

    Returns:
        The complete prompt string; it ends right after ``[/INST]`` with no
        trailing newline.
    """
    # One literal per prompt line keeps the significant trailing spaces explicit.
    return (
        "<s>[INST]\n"
        "You are an assistant helping with paper reviews. \n"
        "You will be tasked to classify sentences as 'J' or 'V'\n"
        "\n"
        "'J' is positive or 'J' is encouraging. \n"
        "'J' has a neutral tone or 'J' is professional. \n"
        "'V' is overly blunt or 'V' contains excessive negativity and no constructive feedback.\n"
        "'V' contains an accusatory tone or 'V' contains sweeping generalizations or 'V' contains personal attacks. \n"
        "\n"
        f'Text: "{sentence}"\n'
        "\n"
        "Please classify this text as either 'J' or 'V'. Only output 'J' or 'V' with no additional explanation. [/INST]"
    )

def mistral_revise(sentence):
    """Build the rewrite prompt wrapped in ``<s>[INST] ... [/INST]``.

    The instruction tells the model the quoted text was classified as 'toxic'
    and asks for a friendly but professional rewording with minimal changes.
    After ``[/INST]`` the prompt already starts the answer with
    ``Revised Text: `` so the model continues from there.

    Args:
        sentence: The text to revise. It is placed between double quotes
            after ``Text: ``.

    Returns:
        The complete prompt string, ending with ``Revised Text: `` (including
        the trailing space) and no newline.
    """
    # NOTE(review): "critism" below is a typo for "criticism". It is kept
    # verbatim because this text is sent to the model as-is.
    return (
        "<s>[INST] \n"
        "You are an assistant that helps users revise Paper Reviews.\n"
        "Paper reviews exist to provide authors of academic research papers constructive critism.\n"
        "\n"
        "This is text found in a review.\n"
        "This text was classified as 'toxic':\n"
        "\n"
        f'Text: "{sentence}"\n'
        "\n"
        "Please revise this text such that it maintains the criticism in the original text and delivers it in a friendly but professional manner. Make minimal changes to the original text. [/INST] Revised Text: "
    )

def phi_score(sentence):
    """Build the 'J'/'V' classification prompt in the <|system|>/<|user|>/<|assistant|> layout.

    The label definitions go in the system turn; the quoted text and the
    instruction to answer with the single letter 'J' or 'V' go in the user
    turn. Each turn is closed with ``<|end|>``.

    Args:
        sentence: The text to classify. It is placed between double quotes
            after ``Text: ``.

    Returns:
        The complete prompt string, ending with ``<|assistant|>`` and a newline.
    """
    # One literal per prompt line keeps the significant trailing spaces explicit.
    return (
        "<|system|>\n"
        "You are an assistant helping with paper reviews. \n"
        "You will be tasked to classify sentences as 'J' or 'V'\n"
        "\n"
        "'J' is positive or 'J' is encouraging. \n"
        "'J' has a neutral tone or 'J' is not discouraging and not encouraging.\n"
        "'V' is overly blunt or 'V' contains excessive negativity and no constructive feedback.\n"
        "'V' contains an accusatory tone or 'V' contains sweeping generalizations or 'V' contains personal attacks.<|end|> \n"
        "<|user|>\n"
        f'Text: "{sentence}"\n'
        "\n"
        "Please classify this text as either 'J' or 'V'. Only output 'J' or 'V' with no additional explanation.<|end|>\n"
        "<|assistant|>\n"
    )

def phi_revise(sentence):
    """Build the rewrite prompt in the <|system|>/<|user|>/<|assistant|> layout.

    The system turn introduces the assistant's role; the user turn states
    that the quoted text was classified as 'toxic' and asks for a friendly
    but professional rewording with minimal changes.

    Args:
        sentence: The text to revise. It is placed between double quotes
            after ``Text: ``.

    Returns:
        The complete prompt string, ending with ``<|assistant|>`` and a newline.
    """
    # NOTE(review): "critism" below is a typo for "criticism". It is kept
    # verbatim because this text is sent to the model as-is.
    return (
        "<|system|>\n"
        "You are an assistant that helps users revise Paper Reviews.\n"
        "Paper reviews exist to provide authors of academic research papers constructive critism.<|end|>\n"
        "<|user|>\n"
        "This is text found in a review.\n"
        "This text was classified as 'toxic'\n"
        "\n"
        f'Text: "{sentence}"\n'
        "\n"
        "Please revise this text such that it maintains the criticism in the original text and delivers it in a friendly but professional manner. Make minimal changes to the original text.<|end|>\n"
        "<|assistant|>\n"
    )

def _query_generated_text(prompt, api_key, model_id, max_new_tokens, timeout):
    """POST *prompt* to the Hugging Face Inference API and return the generated text.

    Args:
        prompt: Prompt to send as the request's ``inputs``.
        api_key: Token sent as ``Authorization: Bearer <api_key>``.
        model_id: Model path appended to the inference endpoint URL.
        max_new_tokens: Generation length limit passed to the API.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``generated_text`` field of the first element of the response.

    Raises:
        RuntimeError: If the response is not JSON, carries an ``error`` field,
            or does not have the expected ``[{"generated_text": ...}]`` shape.
        requests.exceptions.RequestException: On connection problems or timeout.
    """
    api_url = f"https://api-inference.huggingface.co/models/{model_id}"
    headers = {"Authorization": f"Bearer {api_key}"}
    payload = {
        "inputs": str(prompt),
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": 0.1,
            "return_full_text": False,
        },
        # Ask the API to hold the request while the model loads instead of
        # answering immediately with a "model is loading" error.
        "options": {"wait_for_model": True},
    }
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
    try:
        data = response.json()
    except ValueError as err:
        raise RuntimeError(
            f"Inference API returned a non-JSON response (HTTP {response.status_code})"
        ) from err
    # Failures come back as a JSON object with an "error" field; indexing it
    # with [0] used to raise KeyError(0), whose message is just "0".
    if isinstance(data, dict) and "error" in data:
        raise RuntimeError(f"Inference API error: {data['error']}")
    try:
        return data[0]["generated_text"]
    except (KeyError, IndexError, TypeError) as err:
        raise RuntimeError(f"Unexpected Inference API response: {data!r}") from err

def query_model_score(sentence, api_key, model_id, prompt_fun, timeout=120):
    """Ask the model to classify *sentence* and map its answer to 0 or 1.

    Args:
        sentence: Text to classify.
        api_key: Hugging Face API token.
        model_id: Model path on the Hugging Face Inference API.
        prompt_fun: Callable that turns *sentence* into the prompt to send.
        timeout: Seconds to wait for the HTTP request (default 120).

    Returns:
        1 if the generated text contains the letter 'v' (case-insensitive),
        otherwise 0.

    Raises:
        RuntimeError: If the API reports an error or returns an unexpected payload.
    """
    generated = _query_generated_text(
        prompt_fun(sentence), api_key, model_id, max_new_tokens=5, timeout=timeout
    )
    return 1 if 'v' in generated.lower() else 0

def query_model_revise(sentence, api_key, model_id, prompt_fun, timeout=120):
    """Ask the model to rewrite *sentence* and return the raw generated text.

    Args:
        sentence: Text to revise.
        api_key: Hugging Face API token.
        model_id: Model path on the Hugging Face Inference API.
        prompt_fun: Callable that turns *sentence* into the prompt to send.
        timeout: Seconds to wait for the HTTP request (default 120).

    Returns:
        The generated text exactly as returned by the API (up to 200 new tokens).

    Raises:
        RuntimeError: If the API reports an error or returns an unexpected payload.
    """
    return _query_generated_text(
        prompt_fun(sentence), api_key, model_id, max_new_tokens=200, timeout=timeout
    )
    
def extract_quotes(text):
    """Return the double-quoted parts of *text*, or *text* itself if there are none.

    Every ``"..."`` span (which may be empty and may contain newlines) is
    collected in order and the contents are joined with single spaces. When
    no complete quoted span exists, the input is returned unchanged.

    Args:
        text: String to search.

    Returns:
        The space-joined quoted contents, or *text* when nothing is quoted.
    """
    quoted_parts = [found.group(1) for found in re.finditer(r'"([^"]*)"', text)]
    return ' '.join(quoted_parts) if quoted_parts else text
        
def revise_review(review, api_key, model_id, highlight_color):
    """Score each sentence of a review and rewrite the ones flagged by the model.

    Sentences longer than 10 characters are sent to ``query_model_score``;
    those scored non-zero are rewritten with ``query_model_revise`` and wrapped
    in an inline ``<div>`` with the given background colour. Shorter sentences
    and sentences scored 0 are copied through unchanged.

    Args:
        review: Review text. Double quotes are replaced by single quotes
            before processing so they cannot clash with the quoting used in
            the prompts.
        api_key: Hugging Face API token passed to the query functions.
        model_id: Model identifier; must contain 'zephyr', 'mistral' or 'phi'
            (case-insensitive) so the matching prompt builders can be chosen.
        highlight_color: CSS colour used as the background of revised sentences.

    Returns:
        A dict with ``success`` (bool), ``message`` (str) and ``data``, which
        holds ``revision`` (the rebuilt text), ``score`` (1 if any sentence
        was flagged, else 0), ``sentence_count`` and ``revised_sentences``.
        On failure ``success`` is False, ``message`` carries the error text
        and the ``data`` fields stay empty strings.
    """
    result = {
        "success": False,
        "data": {
            "revision": "",
            "score": "",
            "sentence_count": "",
            "revised_sentences": ""
        },
        "message": ""
    }

    # str() keeps a non-string model_id (e.g. None) from raising outside the
    # try block; it then simply falls through to the "unsupported" branch.
    model_key = str(model_id).lower()
    if 'zephyr' in model_key:
        revision_prompt = zephyr_revise
        score_prompt = zephyr_score
    elif 'mistral' in model_key:
        revision_prompt = mistral_revise
        score_prompt = mistral_score
    elif 'phi' in model_key:
        revision_prompt = phi_revise
        score_prompt = phi_score
    else:
        # Previously the prompt variables stayed unbound here and the user
        # saw an UnboundLocalError message from deep inside the loop.
        result["message"] = (
            f"Unsupported model '{model_id}': expected a Zephyr, Mistral or Phi model id."
        )
        return result

    try:
        review = review.replace('"', "'")
        # Materialised because the sentences are iterated twice below.
        # presumably parse_sentences splits the review into sentence strings — verify against parser module
        sentences = list(parser.parse_sentences(review))
        review_score = 0
        revision_count = 0
        pieces = []
        for sentence in sentences:
            # Very short fragments are passed through without calling the model.
            if len(sentence) <= 10:
                pieces.append(" " + sentence)
                continue
            if query_model_score(sentence, api_key, model_id, score_prompt) == 0:
                pieces.append(" " + sentence)
                continue
            review_score = 1
            revision_count += 1
            revision = query_model_revise(sentence, api_key, model_id, revision_prompt)
            revision = extract_quotes(revision).strip().strip('"')
            # NOTE(review): sentence, revision and highlight_color are inserted
            # into HTML unescaped — confirm how the consumer renders this.
            pieces.append(
                f" <div style='background-color: {highlight_color}; display: inline;'>{revision}</div>"
            )

        # NOTE(review): the count uses a 20-character threshold while scoring
        # uses 10, so revised_sentences can exceed sentence_count — confirm intent.
        sentence_count = sum(1 for sentence in sentences if len(sentence) > 20)

        # Fill the result only after everything succeeded so a late failure
        # cannot leave success=True next to an error message.
        result["data"]["revision"] = "".join(pieces)
        result["data"]["score"] = review_score
        result["data"]["sentence_count"] = sentence_count
        result["data"]["revised_sentences"] = revision_count
        result["message"] = "Review successfully revised!"
        result["success"] = True
    except Exception as e:
        # Top-level boundary: report any failure to the caller in the result.
        result["message"] = str(e)

    return result