"""LIME-based sentence-level explanations, rendered as HTML-highlighted text."""

from lime.lime_text import LimeTextExplainer
from nltk.tokenize import sent_tokenize

from predictors import predict_for_explainanility
from predictors import split_text_allow_complete_sentences_nltk, update

def explainer(text, model_type):
    """Run LIME on `text` at sentence granularity and return per-sentence weights."""

    def predictor_wrapper(text):
        return predict_for_explainanility(text=text, model_type=model_type)

    class_names = ["negative", "positive"]
    explainer_ = LimeTextExplainer(
        class_names=class_names, split_expression=sent_tokenize
    )
    sentences = sent_tokenize(text)
    num_sentences = len(sentences)
    exp = explainer_.explain_instance(
        text, predictor_wrapper, num_features=num_sentences, num_samples=100
    )
    # exp.as_map()[1] maps sentence indices to their weights for the positive class.
    weights_mapping = exp.as_map()[1]
    sentences_weights = {sentence: 0 for sentence in sentences}
    for idx, weight in weights_mapping:
        if 0 <= idx < len(sentences):
            sentences_weights[sentences[idx]] = weight
    print(sentences_weights, model_type)
    return sentences_weights, sentences, exp


def analyze_and_highlight(text, model_type):
    """Return `text` as HTML with each sentence shaded by its LIME weight."""
    highlighted_text = ""
    sentences_weights, sentences, _ = explainer(text, model_type)
    positive_weights = [weight for weight in sentences_weights.values() if weight >= 0]
    negative_weights = [weight for weight in sentences_weights.values() if weight < 0]

    smoothing_factor = 0.001  # small offset so the extreme weights never map to pure white
    min_positive_weight = min(positive_weights) if positive_weights else 0
    max_positive_weight = max(positive_weights) if positive_weights else 0
    min_negative_weight = min(negative_weights) if negative_weights else 0
    max_negative_weight = max(negative_weights) if negative_weights else 0

    max_positive_weight += smoothing_factor
    min_negative_weight -= smoothing_factor

    for sentence in sentences:
        weight = sentences_weights[sentence]
        sentence = sentence.strip()
        if not sentence:
            continue

        if weight >= 0 and max_positive_weight != min_positive_weight:
            # Scale positive weights to [0, 1]; stronger weights give a deeper red.
            normalized_weight = (weight - min_positive_weight + smoothing_factor) / (
                max_positive_weight - min_positive_weight
            )
            color = f"rgb(255, {int(255 * (1 - normalized_weight))}, {int(255 * (1 - normalized_weight))})"
        elif weight < 0 and min_negative_weight != max_negative_weight:
            # Scale negative weights to [0, 1]; more negative weights give a deeper green.
            normalized_weight = (weight - max_negative_weight - smoothing_factor) / (
                min_negative_weight - max_negative_weight
            )
            color = f"rgb({int(255 * (1 - normalized_weight))}, 255, {int(255 * (1 - normalized_weight))})"
        else:
            color = "rgb(255, 255, 255)"  # no spread in the weights, so leave the sentence unshaded

        highlighted_sentence = (
            f'<span style="background-color: {color}; color: black;">{sentence}</span> '
        )
        highlighted_text += highlighted_sentence

    return highlighted_text


def segmented_higlighter(text, bias_buster_selected, model_type):
    """Split `text` into sentence-complete chunks, highlight each, and append a gradient legend."""
    if bias_buster_selected:
        text = update(text)
    result = ""
    segmented_results = split_text_allow_complete_sentences_nltk(text)
    for segment in segmented_results:
        chunk = analyze_and_highlight(segment, model_type)
        result = result + " " + chunk
    print(result)
    if model_type == "bc":
        gradient_labels = ["HUMAN", "AI"]
    elif model_type == "quillbot":
        gradient_labels = ["ORIGINAL", "HUMANIZED"]
    else:
        raise ValueError(f"Invalid model type: {model_type}")

    highlighted_text = (
        "<div>"
        + result
        + "<div style='margin-top: 20px; text-align: center;'>"
        + "<div style='position: relative; display: inline-block; width: 60%; height: 20px; background: linear-gradient(to right, #00FF00, #FFFFFF, #FF0000); font-family: \"Segoe UI\", Tahoma, Geneva, Verdana, sans-serif; font-size: 10px; font-weight: 600; color: #222; border-radius: 10px; box-shadow: 0px 2px 5px rgba(0, 0, 0, 0.1);'>"
        + f"<span style='position: absolute; left: 5px; top: 50%; transform: translateY(-50%); color: #000; font-weight: 600;'>{gradient_labels[0]}</span>"
        + f"<span style='position: absolute; right: 5px; top: 50%; transform: translateY(-50%); color: #000; font-weight: 600;'>{gradient_labels[1]}</span>"
        + "</div>"
        + "</div>"
        + "</div>"
    )
    return highlighted_text
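

if __name__ == "__main__":
    # Minimal usage sketch (an assumption, not part of the original module): it
    # presumes the `predictors` module and its models are importable in this
    # environment and that NLTK's "punkt" sentence tokenizer data is installed.
    # model_type="bc" follows the branch above that labels the gradient HUMAN/AI.
    sample = (
        "The quick brown fox jumps over the lazy dog. "
        "This second sentence exists only so LIME has more than one feature to weigh."
    )
    html = segmented_higlighter(sample, bias_buster_selected=False, model_type="bc")
    with open("highlighted.html", "w", encoding="utf-8") as f:
        f.write(html)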