PROMPT = """
You are a highly advanced language model capable of identifying and flagging text that may contain inappropriate, harmful, or not safe for work (NSFW) content. Your objective is to analyze the provided text in {lang} and determine if it includes any content falling into these categories. NSFW or harmful content encompasses explicit language, sexual content, violent or graphic descriptions, hate speech, harassment, threats, and any other material deemed inappropriate or dangerous.

Please analyze the following text and provide your analysis in JSON format:

Text: {text}

Response:
- If the text is appropriate and does not contain any harmful or NSFW content, return "Safe": "True" in json.
- If the text contains explicit, sexual, violent, threatening, or abusive content, return "Safe": "False" in json.
"""
def get_prompt(prompt):
    return "[INS]"+prompt+"[/INS]"

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

import gradio as gr
from langchain_huggingface import HuggingFaceEndpoint
from langdetect import detect
CONTEXT_WINDOW = 50_000
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser


llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    task="text-classification",
    max_new_tokens=4096,
    temperature=0.5,
    do_sample=False,
)
            

def detect_nsfw(text):
    lang = detect(text)
    
    # Define prompt template
    prompt_detect = PromptTemplate(
        template=PROMPT,
        input_variables=["lang", "text"]
    )

    text = text[:CONTEXT_WINDOW]  # Limit text to first 50,000 characters
    
    detect_chain = prompt_detect | llm | JsonOutputParser()
    try:
        detection = detect_chain.invoke({"lang": lang, "text": text})
        if detection.get("Safe") == "True":
            result_text = "The text is safe and does not contain explicit content."
        elif detection.get("Safe") == "False":
            result_text = "The text contains explicit content."
        else:
            result_text = "Detection result is unexpected."
    except Exception as e:
        result_text = f"Error occurred during detection: {str(e)}"
        
    short_answer = PromptTemplate(
        template="""Your task is to understand the response and give a binary answer like Profanity or Not Profanity. You dont need to explain.
        Your response MUST be either Profanity or Not Profanity.
        Read that text:
        {text}
        
        Answer:
        """,
        input_variables=["text"]
    )
    answer_chain = short_answer | llm 

    return answer_chain.invoke({"text": result_text})

# Define Gradio interface
gr.Interface(
    fn=detect_nsfw,
    inputs=gr.Textbox(label="Enter text to analyze", placeholder="Paste your text here..."),
    outputs=gr.Textbox(label="Detection", placeholder="Detection results will appear here...", lines=10),
    title="NSFW Content Detection",
    description="Detect explicit or harmful content in text.",
).launch()