from typing import Tuple
from app_utils import load_problems, SESSION_ID
from openai import OpenAI
from anthropic import Anthropic
from dotenv import load_dotenv
import os, re
from collections import defaultdict
import json
import logging
from datetime import datetime

# Configure logging
DEBUG = True  # Set to False to disable debug logging


def setup_logging():
    """Configure logging settings."""
    log_filename = f"grading_logs_{datetime.now().strftime('%Y%m%d')}.log"
    logging.basicConfig(
        level=logging.DEBUG if DEBUG else logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(log_filename),
            logging.StreamHandler()
        ]
    )


setup_logging()
logger = logging.getLogger(__name__)

load_dotenv()

LLMs = {
    "DeepInfra": {
        "API_KEY": os.getenv('DEEPINFRA_API_KEY'),
        "BASEURL": "https://api.deepinfra.com/v1/openai",
        "MODEL": "nvidia/Llama-3.1-Nemotron-70B-Instruct",
    },
    "ClaudeAI": {
        "API_KEY": os.getenv('ClaudeAI_API_KEY'),
        "MODEL": "claude-3-5-sonnet-latest",
    },
}

PROVIDER = "ClaudeAI"  # or "DeepInfra"


def get_client():
    """Create and return the appropriate client for the selected provider."""
    if PROVIDER == "DeepInfra":
        return OpenAI(
            api_key=LLMs[PROVIDER]["API_KEY"],
            base_url=LLMs[PROVIDER]["BASEURL"],
        )
    elif PROVIDER == "ClaudeAI":
        return Anthropic(api_key=LLMs[PROVIDER]["API_KEY"])
    else:
        raise ValueError("Invalid LLM provider selected")


def call_llm(client, prompt):
    """Make an API call to the selected LLM provider and return the response text."""
    if PROVIDER == "DeepInfra":
        chat_completion = client.chat.completions.create(
            model=LLMs[PROVIDER]["MODEL"],
            messages=[{"role": "user", "content": prompt}],
        )
        return chat_completion.choices[0].message.content
    else:  # ClaudeAI
        message = client.messages.create(
            model=LLMs[PROVIDER]["MODEL"],
            max_tokens=1000,
            temperature=0,
            messages=[{"role": "user", "content": prompt}],
        )
        return message.content[0].text if isinstance(message.content, list) else message.content


# Per-problem rubric cache; unknown problem IDs resolve to None
Grading_rubric = defaultdict(lambda: None)


def LLM_output_to_dict(LLM_output):
    """Parse the LLM's delimited response into a dict with Score, Feedback, and Rubric."""
    # Regex patterns for each field
    score_pattern = r"##Score:\s*(\d+)"
    feedback_pattern = r"##Feedback:\s*(.*?)\s*##Rubric:"
    rubric_pattern = r"##Rubric:\s*(.*)"

    # Extract the values using regex
    score_match = re.search(score_pattern, LLM_output, re.DOTALL)
    feedback_match = re.search(feedback_pattern, LLM_output, re.DOTALL)
    rubric_match = re.search(rubric_pattern, LLM_output, re.DOTALL)

    # Use the matched values, falling back to defaults when a field is missing
    score = int(score_match.group(1)) if score_match else 0
    feedback = feedback_match.group(1).strip() if feedback_match else ""
    rubric = rubric_match.group(1).strip() if rubric_match else ""

    return {
        "Score": score,
        "Feedback": feedback,
        "Rubric": rubric,
    }


def grade_submission(student_code: str, problem_id: str, problem_description: str,
                     student_answer: str, correct_answer: str) -> Tuple[float, str, str]:
    """Call the LLM to grade a student submission. Returns score, feedback, and rubric."""
    client = get_client()
    rubric = Grading_rubric[problem_id]
    prompt = f"""You are a programming assignment grader. Evaluate the student's answer based on the problem description, grading rubric, and correct answer, and provide a score and brief feedback. Note that if a problem asks an essay or short-answer question, the student's answer may be in comments like "# answer" or in triple quotes. If a grading rubric is not provided, develop your own rubric based on the problem description and correct answer. Do not include readability or structure criteria in the rubric.
Please be tolerant of minor syntax errors (deduct at most 1 point when there are fewer than 2 minor syntax errors). Focus on the logic of the code.

##Problem ID: {problem_id}
##Problem Description: {problem_description}
##Grading rubric: {rubric}
##Student Code: {student_code}
##Student Answer: {student_answer}
##Correct Answer: {correct_answer}

Provide your response delimited with the 3 fields below and nothing else. For the "Rubric" field, supply your grading rubric if you did not receive one; otherwise leave it empty. Here are the 3 fields you need to provide:
##Score: (int) a number between 0 and the highest score specified in the problem description
##Feedback: (str) Your code is mostly correct, but ... (your brief feedback in markdown, only if the student's answer is not 100% correct.)
##Rubric: (str) - 1. first (2 pts) - 2. second (3 pts) (your grading rubric for this problem in markdown format with numbered bullet points, if you did not receive one; otherwise leave it empty.)
"""

    logger.info(f"Problem ID: {problem_id}")
    if DEBUG:
        logger.debug("Prompt sent to LLM:")
        logger.debug(prompt)

    try:
        response = call_llm(client, prompt)
        if DEBUG:
            logger.debug("Raw LLM Response:")
            logger.debug(response)

        # Strip surrounding whitespace before parsing
        response = response.strip()
        try:
            result = LLM_output_to_dict(response)
            score = float(result.get('Score', 0.0))
            feedback = str(result.get('Feedback', ""))
            rubric = str(result.get('Rubric', ""))
            if DEBUG:
                logger.debug(f"Parsed Results - Score: {score}, Feedback: {feedback}")
                if rubric and rubric != "None":
                    logger.debug(f"New rubric created for problem {problem_id}")
            # Cache a newly generated rubric so later submissions for the same problem reuse it
            if rubric and rubric != "None":
                Grading_rubric[problem_id] = rubric
            return score, feedback, rubric
        except Exception as e:
            logger.error(f"Failed to parse LLM response: {e}")
            return 0.0, "Failed to parse grading", rubric
    except Exception as e:
        logger.error(f"Error during grading: {str(e)}")
        return 0.0, f"Grading error: {str(e)}", rubric
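

# Example usage (illustrative sketch only): the problem ID, description, and answers
# below are hypothetical placeholders showing how grade_submission is typically invoked;
# a real caller would likely obtain them via load_problems(). Running this requires the
# selected provider's API key to be set in the environment.
if __name__ == "__main__":
    sample_score, sample_feedback, sample_rubric = grade_submission(
        student_code="def add(a, b):\n    return a + b",
        problem_id="demo-01",
        problem_description="Write a function add(a, b) that returns the sum of a and b. (5 pts)",
        student_answer="",
        correct_answer="def add(a, b):\n    return a + b",
    )
    print(f"Score: {sample_score}")
    print(f"Feedback: {sample_feedback}")
    print(f"Rubric: {sample_rubric}")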