# Maps each audit category to the analysis types that belong to it.
# Every analysis-type name listed here is meant to be usable as a key into
# ANALYSIS_TYPES (and, where a schema exists, JSON_SCHEMAS) below, so the
# spelling and capitalisation must match those keys exactly.
ai_audit_analysis_categories = {
    "AI Audit": [
        "sentiment_analysis",
        "emotion_detection",
        "political_bias_detection",
        "stress_level_detection",
        "empathy_level_assessment",
        "mood_detection",
        "toxicity_detection",
    ],
    "GDPR": [
        "Privacy_Assessment",
        "Consent_and_Transparency",
        "Data_Security",
        "Environmental_Impact",
    ],
    "Toxicity": [
        "Content_Moderation",
        "Reporting_Mechanism",
        "Content_Guidelines",
        "User_Education",
    ],
    "Legal": [
        "Privacy_Policy",
        "Data_Retention",
        "Consent_Mechanism",
    ],
    "Context": [
        "Ethical_AI",
        "Bias_Mitigation",
        "Fairness_Assessment",
        "Explainability",
    ],
    # Each entry needs its own trailing comma: adjacent string literals are
    # silently concatenated by Python into a single element.
    "Governance": [
        "Model_Development",
        "Data_Quality",
        "Bias_Mitigation",
        "Fairness_Assessment",
        "Explainability",
        "User_Input",
    ],
    "RiskManagement": [
        "Corporate_Ethics",
        "Board_Management",
        "Stakeholder_Engagement",
    ],
    "Robustness": [
        "System_Reliability",
        "Quality_Assurance",
        "Stress_Testing",
        "Fail_Safe_Procedures",
    ],
    "Sustainability": [
        "Renewable_Resources",
        "Waste_Reduction",
        "Energy_Efficiency",
        "Sustainable_Practices",
    ],
}

# Define a standard template for prompts
# NOTE(review): get_system_prompt below builds its own text and does not use
# this template; it is kept for any external callers.
STANDARD_PROMPT_TEMPLATE = "You are a data analysis assistant capable of {analysis_type} analysis. {specific_instruction} Respond with your analysis in JSON format. The JSON schema should include '{json_schema}'."


def get_system_prompt(analysis_type: str) -> str:
    """Build the system prompt for one analysis type.

    Args:
        analysis_type: Name of the analysis to run. It is looked up in
            ANALYSIS_TYPES for the instruction text and in JSON_SCHEMAS for
            the expected response fields.

    Returns:
        A single prompt string containing the analysis type, its instruction,
        the output-format requirements and the schema rendered as
        ``{'field': description, ...}``. For a name missing from
        ANALYSIS_TYPES a generic instruction is used; for a name missing from
        JSON_SCHEMAS the schema renders as ``{}``.
    """
    specific_instruction = ANALYSIS_TYPES.get(
        analysis_type, "Perform the analysis as per the specified type."
    )
    json_schema = JSON_SCHEMAS.get(analysis_type, {})
    json_schema_str = ", ".join(
        f"'{key}': {value}" for key, value in json_schema.items()
    )
    return (
        f"You are a data analyst API capable of {analysis_type} analysis. "
        f"{specific_instruction} Please respond with your analysis directly in JSON format "
        "(without using Markdown code blocks or any other formatting). "
        # The trailing ". " keeps this sentence from running into the next
        # literal when the adjacent strings are concatenated.
        "Always include confidence_score:number (0-1) with two decimals for result based on analysis. "
        # '{{' and '}}' emit literal braces around the rendered schema.
        f"The JSON schema should include: {{{json_schema_str}}}."
    )


# Instruction text inserted into the system prompt, keyed by analysis type.
ANALYSIS_TYPES = {
    "sentiment_analysis": "Analyze the sentiment of the provided text. Determine whether the sentiment is positive, negative, or neutral and provide a confidence score.",
    "emotion_detection": "Detect and identify the primary emotions expressed in the provided text. Provide a score for the intensity of the detected emotion.",
    "political_bias_detection": "Detect any political bias in the provided text, identifying leaning towards particular ideologies or parties.",
    "stress_level_detection": "Analyze the text to assess stress levels, identifying triggers and intensity of stress.",
    "empathy_level_assessment": "Assess the level of empathy expressed in the text, identifying empathetic responses and tendencies.",
    "mood_detection": "Detect the mood of the individual based on textual cues, ranging from happy to sad, calm to angry.",
    "toxicity_detection": "Identify and assess the level of toxicity in the provided text. Determine whether the text contains harmful, offensive, or inappropriate content and provide a score indicating the severity of the toxicity.",

    # GDPR-related types
    "Consent_and_Transparency": "Evaluate how consent is obtained and the level of transparency provided to users regarding data usage.",
    "Data_Security": "Assess the measures in place for data security, including vulnerabilities and compliance with security standards.",
    "Privacy_Assessment": "Analyze the overall privacy practices, including policy compliance, data minimization, and user data accessibility.",
    "Environmental_Impact": "Assess the environmental impact of data processing practices, including carbon footprint and energy efficiency.",

    # Toxicity-related types
    "Content_Moderation": "Evaluate the effectiveness of content moderation practices, including automated and human moderation efforts.",
    "Reporting_Mechanism": "Assess the ease and effectiveness of reporting mechanisms for inappropriate or harmful content.",
    "Content_Guidelines": "Analyze the clarity and comprehensiveness of content guidelines and their enforcement consistency.",
    "User_Education": "Evaluate the availability and accessibility of educational resources for users regarding appropriate content and behavior.",

    # Legal-related types
    "Privacy_Policy": "Analyze the clarity and compliance of a privacy policy with legal standards.",
    "Data_Retention": "Evaluate the data retention practices, including periods, deletion policies, and legal compliance.",
    "Consent_Mechanism": "Assess the clarity and effectiveness of the consent mechanism in place for data collection and usage.",
    "GDPR_Compliance": "Evaluate the level of GDPR compliance in data handling, protection measures, and breach notification protocols.",

    # Context-related types
    "Ethical_AI": "Assess adherence to ethical standards in AI practices, including identification and mitigation of ethical issues.",
    "Bias_Mitigation": "Evaluate the presence and mitigation of bias in data or algorithms.",
    "Fairness_Assessment": "Assess fairness in AI systems, identifying affected groups and providing recommendations for improvement.",
    "Explainability": "Evaluate the transparency and explainability of AI models to users.",

    # Governance-related types
    "Model_Development": "Analyze the process of model development, including team composition and ethical considerations.",
    "Data_Quality": "Assess the quality of data used, focusing on accuracy, completeness, and timeliness.",
    "User_Input": "Evaluate the mechanisms for and impact of user feedback on the system.",

    # Risk Management-related types
    "Corporate_Ethics": "Assess the ethical practices within a corporation, including employee training and ethics code adherence.",
    "Board_Management": "Evaluate the effectiveness and diversity of board management and its compliance with ethical standards.",
    "Stakeholder_Engagement": "Analyze stakeholder engagement practices, including inclusion, feedback mechanisms, and satisfaction.",
    "Risk_Management": "Assess the identification, mitigation, and monitoring of risks within an organization.",

    # Robustness-related types
    "System_Reliability": "Evaluate the reliability and resilience of a system, including uptime and redundancy measures.",
    "Quality_Assurance": "Assess the quality assurance practices, including compliance with standards and testing frequency.",
    "Stress_Testing": "Analyze the system's robustness through stress testing and identify weaknesses.",
    "Fail_Safe_Procedures": "Evaluate the effectiveness of fail-safe procedures in place for system failures.",

    # Sustainability-related types
    "Renewable_Resources": "Assess the use of renewable resources and sustainability goals in operations.",
    "Waste_Reduction": "Evaluate waste management practices, reduction rates, and recycling initiatives.",
    "Energy_Efficiency": "Analyze energy consumption and efficiency, including energy-saving measures and audits.",
    "Sustainable_Practices": "Evaluate the adoption of sustainable practices, including training and overall impact.",
}

# Expected response fields per analysis type. Each value maps a field name to
# a human-readable type description that is rendered verbatim into the prompt.
JSON_SCHEMAS = {
    "sentiment_analysis": {
        "sentiment": "string (positive, negative, neutral)",
        "confidence_score": "number (0-1)",
        "text_snippets": "array of strings (specific text portions contributing to sentiment)",
    },
    "emotion_detection": {
        "emotion": "string (primary emotion detected)",
        "confidence_score": "number (0-1)",
        "secondary_emotions": "array of objects (secondary emotions and their scores)",
    },
    "political_bias_detection": {
        "bias": "string (left, right, neutral)",
        "confidence_score": "number (0-1)",
        "bias_indicators": "array of strings (elements indicating bias)",
        "political_alignment_score": "number (quantifying degree of political bias)",
    },
    "stress_level_detection": {
        "stress_level": "string",
        "stress_triggers": "array of strings",
    },
    "empathy_level_assessment": {
        "empathy_level": "string",
        "empathetic_responses": "array of strings",
    },
    "mood_detection": {
        "mood": "string",
        "mood_intensity": "number",
    },
    "toxicity_detection": {
        "toxicity_level": "string (none, low, medium, high)",
        "toxicity_flags": "array of strings (specific words or phrases contributing to toxicity)",
        "contextual_factors": "array of objects (additional contextual elements influencing toxicity interpretation)",
    },

    # GDPR-related schemas
    "Consent_and_Transparency": {
        "consent_obtained": "boolean",
        "transparency_level": "string (low, medium, high)",
        "missing_information": "array of strings (information not clearly presented or missing)",
        "user_understanding": "string (poor, average, good)",
    },
    "Data_Security": {
        "security_status": "string (secure, at risk, breached)",
        "vulnerability_points": "array of strings (specific areas of potential vulnerability)",
        "data_encryption": "boolean",
        "compliance_status": "string (compliant, partially compliant, non-compliant)",
    },
    "Environmental_Impact": {
        "carbon_footprint": "number (metric tons of CO2 equivalent)",
        "energy_efficiency": "string (low, moderate, high)",
        "sustainable_practices": "boolean",
        "environmental_impact_score": "number (0-100)",
    },
    "Privacy_Assessment": {
        "overall_privacy_status": "string (positive, negative)",
        "privacy_policy_compliance": "string (compliant, partially compliant, non-compliant)",
        "data_minimization": "boolean",
        "user_data_accessibility": "string (none, limited, full)",
        "anonymization": "boolean",
    },

    # Toxicity-related schemas
    "Content_Moderation": {
        "moderation_effectiveness": "string (low, medium, high)",
        "moderated_content_types": "array of strings (types of content being moderated)",
        "automated_moderation": "boolean",
        "human_moderation": "boolean",
    },
    "Reporting_Mechanism": {
        "reporting_ease": "string (easy, moderate, difficult)",
        "response_time": "string (fast, average, slow)",
        "report_feedback": "string (detailed, minimal, none)",
    },
    "Content_Guidelines": {
        "clarity": "string (clear, somewhat clear, unclear)",
        "comprehensiveness": "string (comprehensive, partial, lacking)",
        "enforcement_consistency": "string (consistent, inconsistent)",
    },
    "User_Education": {
        "educational_resources_available": "boolean",
        "resource_accessibility": "string (easy, moderate, difficult)",
        "user_comprehension_level": "string (high, medium, low)",
    },

    # Legal-related schemas
    # NOTE(review): "Data_Retention" has an instruction in ANALYSIS_TYPES but
    # no schema here, so its prompt renders an empty schema "{}" -- confirm
    # whether a schema should be added.
    "Privacy_Policy": {
        "clarity": "string (clear, somewhat clear, unclear)",
        "compliance": "string (compliant, partially compliant, non-compliant)",
        "user_rights": "array of strings (specific rights mentioned in policy)",
    },
    "Consent_Mechanism": {
        "mechanism_clarity": "string (clear, somewhat clear, unclear)",
        "user_control": "boolean",
        "opt_in_out": "string (opt-in, opt-out, not applicable)",
    },
    "GDPR_Compliance": {
        "compliance_level": "string (fully compliant, partially compliant, non-compliant)",
        "data_protection_officer": "boolean",
        "breach_notification": "boolean",
    },

    # Context-related schemas
    "Ethical_AI": {
        "ethical_standards_adherence": "string (high, medium, low)",
        "ethical_issues_identified": "array of strings",
        "mitigation_measures": "array of strings",
    },
    "Bias_Mitigation": {
        "bias_identified": "boolean",
        "bias_types": "array of strings",
        "mitigation_strategies": "array of strings",
    },
    "Fairness_Assessment": {
        "fairness_level": "string (high, medium, low)",
        "affected_groups": "array of strings",
        "improvement_recommendations": "array of strings",
    },
    "Explainability": {
        "model_transparency": "string (transparent, opaque)",
        "explanation_comprehensibility": "string (high, medium, low)",
        "user_friendly_explanations": "boolean",
    },

    # Governance-related schemas
    "Model_Development": {
        "development_process": "string (structured, ad-hoc, undefined)",
        "team_composition": "array of strings (roles involved)",
        "ethics_considerations": "boolean",
    },
    "Data_Quality": {
        "accuracy_level": "string (high, medium, low)",
        "completeness": "string (complete, partial, incomplete)",
        "timeliness": "string (up-to-date, outdated)",
    },
    "User_Input": {
        "user_feedback_mechanism": "boolean",
        "feedback_responsiveness": "string (responsive, moderately responsive, unresponsive)",
        "user_input_impact": "string (high, medium, low)",
    },

    # Risk Management-related schemas
    "Corporate_Ethics": {
        "ethics_code": "string (exists, partial, none)",
        "employee_training": "boolean",
        "ethics_violations": "array of strings",
    },
    "Board_Management": {
        "board_structure": "string (effective, average, ineffective)",
        "board_diversity": "boolean",
        "board_ethics_compliance": "string (compliant, non-compliant)",
    },
    "Stakeholder_Engagement": {
        "stakeholder_inclusion": "string (inclusive, partially inclusive, exclusive)",
        "feedback_mechanism": "boolean",
        "stakeholder_satisfaction": "string (high, medium, low)",
    },
    "Risk_Management": {
        "risk_identification": "boolean",
        "risk_mitigation_strategies": "array of strings",
        "risk_monitoring": "boolean",
    },

    # Robustness-related schemas
    "System_Reliability": {
        "uptime_percentage": "number (0-100)",
        "system_resilience": "string (high, medium, low)",
        "redundancy_measures": "boolean",
    },
    "Quality_Assurance": {
        "quality_standards": "array of strings",
        "testing_frequency": "string (frequent, occasional, rare)",
        "quality_assurance_compliance": "string (compliant, partially compliant, non-compliant)",
    },
    "Stress_Testing": {
        "stress_test_pass_rate": "number (0-100)",
        "identified_weaknesses": "array of strings",
        "improvement_actions": "array of strings",
    },
    "Fail_Safe_Procedures": {
        "procedures_defined": "boolean",
        "execution_frequency": "string (regular, occasional, never)",
        "effectiveness": "string (effective, partially effective, ineffective)",
    },

    # Sustainability-related schemas
    "Renewable_Resources": {
        "resource_usage": "string (high, moderate, low)",
        "renewable_resource_percentage": "number (0-100)",
        "sustainability_goals": "boolean",
    },
    "Waste_Reduction": {
        "waste_management_practices": "string (effective, average, poor)",
        "reduction_rate": "number (0-100)",
        "recycling_initiatives": "boolean",
    },
    "Energy_Efficiency": {
        "energy_consumption": "string (high, moderate, low)",
        "energy_saving_measures": "array of strings",
        "energy_audit": "boolean",
    },
    "Sustainable_Practices": {
        "practice_adoption": "string (widespread, partial, none)",
        "sustainability_training": "boolean",
        "sustainability_impact": "string (high, medium, low)",
    },
}