Spaces:

openpecha
/

translation_term_analyser

Sleeping

File size: 8,444 Bytes

20dc456

import json
import os
from datetime import datetime
from pathlib import Path

import gradio as gr

# Tibetan terms offered in the dropdown. The selected entry is passed to
# load_context(), which reads terms_context/<term>.json for it.
# Entries must be unique: a repeated term shows up as a duplicate choice.
AVAILABLE_TERMS = ["བྱང་ཆུབ་སེམས་", "ཆོས་", "དགེ་བ་", "སངས་རྒྱས་", "སྡིག་པ་"]

def load_context(term: str) -> object:
    """Load the stored context for ``term`` from ``terms_context/<term>.json``.

    The file is looked up in the ``terms_context`` directory located next to
    this module.

    Args:
        term: Term whose context file should be read; used verbatim as the
            file stem.

    Returns:
        The parsed JSON content on success. On failure nothing is raised;
        a string of the form ``"Error loading context: <reason>"`` is
        returned instead.
    """
    # ``Path(__file__)`` is the module file itself, so the directory has to be
    # taken via ``.parent``; joining onto the file path can never resolve.
    context_path = Path(__file__).parent / "terms_context" / f"{term}.json"
    try:
        # Explicit UTF-8 so Tibetan text decodes the same on every platform,
        # and a context manager so the handle is always closed.
        with context_path.open("r", encoding="utf-8") as context_file:
            return json.load(context_file)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError: malformed JSON or
        # undecodable bytes (JSONDecodeError / UnicodeDecodeError).
        return f"Error loading context: {e}"

def process_term(tibetan_term: str, api_key: str) -> dict:
    """Run the term standardization agent for one term and save the result.

    The results are also written to ``results/<term>_<timestamp>.json``
    (relative to the current working directory).

    Args:
        tibetan_term: Term to analyse; also used to look up its context file
            and to name the saved result file.
        api_key: Anthropic API key. It is exported as the
            ``ANTHROPIC_API_KEY`` environment variable only for the duration
            of this call.

    Returns:
        Whatever ``TermStandardizationAgent.select_best_terms`` returns, or
        ``{"error": <message>}`` when no key is given or any step fails.
    """
    if not api_key:
        return {"error": "Please provide an Anthropic API key"}

    # Remember a key that was configured before this call so it can be put
    # back afterwards instead of being wiped.
    previous_key = os.environ.get("ANTHROPIC_API_KEY")
    try:
        # Everything that can fail lives inside the try so the environment is
        # always cleaned up and the caller always gets a dict back.
        os.environ["ANTHROPIC_API_KEY"] = api_key
        context = load_context(tibetan_term)

        # Imported here so that a missing module is reported as an error dict
        # rather than breaking import of this file.
        from term_standarization import TermStandardizationAgent
        agent = TermStandardizationAgent()
        results = agent.select_best_terms(tibetan_term, context)

        # Save results
        date_time = datetime.now().strftime("%Y%m%d%H%M%S")
        results_path = Path("results")
        results_path.mkdir(exist_ok=True, parents=True)
        result_fn = results_path / f"{tibetan_term}_{date_time}.json"
        with result_fn.open("w", encoding="utf-8") as result_file:
            json.dump(results, result_file, indent=2, ensure_ascii=False)

        return results
    except Exception as e:
        return {"error": str(e)}
    finally:
        # Drop the per-request key; restore the earlier one if there was one.
        if previous_key is None:
            os.environ.pop("ANTHROPIC_API_KEY", None)
        else:
            os.environ["ANTHROPIC_API_KEY"] = previous_key

def format_semantic_analysis(results: dict) -> str:
    """Render the ``analysis`` part of ``results`` as Markdown text.

    The output consists of, in order: a fixed "data sources" preamble, the
    Sanskrit analysis fields, the Tibetan mapping fields, and one entry per
    commentary insight. All pieces are joined with newlines.

    Raises:
        KeyError: if an expected key is missing from ``results``.
    """
    # (label shown in bold, key read from the mapping) for each section
    sanskrit_fields = (
        ("Term", "term"),
        ("Morphology", "morphology"),
        ("Literal Meaning", "literal_meaning"),
        ("Technical Usage", "technical_usage"),
    )
    tibetan_fields = (
        ("Term", "term"),
        ("Morphology", "morphology"),
        ("Translation Strategy", "translation_strategy"),
        ("Semantic Extension", "semantic_extension"),
    )

    # Fixed preamble, immediately followed by the first section heading
    lines = [
        "## Data Sources Used in Analysis",
        "\nThis analysis is based on:",
        "\n- Sanskrit parallel text",
        "\n- Traditional Tibetan commentaries",
        "\n- Existing English translations",
        "## Sanskrit Analysis",
    ]

    sanskrit = results["analysis"]["sanskrit_analysis"]
    for heading, field in sanskrit_fields:
        lines += (f"\n**{heading}:**", f"{sanskrit[field]}")

    lines.append("\n## Tibetan Mapping")
    tibetan = results["analysis"]["tibetan_mapping"]
    for heading, field in tibetan_fields:
        lines += (f"\n**{heading}:**", f"{tibetan[field]}")

    lines.append("\n## Commentary Insights")
    for entry in results["analysis"]["commentary_insights"]:
        lines.append("\n**Source:**")
        lines.append(entry["source"])
        lines.append("\n**Explanation:**")
        lines.append(entry["explanation"])
        lines.append("\n**Technical Points:**")
        # One bullet per technical point
        lines.extend(f"- {item}" for item in entry["technical_points"])

    return "\n".join(lines)

def _table_cell(value: object) -> str:
    """Render ``value`` so it cannot break out of a Markdown table cell."""
    # A raw newline ends the table row and a raw pipe starts a new column,
    # so flatten line breaks to spaces and escape pipes.
    return " ".join(str(value).splitlines()).replace("|", "\\|")


def format_results(results: dict) -> tuple:
    """Format analysis results into three Markdown sections for display.

    Args:
        results: Either ``{"error": <message>}`` or a dict with
            ``"recommendations"``, ``"evaluations"`` and ``"analysis"`` keys.

    Returns:
        A 3-tuple ``(recommendations, semantic_analysis, evaluations)`` of
        Markdown strings. For an error result the first element carries the
        error message and the other two are empty, so callers can always
        unpack three values.
    """
    if "error" in results:
        # Same arity as the success path; the caller unpacks three values.
        return f"Error: {results['error']}", "", ""

    recommendations = ["# Recommended Translations"]
    for audience, details in results["recommendations"].items():
        recommendations.append(f"\n## {audience}")
        recommendations.append("**Term:**")
        recommendations.append(details['term'])
        recommendations.append("\n**Reasoning:**")
        recommendations.append(details['reasoning'])

    # The &nbsp; runs widen the rendered columns.
    pad = "&nbsp;&nbsp;&nbsp;&nbsp;"
    evaluations = [
        "# Evaluation Scores",
        "\n| Term &nbsp;&nbsp;&nbsp;&nbsp; | Technical Score &nbsp;&nbsp;&nbsp;&nbsp; | Cultural Score &nbsp;&nbsp;&nbsp;&nbsp; | Audience Score &nbsp;&nbsp;&nbsp;&nbsp; | Reasoning &nbsp;&nbsp;&nbsp;&nbsp; |",
        "|------------|------------------|-----------------|-----------------|-------------|",
    ]

    # One table row per evaluated term
    for term, scores in results["evaluations"].items():
        cells = (
            term,
            scores['technical_score'],
            scores['cultural_score'],
            scores['audience_score'],
            scores['reasoning'],
        )
        row = " | ".join(f"{_table_cell(cell)} {pad}" for cell in cells)
        evaluations.append(f"| {row} |")

    return (
        "\n".join(recommendations),
        format_semantic_analysis(results),
        "\n".join(evaluations)
    )

# Create the Gradio interface
with gr.Blocks(title="Buddhist Term Analyzer") as demo:
    gr.Markdown("# Buddhist Term Analyzer Agent")
    gr.Markdown("Select a Tibetan Buddhist term to analyze its standardized translations and get detailed analysis.")

    gr.Markdown("""
## Data Sources Used in Analysis
This analysis is based on:
- Sanskrit parallel text
- Traditional Tibetan commentaries
- Existing English translations
""")

    with gr.Row():
        # Left column: inputs
        with gr.Column():
            api_key = gr.Textbox(
                label="Anthropic API Key",
                placeholder="Enter your Anthropic API key",
                type="password"
            )
            tibetan_input = gr.Dropdown(
                choices=AVAILABLE_TERMS,
                label="Select Tibetan Term",
                value=AVAILABLE_TERMS[0] if AVAILABLE_TERMS else None
            )
            analyze_button = gr.Button("Analyze Term")

        # Right column: result panels, hidden until an analysis has run
        with gr.Column():
            recommendations_output = gr.Markdown(label="Recommendations", visible=False)
            semantic_analysis_box = gr.Accordion("Semantic Analysis", open=False, visible=False)

            with semantic_analysis_box:
                semantic_analysis_output = gr.Markdown(label="Semantic Analysis")

            evaluations_output = gr.Markdown(label="Evaluations", visible=False)

    # Add loading configuration
    with gr.Row():
        with gr.Column(scale=1):
            status_text = gr.Markdown(
                "Ready",
                elem_classes="status-text"
            )

    # Add custom CSS for status text
    gr.Markdown("""
        <style>
            .status-text {
                min-height: 50px;
                font-size: 1.2em;
                margin: 15px 0;
                padding: 10px;
                background-color: #f6f6f6;
                border-radius: 8px;
                text-align: center;
            }
        </style>
    """)

    def process_with_status(term, key):
        """Click handler: analyse ``term`` and return one value per output.

        The returned list is ordered exactly like the ``outputs`` passed to
        ``analyze_button.click`` below: recommendations, semantic analysis,
        evaluations, status text, semantic-analysis accordion.
        """

        def error_outputs(message):
            # Must contain one entry per registered output (five), otherwise
            # the handler's return value does not match the outputs list.
            return [
                gr.update(value=""),
                gr.update(value=""),
                gr.update(value=f"Error: {message}", visible=True),
                "**Error occurred during analysis**",
                gr.update(visible=False)  # For semantic_analysis_box
            ]

        try:
            results = process_term(term, key)
            # process_term reports failures as {"error": ...}; surface them
            # directly instead of trying to format them as a full result.
            if "error" in results:
                return error_outputs(results["error"])
            recommendations, semantic_analysis, evaluations = format_results(results)
        except Exception as e:
            return error_outputs(e)

        # The status is reported through the returned value for status_text;
        # the shared component object is not mutated from inside the handler.
        return [
            gr.update(value=recommendations, visible=True),
            gr.update(value=semantic_analysis),
            gr.update(value=evaluations, visible=True),
            "**Analysis complete!**",
            gr.update(visible=True)  # For semantic_analysis_box
        ]

    # Analyze when button is clicked
    analyze_button.click(
        fn=process_with_status,
        inputs=[tibetan_input, api_key],
        outputs=[
            recommendations_output,
            semantic_analysis_output,
            evaluations_output,
            status_text,
            semantic_analysis_box
        ]
    )

if __name__ == "__main__":
    demo.launch()