import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import gradio as gr

# -------------------------------
# 1. Load and Preprocess Data
# -------------------------------
# Location of the training data on disk.
file_path = "path_to_your_csv_file.csv"  # Replace with your actual file path
df = pd.read_csv(file_path)

# Encode categorical columns as integer codes. One fitted encoder is kept per
# column so the same mapping can be applied to user input at prediction time.
categorical_cols = ['Seed_Variety', 'Irrigation_Schedule']
label_encoders = {col: LabelEncoder() for col in categorical_cols}
for col, encoder in label_encoders.items():
    df[col] = encoder.fit_transform(df[col])

# Standardize numerical columns in place; the fitted scaler is reused at
# prediction time.
numerical_cols = ['Soil_Quality', 'Fertilizer_Amount_kg_per_hectare', 'Sunny_Days', 'Rainfall_mm']
scaler = StandardScaler()
df[numerical_cols] = scaler.fit_transform(df[numerical_cols])

# Separate the target from the features and hold out 20% of the rows.
target_col = 'Yield_kg_per_hectare'
y = df[target_col]
X = df.drop(columns=[target_col])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# -------------------------------
# 2. Train Model
# -------------------------------
# fit() returns the estimator itself, so construction and training can be chained.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# -------------------------------
# 3. Prediction Function
# -------------------------------
def _encode_category(column, value):
    """Return the integer code the fitted encoder for *column* assigns to *value*.

    The value is matched against the encoder's known classes by string form,
    so text typed into a UI field also matches numeric categories (e.g. "1"
    matches the class 1). An exact match is tried first, then a match with
    surrounding whitespace stripped from the input.

    Raises:
        ValueError: if *value* matches none of the classes seen during
            fitting; the message lists the accepted values.
    """
    classes = label_encoders[column].classes_
    labels = [str(known) for known in classes]
    raw = str(value)
    for candidate in (raw, raw.strip()):
        if candidate in labels:
            # LabelEncoder codes are positions in the sorted ``classes_`` array.
            return labels.index(candidate)
    raise ValueError(
        f"Unknown {column} value {value!r}; expected one of: {', '.join(labels)}"
    )


def predict_yield(soil_quality, seed_variety, fertilizer_amount, sunny_days, rainfall, irrigation_schedule):
    """Predict crop yield for one set of field parameters and format a report.

    Args:
        soil_quality: Soil quality value (unscaled; scaled here with ``scaler``).
        seed_variety: Seed variety; must be one of the categories seen by the
            fitted ``Seed_Variety`` encoder.
        fertilizer_amount: Fertilizer amount in kg per hectare.
        sunny_days: Number of sunny days.
        rainfall: Rainfall in mm.
        irrigation_schedule: Irrigation schedule; must be one of the
            categories seen by the fitted ``Irrigation_Schedule`` encoder.

    Returns:
        A Markdown-formatted string with the predicted yield, a suggested
        fertilizer amount (110% of the supplied amount) and a short insight.

    Raises:
        ValueError: if a numeric input is missing (``None``) or a categorical
            input is not a known category.
    """
    numeric_inputs = {
        'Soil_Quality': soil_quality,
        'Fertilizer_Amount_kg_per_hectare': fertilizer_amount,
        'Sunny_Days': sunny_days,
        'Rainfall_mm': rainfall,
    }
    # A blank numeric field arrives as None; fail early with a clear message
    # instead of a late TypeError/NaN error further down.
    missing = [name for name, value in numeric_inputs.items() if value is None]
    if missing:
        raise ValueError(f"Missing numeric input(s): {', '.join(missing)}")

    # Preprocess Inputs
    input_data = pd.DataFrame({
        'Soil_Quality': [soil_quality],
        'Seed_Variety': [_encode_category('Seed_Variety', seed_variety)],
        'Fertilizer_Amount_kg_per_hectare': [fertilizer_amount],
        'Sunny_Days': [sunny_days],
        'Rainfall_mm': [rainfall],
        'Irrigation_Schedule': [_encode_category('Irrigation_Schedule', irrigation_schedule)],
    })
    input_data[numerical_cols] = scaler.transform(input_data[numerical_cols])

    # Present the features in the order the model was trained on; the CSV's
    # column order need not match the order used to build the frame above.
    trained_order = getattr(model, "feature_names_in_", None)
    if trained_order is not None and set(trained_order) == set(input_data.columns):
        input_data = input_data[list(trained_order)]

    # Prediction
    predicted_yield = model.predict(input_data)[0]

    # Insights (Static Example): recommend 10% above the supplied amount.
    recommended_fertilizer = fertilizer_amount * 1.1
    insight = (
        f"To optimize yield, maintain fertilizer levels around {recommended_fertilizer:.2f} kg/hectare "
        f"and ensure consistent irrigation on {irrigation_schedule} schedule."
    )

    # The two spaces before each newline are Markdown hard line breaks.
    return (
        "\n"
        f"- **Predicted Yield:** {predicted_yield:.2f} kg/hectare  \n"
        f"- **Optimal Fertilizer Usage:** {recommended_fertilizer:.2f} kg/hectare  \n"
        f"- **Insight:** {insight}\n"
    )

# -------------------------------
# 4. User Interface (Gradio)
# -------------------------------
# Offer categorical inputs as dropdowns populated from the fitted encoders, so
# users can only pick categories the encoders know (LabelEncoder.transform
# rejects unseen labels). tolist() converts NumPy scalars to plain Python values.
seed_variety_choices = label_encoders['Seed_Variety'].classes_.tolist()
irrigation_schedule_choices = label_encoders['Irrigation_Schedule'].classes_.tolist()

interface = gr.Interface(
    fn=predict_yield,
    inputs=[
        gr.Number(label="Soil Quality (0-1 normalized)"),
        gr.Dropdown(choices=seed_variety_choices, label="Seed Variety"),
        gr.Number(label="Fertilizer Amount (kg/hectare)"),
        gr.Number(label="Sunny Days"),
        gr.Number(label="Rainfall (mm)"),
        gr.Dropdown(choices=irrigation_schedule_choices, label="Irrigation Schedule"),
    ],
    # predict_yield returns Markdown (bold markers, bullet list), so render it
    # as Markdown rather than showing the raw markup in a plain text box.
    outputs=gr.Markdown(),
    title="Crop Yield Prediction App",
    description="Enter crop parameters to predict yield and get professional agricultural insights."
)

# Launch App
if __name__ == "__main__":
    interface.launch()