"""Train a random-forest crop-yield regressor from a CSV file.

Importing this module reads the CSV at ``file_path``, encodes the categorical
columns, splits the rows into train/test sets, standardises the numerical
columns and fits the model.  The fitted ``label_encoders``, ``scaler`` and
``model`` are left at module level for the prediction code further down.
"""

# NOTE(review): numpy and the transformers names are not referenced in the
# part of the file visible here; kept in case other code relies on them.
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# -------------------------------
# 1. Load and Preprocess Data
# -------------------------------
file_path = "path_to_your_csv_file.csv"  # Replace with your actual file path
df = pd.read_csv(file_path)

categorical_cols = ['Seed_Variety', 'Irrigation_Schedule']
numerical_cols = ['Soil_Quality', 'Fertilizer_Amount_kg_per_hectare', 'Sunny_Days', 'Rainfall_mm']
target_col = 'Yield_kg_per_hectare'

# Handle Categorical Columns
# The encoders are fitted on every row (before the split) so that each
# category present in the CSV receives an integer code.
label_encoders = {}
for col in categorical_cols:
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col])

# Split Dataset
X = df.drop(columns=[target_col])
y = df[target_col]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize Numerical Columns
# The scaler is fitted on the training split only, so the held-out rows do not
# influence the mean/std used for preprocessing (no train/test leakage).  The
# test split is transformed with the statistics learned from the training
# split.  ``df`` and ``X`` keep their unscaled numerical values.
scaler = StandardScaler()
X_train = X_train.copy()  # explicit copies: the frames are modified below
X_test = X_test.copy()
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

# -------------------------------
# 2. Train Model
# -------------------------------
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# -------------------------------
# 3.
# Prediction Function
# -------------------------------
def _encode_category(column, value):
    """Return the integer code the fitted encoder for ``column`` gives ``value``.

    Raises:
        ValueError: If the encoder rejects ``value`` (for example because it
            was not among the labels seen when the encoder was fitted).  The
            message lists the labels the encoder knows.
    """
    encoder = label_encoders[column]
    try:
        return encoder.transform([value])[0]
    except ValueError as err:
        known = ", ".join(map(str, encoder.classes_))
        raise ValueError(
            f"Unknown {column} {value!r}. Expected one of: {known}"
        ) from err


def predict_yield(soil_quality, seed_variety, fertilizer_amount, sunny_days, rainfall, irrigation_schedule):
    """Predict the yield for one set of field conditions and format a report.

    Args:
        soil_quality: Value for the ``Soil_Quality`` feature.
        seed_variety: Label for ``Seed_Variety``; must be known to the fitted
            label encoder.
        fertilizer_amount: Value for ``Fertilizer_Amount_kg_per_hectare``.
        sunny_days: Value for the ``Sunny_Days`` feature.
        rainfall: Value for the ``Rainfall_mm`` feature.
        irrigation_schedule: Label for ``Irrigation_Schedule``; must be known
            to the fitted label encoder.

    Returns:
        A Markdown bullet list with the predicted yield, a suggested
        fertilizer amount (the input amount times 1.1) and a static insight
        sentence.

    Raises:
        ValueError: If a numeric argument is ``None``, if a categorical label
            is rejected by its encoder, or if the model was trained on a
            column this function does not supply.
    """
    # An empty numeric field reaches this function as None; reject it up front
    # instead of failing later inside the scaler or the arithmetic below.
    numeric_inputs = {
        'Soil Quality': soil_quality,
        'Fertilizer Amount': fertilizer_amount,
        'Sunny Days': sunny_days,
        'Rainfall': rainfall,
    }
    missing = [label for label, value in numeric_inputs.items() if value is None]
    if missing:
        raise ValueError(f"Please provide a value for: {', '.join(missing)}")

    # Preprocess Inputs
    input_data = pd.DataFrame({
        'Soil_Quality': [soil_quality],
        'Seed_Variety': [_encode_category('Seed_Variety', seed_variety)],
        'Fertilizer_Amount_kg_per_hectare': [fertilizer_amount],
        'Sunny_Days': [sunny_days],
        'Rainfall_mm': [rainfall],
        'Irrigation_Schedule': [_encode_category('Irrigation_Schedule', irrigation_schedule)],
    })
    input_data[numerical_cols] = scaler.transform(input_data[numerical_cols])

    # Present the columns in the order used for training: the CSV's column
    # order need not match the order written above, and scikit-learn rejects
    # frames whose feature names are ordered differently from fit time.
    expected_cols = list(X_train.columns)
    unsupplied = [col for col in expected_cols if col not in input_data.columns]
    if unsupplied:
        raise ValueError(f"Model was trained on columns not supplied here: {unsupplied}")
    input_data = input_data[expected_cols]

    # Prediction
    predicted_yield = model.predict(input_data)[0]

    # Insights (Static Example): suggest 10% more fertilizer than was entered.
    suggested_fertilizer = fertilizer_amount * 1.1
    insight = (
        f"To optimize yield, maintain fertilizer levels around {suggested_fertilizer:.2f} kg/hectare "
        f"and ensure consistent irrigation on {irrigation_schedule} schedule."
    )

    # Built line by line, without leading indentation, so the Markdown
    # renderer sees a bullet list rather than an indented code block.
    return "\n".join([
        f"- **Predicted Yield:** {predicted_yield:.2f} kg/hectare",
        f"- **Optimal Fertilizer Usage:** {suggested_fertilizer:.2f} kg/hectare",
        f"- **Insight:** {insight}",
    ])


# -------------------------------
# 4. User Interface (Gradio)
# -------------------------------
interface = gr.Interface(
    fn=predict_yield,
    inputs=[
        gr.Number(label="Soil Quality (0-1 normalized)"),
        gr.Textbox(label="Seed Variety"),
        gr.Number(label="Fertilizer Amount (kg/hectare)"),
        gr.Number(label="Sunny Days"),
        gr.Number(label="Rainfall (mm)"),
        gr.Textbox(label="Irrigation Schedule"),
    ],
    # predict_yield returns Markdown, so render it instead of showing the
    # raw asterisks in a plain text box.
    outputs=gr.Markdown(),
    title="Crop Yield Prediction App",
    description="Enter crop parameters to predict yield and get professional agricultural insights.",
)

# Launch App
if __name__ == "__main__":
    interface.launch()