"""Train a placement classifier from ``dataset.csv`` and serve it over HTTP.

At import time the script loads the dataset, label-encodes the categorical
feature columns, fits a preprocessing + random-forest pipeline, and prints
the hold-out accuracy. It then exposes ``POST /predict``, which accepts a
JSON object of feature values and returns the predicted label.
"""
import pandas as pd
from flask import Flask, request, jsonify
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the CSV data
data = pd.read_csv('dataset.csv')

# Split the data into features and labels
X = data.drop('PlacedOrNot', axis=1)
y = data['PlacedOrNot']

# Columns consumed by the pipeline; any other column is ignored by the
# ColumnTransformer below (its default remainder policy is 'drop').
numerical_features = ['Internships', 'CGPA']
categorical_features = ['HistoryOfBacklogs']

# Encode categorical features. Each fitted encoder is kept so that predict()
# can apply the exact same value -> code mapping to incoming requests;
# without this the model would be served raw values it was never trained on.
feature_encoders = {}
for feature in categorical_features:
    encoder = LabelEncoder()
    X[feature] = encoder.fit_transform(X[feature])
    feature_encoders[feature] = encoder

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create the pipeline
numerical_transformer = StandardScaler()
categorical_transformer = SimpleImputer(strategy='most_frequent')

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)

pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42)),
])

# Train the model
pipeline.fit(X_train, y_train)

# Evaluate the model
accuracy = pipeline.score(X_test, y_test)
print('Accuracy:', accuracy)

# Create Flask app
app = Flask(__name__)


def _encode_categoricals(frame):
    """Return a copy of *frame* with categorical columns mapped to training codes.

    Missing values are left as NaN so the pipeline's imputer still handles
    them. ``LabelEncoder.transform`` raises ``ValueError`` for a value that
    was not present when the encoder was fitted.
    """
    encoded = frame.copy()
    for name, fitted in feature_encoders.items():
        column = encoded[name]
        present = column.notna()
        codes = pd.Series(float('nan'), index=encoded.index, dtype='float64')
        if present.any():
            codes[present] = fitted.transform(column[present])
        encoded[name] = codes
    return encoded


# Define API route for making predictions
@app.route('/predict', methods=['POST'])
def predict():
    """Predict the placement label for one record posted as a JSON object.

    The body must be a JSON object containing at least the keys listed in
    ``numerical_features`` and ``categorical_features``.

    Returns:
        ``{"prediction": <label>}`` on success, or a JSON error body with
        HTTP status 400 when the request cannot be processed.
    """
    # silent=True yields None instead of raising on a missing/invalid body,
    # so every malformed request gets the same JSON error shape.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    required = numerical_features + categorical_features
    missing = [name for name in required if name not in payload]
    if missing:
        return jsonify(
            {'error': 'Missing required fields.', 'missing': missing}
        ), 400

    try:
        # Convert input data to a single-row dataframe and apply the same
        # categorical encoding that was used for training.
        input_data = _encode_categoricals(pd.DataFrame(payload, index=[0]))
        predictions = pipeline.predict(input_data)
    except (ValueError, TypeError):
        # Non-scalar values, unseen categories, or non-numeric inputs.
        return jsonify({'error': 'Input values could not be processed.'}), 400

    # tolist() converts the NumPy scalar to a native Python value; jsonify
    # cannot serialize numpy.int64 and would raise TypeError otherwise.
    return jsonify({'prediction': predictions.tolist()[0]})


# Run the Flask app
if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive Werkzeug debugger,
    # which allows arbitrary code execution from the browser. Keep it for
    # local development only and never expose this entry point publicly.
    app.run(debug=True)