# harshiv's picture
# Update app.py
# 154b7a1
# raw
# history blame
# 2.13 kB
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import gradio as gr
# Load the placement dataset. Every column except the last is used as a
# feature; the last column is the target label (placed or not).
# NOTE(review): predict_placement builds its input rows from the columns
# 'Internships', 'CGPA' and 'HistoryOfBacklogs' only -- presumably dataset.csv
# has exactly those feature columns; confirm against the data file.
data = pd.read_csv('dataset.csv')
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Hold out 20% of the rows for evaluation. The fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features, then classify with a random forest. The forest is
# seeded like the split so the fitted model and the reported accuracy are
# reproducible from one run to the next.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', RandomForestClassifier(random_state=42)),
])
pipeline.fit(X_train, y_train)

# Report accuracy on the held-out rows.
y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)
# Gradio component shortcuts used when building the interface.
# predict_placement takes three separate positional values (Internships, CGPA,
# HistoryOfBacklogs), and Gradio passes one value per input component, so the
# interface needs one numeric input per parameter; a single 'csv' input cannot
# supply three arguments.
input_type = ['number', 'number', 'number']
# The function returns a single class string, shown as a label.
output_type = 'label'
def predict_placement(Internships, CGPA, HistoryOfBacklogs):
    """Predict the placement outcome for one student.

    The three values are packed into a single-row DataFrame with the columns
    'Internships', 'CGPA' and 'HistoryOfBacklogs' and passed to the
    module-level ``pipeline``.

    Returns:
        'Placed' when the predicted label is truthy, otherwise 'Not Placed'.
    """
    # NOTE(review): the truthiness test presumably relies on 1/0 labels in
    # the target column -- confirm against the dataset.
    single_row = {
        'Internships': [Internships],
        'CGPA': [CGPA],
        'HistoryOfBacklogs': [HistoryOfBacklogs],
    }
    features = pd.DataFrame(single_row)
    predicted_label = pipeline.predict(features)[0]
    if predicted_label:
        return 'Placed'
    return 'Not Placed'
# Build the web UI around predict_placement using the input/output component
# types configured above, then start serving it.
iface = gr.Interface(
    title='Student Job Placement Predictor',
    description='Predicts whether a student will be placed in a job or not based on internships, CGPA, and history of backlogs.',
    fn=predict_placement,
    inputs=input_type,
    outputs=output_type,
)
iface.launch()