harshiv committed on
Commit
154b7a1
1 Parent(s): 293f224

Update app.py

Files changed (1): app.py (+21, -36)
app.py CHANGED
@@ -1,54 +1,39 @@
+# Import necessary libraries
 import pandas as pd
-from flask import Flask, request, jsonify
-
-from sklearn.compose import ColumnTransformer
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.impute import SimpleImputer
 from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import accuracy_score
 from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import LabelEncoder, StandardScaler
-from streamlit import *
-import joblib
-
+from sklearn.preprocessing import StandardScaler
+import gradio as gr
 
-# Load the CSV data
+# Load the CSV data into a pandas DataFrame
 data = pd.read_csv('dataset.csv')
 
-# Split the data into features and labels
-X = data.drop('PlacedOrNot', axis=1)
-y = data['PlacedOrNot']
-
-# Encode categorical features
-categorical_features = ['HistoryOfBacklogs']
-for feature in categorical_features:
-    encoder = LabelEncoder()
-    X[feature] = encoder.fit_transform(X[feature])
+# Split the data into features (X) and labels (y)
+X = data.iloc[:, :-1]  # All columns except the last one
+y = data.iloc[:, -1]   # Last column (placed or not)
 
 # Split the data into training and testing sets
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 
-# Create the pipeline
-numerical_features = ['Internships', 'CGPA']
-numerical_transformer = StandardScaler()
-categorical_features = ['HistoryOfBacklogs']
-categorical_transformer = SimpleImputer(strategy='most_frequent')
-preprocessor = ColumnTransformer(
-    transformers=[
-        ('num', numerical_transformer, numerical_features),
-        ('cat', categorical_transformer, categorical_features)
-    ])
-
+# Create a pipeline with a Random Forest Classifier
 pipeline = Pipeline([
-    ('preprocessor', preprocessor),
-    ('classifier', RandomForestClassifier(random_state=42))
+    ('scaler', StandardScaler()),              # Standardize features
+    ('classifier', RandomForestClassifier())   # Random Forest Classifier
 ])
 
-# Train the model
+# Fit the pipeline to the training data
 pipeline.fit(X_train, y_train)
 
-# Evaluate the model
-accuracy = pipeline.score(X_test, y_test)
+# Make predictions on the testing data
+y_pred = pipeline.predict(X_test)
+
+# Calculate accuracy of the model
+accuracy = accuracy_score(y_test, y_pred)
 print('Accuracy:', accuracy)
+
+# Define the input and output types for Gradio
 input_type = 'csv'
 output_type = 'label'
 
@@ -71,4 +56,4 @@ iface = gr.Interface(fn=predict_placement,
                      description='Predicts whether a student will be placed in a job or not based on internships, CGPA, and history of backlogs.')
 
 # Launch the Gradio interface
-iface.launch()
+iface.launch()
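
Note: the diff only touches the head and tail of app.py; the predict_placement function and the gr.Interface(...) call sit in the unchanged middle of the file and are not shown. For orientation only, a minimal sketch of what that wiring could look like, given the imports and the interface description above. The function body, input widgets, column layout, and label encoding here are assumptions, not the actual file contents.

# Hypothetical sketch -- not part of this commit; the real predict_placement
# and gr.Interface(...) call live in the unchanged portion of app.py.
def predict_placement(internships, cgpa, history_of_backlogs):
    # Assumes the dataset has exactly these three feature columns, in this order
    row = pd.DataFrame([[internships, cgpa, history_of_backlogs]], columns=X.columns)
    # Assumes the label column uses 1 for "placed" and 0 for "not placed"
    return 'Placed' if pipeline.predict(row)[0] == 1 else 'Not placed'

iface = gr.Interface(fn=predict_placement,
                     inputs=[gr.Number(label='Internships'),
                             gr.Number(label='CGPA'),
                             gr.Number(label='History of backlogs')],
                     outputs='label',
                     description='Predicts whether a student will be placed in a job or not based on internships, CGPA, and history of backlogs.')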