Upload 5 files
Browse files- .gitattributes +1 -35
- Train.py +48 -0
- Trained.py +29 -0
- gender_prediction_model.h5 +3 -0
- tfidf_vectorizer.joblib +0 -0
.gitattributes
CHANGED
@@ -1,35 +1 @@
|
|
1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Train.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
from sklearn.model_selection import train_test_split
|
4 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
5 |
+
from sklearn.metrics import accuracy_score
|
6 |
+
from tensorflow.keras.models import Sequential
|
7 |
+
from tensorflow.keras.layers import Dense, Dropout
|
8 |
+
from tensorflow.keras.models import save_model
|
9 |
+
from joblib import dump # To save the TF-IDF vectorizer
|
10 |
+
|
11 |
+
# 1. Read data
# The sheet must provide a 'Name' column and a 'Gender' column coded 'M' / 'F'.
data = pd.read_excel('gender.xlsx')

# 2. Preprocess data
# Map the label codes to integers; any value other than 'M' / 'F' becomes NaN.
data['Gender'] = data['Gender'].map({'M': 1, 'F': 0})

# Drop rows that cannot be used for training. A missing name makes the
# vectorizer fail, and a NaN label turns the loss into NaN without any error.
data = data.dropna(subset=['Name', 'Gender'])
if data.empty:
    raise ValueError(
        "gender.xlsx contains no rows with both a name and an 'M'/'F' gender label."
    )

# Cells that Excel parsed as numbers are not strings; the vectorizer needs text.
names = data['Name'].astype(str)

# 3. Convert text data into numerical data using TF-IDF
# Character 1- to 3-grams are used as features for each name.
tfidf = TfidfVectorizer(analyzer='char', ngram_range=(1, 3))
X = tfidf.fit_transform(names).toarray()  # Dense feature matrix, one row per name
y = data['Gender'].astype(int).values  # Labels: 1 for Male, 0 for Female

# 4. Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 5. Build the neural network model
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.5))  # Dropout to reduce overfitting
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))  # Single sigmoid unit for binary classification

# 6. Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 7. Train the model; 20% of the training split is held out for validation
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# 8. Save the model after training
model.save('gender_prediction_model.h5')

# 9. Save the fitted TF-IDF vectorizer so prediction uses the same vocabulary
dump(tfidf, 'tfidf_vectorizer.joblib')

# 10. Evaluate the model on the held-out test split
# Threshold the probabilities at 0.5 and flatten to 1-D to match y_test.
y_pred = (model.predict(X_test) > 0.5).astype("int32").ravel()
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")
|
Trained.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import os
|
3 |
+
from tensorflow.keras.models import load_model
|
4 |
+
from joblib import load
|
5 |
+
|
6 |
+
# Function to predict gender based on a name
|
7 |
+
def predict_gender(name, model, tfidf):
    """Predict 'Male' or 'Female' for a single name.

    Args:
        name: The name to classify. Surrounding whitespace is removed before
            vectorizing so it does not contribute character n-grams.
        model: Object whose ``predict(features)`` returns a 2-D array with
            the predicted probability at ``[0][0]``.
        tfidf: Fitted vectorizer whose ``transform([name])`` returns an
            object with a ``toarray()`` method.

    Returns:
        'Male' if the predicted probability is greater than 0.5,
        otherwise 'Female'.
    """
    cleaned_name = str(name).strip()
    # The vectorizer works on a batch, so wrap the single name in a list.
    features = tfidf.transform([cleaned_name]).toarray()
    probability = float(model.predict(features)[0][0])
    return 'Male' if probability > 0.5 else 'Female'
|
11 |
+
|
12 |
+
# Artifacts written by the training script; both are required for prediction.
model_file = 'gender_prediction_model.h5'
tfidf_vectorizer_file = 'tfidf_vectorizer.joblib'

# Check both files before loading anything, so a missing artifact is reported
# with a clear message instead of a loader-specific error.
for required_file in (model_file, tfidf_vectorizer_file):
    if not os.path.exists(required_file):
        raise FileNotFoundError(f"{required_file} not found. Please ensure the file exists in the current directory.")

# Load the pre-trained model
model = load_model(model_file)

# Load the TF-IDF vectorizer
# NOTE: joblib files are pickle-based and can execute code when loaded;
# only load a vectorizer file that comes from a trusted source.
tfidf = load(tfidf_vectorizer_file)

# Main loop to take user input for predictions.
# Guarded so that importing this module does not block on input().
if __name__ == "__main__":
    while True:
        try:
            name = input("Enter a name to predict gender (or type 'exit' to quit): ").strip()
        except (EOFError, KeyboardInterrupt):
            # End of input (Ctrl-D / closed pipe) or Ctrl-C: leave quietly.
            print()
            break
        if name.lower() == 'exit':
            break
        if not name:
            # Nothing to classify; prompt again.
            continue
        predicted_gender = predict_gender(name, model, tfidf)
        print(f"The predicted gender for '{name}' is: {predicted_gender}")
|
gender_prediction_model.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2cc4d6f2709ff0d838d5fb5694116cd806ba2ad2e714bc2ee8fce63dba34f03
|
3 |
+
size 15286728
|
tfidf_vectorizer.joblib
ADDED
Binary file (169 kB). View file
|
|