neelnsoni13 committed on
Commit
a4c05e4
·
verified ·
1 Parent(s): 9dda8bc

Upload 5 files

Browse files
.gitattributes CHANGED
@@ -1,35 +1 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.h5 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Train.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.feature_extraction.text import TfidfVectorizer
5
+ from sklearn.metrics import accuracy_score
6
+ from tensorflow.keras.models import Sequential
7
+ from tensorflow.keras.layers import Dense, Dropout
8
+ from tensorflow.keras.models import save_model
9
+ from joblib import dump # To save the TF-IDF vectorizer
10
+
11
# Paths and label encoding shared by the training steps below.
DATA_FILE = 'gender.xlsx'
MODEL_FILE = 'gender_prediction_model.h5'
VECTORIZER_FILE = 'tfidf_vectorizer.joblib'
GENDER_LABELS = {'M': 1, 'F': 0}  # 1 for Male, 0 for Female


def load_training_data(path=DATA_FILE):
    """Read the spreadsheet and return a cleaned frame with 'Name' and 'Gender'.

    Rows with a missing name, a missing gender, or a gender code outside
    ``GENDER_LABELS`` are dropped: an unmapped code would otherwise become a
    NaN label and silently poison the loss during training.

    Raises:
        ValueError: If no usable rows remain after cleaning.
    """
    data = pd.read_excel(path)
    total_rows = len(data)

    data = data.dropna(subset=['Name', 'Gender']).copy()
    # Non-string cells (e.g. numbers) would make the vectorizer fail, so
    # coerce everything to stripped text first.
    data['Name'] = data['Name'].astype(str).str.strip()
    data['Gender'] = (
        data['Gender'].astype(str).str.strip().str.upper().map(GENDER_LABELS)
    )
    data = data.dropna(subset=['Gender'])
    data = data[data['Name'] != '']

    dropped = total_rows - len(data)
    if dropped:
        print(f"Dropped {dropped} row(s) with missing or unrecognised Name/Gender values.")
    if data.empty:
        raise ValueError(f"No usable training rows found in {path}.")

    data['Gender'] = data['Gender'].astype('int32')
    return data


def build_model(num_features):
    """Build and compile the feed-forward binary classifier."""
    model = Sequential()
    model.add(Dense(128, activation='relu', input_shape=(num_features,)))
    model.add(Dropout(0.5))  # Dropout to reduce overfitting
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))  # Sigmoid output for binary classification
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model


def main():
    """Train the name -> gender model, save both artifacts, and report accuracy."""
    # 1-2. Read and preprocess data
    data = load_training_data()

    # 3. Convert names into numerical features using character n-gram TF-IDF
    tfidf = TfidfVectorizer(analyzer='char', ngram_range=(1, 3))
    X = tfidf.fit_transform(data['Name']).toarray()
    y = data['Gender'].values  # Labels: 1 for Male, 0 for Female

    # 4. Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # 5-6. Build and compile the neural network
    model = build_model(X_train.shape[1])

    # 7. Train the model
    model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

    # 8. Save the model after training
    model.save(MODEL_FILE)

    # 9. Save the TF-IDF vectorizer (needed to featurise names at inference time)
    dump(tfidf, VECTORIZER_FILE)

    # 10. Evaluate: threshold the probabilities and flatten the (n, 1) output
    # so it has the same shape as y_test.
    y_pred = (model.predict(X_test) > 0.5).astype("int32").ravel()
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Model Accuracy: {accuracy * 100:.2f}%")


if __name__ == "__main__":
    main()
Trained.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import os
3
+ from tensorflow.keras.models import load_model
4
+ from joblib import load
5
+
6
+ # Function to predict gender based on a name
7
def predict_gender(name, model, tfidf, threshold=0.5):
    """Predict 'Male' or 'Female' for a single name.

    Args:
        name: The name to classify. Leading/trailing whitespace is ignored so
            stray spaces from user input do not alter the character n-grams.
        model: Fitted classifier whose ``predict`` returns one probability
            per input row (the training script encodes 'M' as 1, 'F' as 0).
        tfidf: Fitted vectorizer whose ``transform`` turns a list of names
            into a sparse feature matrix.
        threshold: Probability above which the name is labelled 'Male'.

    Returns:
        'Male' if the predicted probability exceeds ``threshold``,
        otherwise 'Female'.
    """
    features = tfidf.transform([name.strip()]).toarray()
    # verbose=0 keeps Keras from printing a progress bar on every prediction.
    probability = float(model.predict(features, verbose=0)[0][0])
    return 'Male' if probability > threshold else 'Female'
11
+
12
# Artifacts produced by the training script.
MODEL_FILE = 'gender_prediction_model.h5'
VECTORIZER_FILE = 'tfidf_vectorizer.joblib'


def main():
    """Load the saved model and vectorizer, then predict genders interactively.

    Raises:
        FileNotFoundError: If the model or the vectorizer file is missing.
    """
    # Check both artifacts up front so a missing file fails fast, before the
    # comparatively slow model load.
    for required_file in (MODEL_FILE, VECTORIZER_FILE):
        if not os.path.exists(required_file):
            raise FileNotFoundError(f"{required_file} not found. Please ensure the file exists in the current directory.")

    # Load the pre-trained model
    model = load_model(MODEL_FILE)

    # Load the TF-IDF vectorizer.
    # NOTE: joblib files are pickles; only load a vectorizer from a trusted source.
    tfidf = load(VECTORIZER_FILE)

    # Main loop to take user input for predictions
    while True:
        try:
            name = input("Enter a name to predict gender (or type 'exit' to quit): ").strip()
        except (EOFError, KeyboardInterrupt):
            # End of piped input or Ctrl-C: leave quietly instead of a traceback.
            print()
            break
        if name.lower() == 'exit':
            break
        if not name:
            # Nothing to classify; prompt again.
            continue
        predicted_gender = predict_gender(name, model, tfidf)
        print(f"The predicted gender for '{name}' is: {predicted_gender}")


if __name__ == "__main__":
    main()
gender_prediction_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2cc4d6f2709ff0d838d5fb5694116cd806ba2ad2e714bc2ee8fce63dba34f03
3
+ size 15286728
tfidf_vectorizer.joblib ADDED
Binary file (169 kB). View file