File size: 9,417 Bytes
c69a273
1
{
 "metadata": {"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.14","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[{"sourceId":9715840,"sourceType":"datasetVersion","datasetId":5943727},{"sourceId":9739406,"sourceType":"datasetVersion","datasetId":5915042}],"dockerImageVersionId":30786,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},
 "nbformat_minor": 4,
 "nbformat": 4,
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# AI-generated vs. real image classification with EfficientNetB0\n",
    "\n",
    "Trains a binary classifier (`FAKE` vs. `REAL`) on top of an EfficientNetB0 backbone in two stages: the new classification head is trained first with the backbone frozen, then the upper backbone layers are fine-tuned at a low learning rate. The model is evaluated on the test split after each stage."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"trusted": true},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import os\n",
    "import hashlib\n",
    "import tensorflow as tf\n",
    "from PIL import Image\n",
    "from tensorflow.keras import layers, models\n",
    "from tensorflow.keras.preprocessing.image import load_img, img_to_array  # Correct import\n",
    "from tensorflow.keras.preprocessing import image\n",
    "import numpy as np\n",
    "from tensorflow.keras.preprocessing.image import ImageDataGenerator\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.applications import EfficientNetB0\n",
    "from tensorflow.keras.layers import Dropout, Dense, GlobalAveragePooling2D\n",
    "from tensorflow.keras.models import Model\n",
    "from sklearn.metrics import confusion_matrix, classification_report\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score\n",
    "from os import listdir"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"trusted": true},
   "outputs": [],
   "source": [
    "# Paths and settings shared by the cells below.\n",
    "DATASET_DIR = '/kaggle/input/ai-vs-real-dataset/_DATASET/dataset'\n",
    "TRAIN_DIR = os.path.join(DATASET_DIR, 'train')\n",
    "TEST_DIR = os.path.join(DATASET_DIR, 'test')\n",
    "BACKBONE_WEIGHTS = '/kaggle/input/efficientnetb0-weights/efficientnetb0_notop.h5'\n",
    "PRE_MODEL_PATH = '/kaggle/working/EfficientNet_pre_model.h5'\n",
    "FINE_TUNE_MODEL_PATH = '/kaggle/working/EfficientNet_fine_tune_model.h5'\n",
    "\n",
    "IMAGE_SIZE = (224, 224)\n",
    "TRAIN_BATCH_SIZE = 128  # Adjust based on your TPU memory\n",
    "TEST_BATCH_SIZE = 64\n",
    "HEAD_EPOCHS = 20\n",
    "FINE_TUNE_EPOCHS = 10\n",
    "FROZEN_BACKBONE_LAYERS = 150  # backbone layers below this index stay frozen while fine-tuning\n",
    "PIXEL_MAX = 255.0  # the image datasets below are not rescaled, so pixels are on the 0-255 scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"trusted": true},
   "outputs": [],
   "source": [
    "# Check for TPU\n",
    "try:\n",
    "    resolver = tf.distribute.cluster_resolver.TPUClusterResolver()\n",
    "    tf.config.experimental_connect_to_cluster(resolver)\n",
    "    tf.tpu.experimental.initialize_tpu_system(resolver)\n",
    "    strategy = tf.distribute.TPUStrategy(resolver)\n",
    "except ValueError:\n",
    "    strategy = tf.distribute.get_strategy()  # Default to CPU/GPU strategy if TPU is not found"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Training data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"trusted": true},
   "outputs": [],
   "source": [
    "def augment_image(image, label):\n",
    "    \"\"\"Randomly flip and brightness-shift a batch of images; labels pass through.\n",
    "\n",
    "    The images arrive on the 0-255 pixel scale, so the brightness delta is\n",
    "    expressed on that scale (10% of the range) and the result is clipped back\n",
    "    into the valid range.\n",
    "    \"\"\"\n",
    "    image = tf.image.random_flip_left_right(image)\n",
    "    image = tf.image.random_brightness(image, max_delta=0.1 * PIXEL_MAX)\n",
    "    image = tf.clip_by_value(image, 0.0, PIXEL_MAX)\n",
    "    return image, label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"trusted": true},
   "outputs": [],
   "source": [
    "train_raw = tf.keras.utils.image_dataset_from_directory(\n",
    "    TRAIN_DIR,\n",
    "    image_size=IMAGE_SIZE,\n",
    "    batch_size=TRAIN_BATCH_SIZE,\n",
    "    shuffle=True,\n",
    ")\n",
    "\n",
    "# Augment on the fly and prefetch to improve performance\n",
    "train_data = train_raw.map(augment_image, num_parallel_calls=tf.data.AUTOTUNE)\n",
    "train_data = train_data.prefetch(tf.data.AUTOTUNE)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model and stage 1 training (classification head only)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"trusted": true},
   "outputs": [],
   "source": [
    "with strategy.scope():\n",
    "    base_model = tf.keras.applications.EfficientNetB0(\n",
    "        weights=BACKBONE_WEIGHTS,\n",
    "        include_top=False,\n",
    "        input_shape=IMAGE_SIZE + (3,),\n",
    "    )\n",
    "    # Stage 1 trains only the new head; the backbone is unfrozen in the fine-tuning stage.\n",
    "    base_model.trainable = False\n",
    "\n",
    "    # Add custom classification layers\n",
    "    x = base_model.output\n",
    "    x = tf.keras.layers.GlobalAveragePooling2D()(x)\n",
    "    x = tf.keras.layers.Dense(128, activation='relu')(x)\n",
    "    x = tf.keras.layers.Dropout(0.5)(x)\n",
    "    x = tf.keras.layers.Dense(1, activation='sigmoid')(x)\n",
    "\n",
    "    # Create and compile the final model\n",
    "    model = tf.keras.Model(inputs=base_model.input, outputs=x)\n",
    "    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n",
    "\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"trusted": true},
   "outputs": [],
   "source": [
    "with strategy.scope():\n",
    "    history_head = model.fit(train_data, epochs=HEAD_EPOCHS)\n",
    "    model.save(PRE_MODEL_PATH)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluation on the test split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"trusted": true},
   "outputs": [],
   "source": [
    "def evaluate_and_report(model, dataset, class_names):\n",
    "    \"\"\"Print classification metrics for `model` on `dataset` and plot the confusion matrix.\n",
    "\n",
    "    The true labels and the predictions are collected in two separate passes over\n",
    "    `dataset`, so it must yield its batches in the same order on every pass\n",
    "    (build it with shuffle=False).\n",
    "\n",
    "    Args:\n",
    "        model: compiled Keras model with a single sigmoid output.\n",
    "        dataset: tf.data dataset yielding (images, integer labels) batches.\n",
    "        class_names: display names for label 0 and label 1, in that order.\n",
    "\n",
    "    Returns:\n",
    "        The confusion matrix computed by sklearn.\n",
    "    \"\"\"\n",
    "    # Extract true labels from the dataset\n",
    "    y_true = np.concatenate([labels.numpy() for _, labels in dataset], axis=0)\n",
    "\n",
    "    # Make predictions\n",
    "    with strategy.scope():\n",
    "        predictions = model.predict(dataset)\n",
    "\n",
    "    # Convert probabilities to binary labels, flattened to match the shape of y_true\n",
    "    y_pred = (predictions > 0.5).astype('int32').flatten()\n",
    "\n",
    "    cm = confusion_matrix(y_true, y_pred)\n",
    "    print('Confusion Matrix:')\n",
    "    print(cm)\n",
    "\n",
    "    print('Classification Report:')\n",
    "    print(classification_report(y_true, y_pred, target_names=class_names))\n",
    "\n",
    "    print(f'Accuracy: {accuracy_score(y_true, y_pred)}')\n",
    "    print(f'Precision: {precision_score(y_true, y_pred)}')\n",
    "    print(f'Recall: {recall_score(y_true, y_pred)}')\n",
    "    print(f'F1 Score: {f1_score(y_true, y_pred)}')\n",
    "\n",
    "    # Plot confusion matrix for better visualization\n",
    "    plt.figure(figsize=(6, 6))\n",
    "    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)\n",
    "    plt.title('Confusion Matrix')\n",
    "    plt.ylabel('True Label')\n",
    "    plt.xlabel('Predicted Label')\n",
    "    plt.show()\n",
    "    return cm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"trusted": true},
   "outputs": [],
   "source": [
    "# Load the test dataset\n",
    "test_data = tf.keras.utils.image_dataset_from_directory(\n",
    "    TEST_DIR,\n",
    "    image_size=IMAGE_SIZE,\n",
    "    batch_size=TEST_BATCH_SIZE,\n",
    "    shuffle=False,  # Important to keep the order for predictions\n",
    ")\n",
    "# Class names come from the sub-directory names of the test split.\n",
    "class_names = test_data.class_names\n",
    "\n",
    "cm_head = evaluate_and_report(model, test_data, class_names)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Stage 2: fine-tune the upper backbone layers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"trusted": true},
   "outputs": [],
   "source": [
    "with strategy.scope():\n",
    "    # Unfreeze the backbone, then re-freeze everything that should not be updated.\n",
    "    base_model.trainable = True\n",
    "    for index, layer in enumerate(base_model.layers):\n",
    "        # Fine-tune from layer FROZEN_BACKBONE_LAYERS onwards (adjust as needed).\n",
    "        # BatchNormalization layers stay frozen so that their statistics are not\n",
    "        # updated while fine-tuning.\n",
    "        if index < FROZEN_BACKBONE_LAYERS or isinstance(layer, tf.keras.layers.BatchNormalization):\n",
    "            layer.trainable = False\n",
    "\n",
    "    # Re-compile the model with a lower learning rate\n",
    "    model.compile(\n",
    "        optimizer=tf.keras.optimizers.Adam(1e-5),\n",
    "        loss='binary_crossentropy',\n",
    "        metrics=['accuracy'],\n",
    "    )\n",
    "\n",
    "    # Train again for a few additional epochs\n",
    "    history_fine = model.fit(train_data, epochs=FINE_TUNE_EPOCHS)\n",
    "    model.save(FINE_TUNE_MODEL_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"trusted": true},
   "outputs": [],
   "source": [
    "cm_fine = evaluate_and_report(model, test_data, class_names)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reload the saved fine-tuned model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"trusted": true},
   "outputs": [],
   "source": [
    "model = tf.keras.models.load_model(FINE_TUNE_MODEL_PATH)\n",
    "# evaluate() returns the loss followed by the compiled metrics (accuracy only here).\n",
    "test_loss, test_accuracy = model.evaluate(test_data)\n",
    "print(f'Test Accuracy: {test_accuracy}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Spot-check single-image predictions (for deployment)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"trusted": true},
   "outputs": [],
   "source": [
    "def predictPicture(imagePath):\n",
    "    \"\"\"Print the model's prediction for one image file.\n",
    "\n",
    "    Args:\n",
    "        imagePath: path to a .png, .jpg or .jpeg file (extension check is case-insensitive).\n",
    "\n",
    "    Returns:\n",
    "        The predicted probability as a float (above 0.5 is reported as Real),\n",
    "        or None when the path does not have a supported image extension.\n",
    "    \"\"\"\n",
    "    if not imagePath.lower().endswith(('.png', '.jpg', '.jpeg')):\n",
    "        print('Please provide a valid image file (PNG or JPG format).')\n",
    "        return None\n",
    "\n",
    "    # Load the image at the model input size and add a batch dimension\n",
    "    img = load_img(imagePath, target_size=IMAGE_SIZE)\n",
    "    img_arr = np.expand_dims(img_to_array(img), axis=0)\n",
    "\n",
    "    with strategy.scope():\n",
    "        prediction = model.predict(img_arr)\n",
    "\n",
    "    # Output the predicted probability\n",
    "    print(prediction[0])\n",
    "\n",
    "    # Output the label based on probability\n",
    "    probability = float(prediction[0][0])\n",
    "    if probability > 0.5:\n",
    "        print('Prediction: Real')\n",
    "    else:\n",
    "        print('Prediction: Fake')\n",
    "    return probability\n",
    "\n",
    "\n",
    "def predict_every_nth(directory, step=200):\n",
    "    \"\"\"Run predictPicture on every `step`-th file of `directory`, in sorted name order.\"\"\"\n",
    "    for file_name in sorted(os.listdir(directory))[::step]:\n",
    "        predictPicture(os.path.join(directory, file_name))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"trusted": true},
   "outputs": [],
   "source": [
    "fake_test = os.path.join(TEST_DIR, 'FAKE')\n",
    "real_test = os.path.join(TEST_DIR, 'REAL')\n",
    "\n",
    "predict_every_nth(fake_test)\n",
    "print('CHANGE')\n",
    "predict_every_nth(real_test)"
   ]
  }
 ]
}