Milestone 1

Problem Definition¶

The context: Why is this problem important to solve?
The objectives: What is the intended goal?
The key questions: What are the key questions that need to be answered?
The problem formulation: What is it that we are trying to solve using data science?

Data Description¶

There are a total of 24,958 training and 2,600 test images (in color), taken from microscopic images of blood cells. The images fall into the following categories:

Parasitized: The parasitized cells contain the Plasmodium parasite, which causes malaria
Uninfected: The uninfected cells are free of the Plasmodium parasite

Important Notes¶

  • This notebook can be considered a guide to refer to while solving the problem. The evaluation will be as per the Rubric shared for each Milestone; unlike previous courses, it does not follow the pattern of graded questions in different sections. This notebook gives you a direction on what steps need to be taken to reach a viable solution. Please note that this is just one way of doing it; there can be other 'creative' ways to solve the problem, and we urge you to explore them as an 'optional' exercise.

  • In the notebook, there are markdown cells called Observations and Insights. It is good practice to record observations and extract insights from the outputs.

  • The naming convention for different variables can vary. Please consider the code provided in this notebook as sample code.

  • All the outputs in the notebook are just for reference and can be different if you follow a different approach.

  • There are sections called Think About It in the notebook that will help you get a better understanding of the reasoning behind a particular technique/step. Interested learners can take alternative approaches if they want to explore different techniques.

Mounting the Drive

In [1]:
# Mounting the drive
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive

Loading libraries¶

In [2]:
# Importing libraries required to load the data
import zipfile

import os

from PIL import Image

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import MinMaxScaler

# To ignore warnings
import warnings

warnings.filterwarnings('ignore')

# Remove the limit from the number of displayed columns and rows. It helps to see the entire dataframe while printing it
pd.set_option("display.max_columns", None)

pd.set_option("display.max_rows", 200)

Let us load the data¶

Note:

  • You must download the dataset from the link provided on Olympus and upload the same on your Google drive before executing the code in the next cell.
  • In case of any error, please make sure that the path of the file is correct as the path may be different for you.
In [3]:
# Storing the path of the data file from the Google drive
path = '/content/drive/MyDrive/Capstone_Project/cell_images.zip'

# The data is provided as a zip file so we need to extract the files from the zip file
with zipfile.ZipFile(path, 'r') as zip_ref:

    zip_ref.extractall()

The extracted folder contains separate train and test folders, each of which has subfolders of parasitized and uninfected cell images of varying sizes.

All images must be resized to the same dimensions and stacked into a 4D array so that they can be used as input for the convolutional neural network. We also need to create labels for both types of images to be able to train and test the model.

Let's do the same for the training data first and then we will use the same code for the test data as well.

In [4]:
# Storing the path of the extracted "train" folder 
train_dir = '/content/cell_images/train'

# Size of image so that each image has the same size
SIZE = 64

# Empty list to store the training images after they are converted to NumPy arrays
train_images = []

# Empty list to store the training labels (0 - uninfected, 1 - parasitized)
train_labels = []
In [5]:
# We will run the same code for "parasitized" as well as "uninfected" folders within the "train" folder
for folder_name in ['/parasitized/', '/uninfected/']:
    
    # List of image file names in the folder
    images_path = os.listdir(train_dir + folder_name)

    for i, image_name in enumerate(images_path):
    
        try:
    
            # Opening each image using the path of that image
            image = Image.open(train_dir + folder_name + image_name)

            # Resizing each image to (64, 64)
            image = image.resize((SIZE, SIZE))

            # Converting images to arrays and appending that array to the empty list defined above
            train_images.append(np.array(image))

            # Creating labels for parasitized and uninfected images
            if folder_name == '/parasitized/':
            
                train_labels.append(1)
           
            else:
           
                train_labels.append(0)
        
        except Exception:

            # Skipping any file that cannot be opened as an image
            pass

# Converting lists to arrays
train_images = np.array(train_images)

train_labels = np.array(train_labels)
In [6]:
# Storing the path of the extracted "test" folder 
test_dir = '/content/cell_images/test'

# Size of image so that each image has the same size (it must be same as the train image size)
SIZE = 64

# Empty list to store the testing images after they are converted to NumPy arrays
test_images = []

# Empty list to store the testing labels (0 - uninfected, 1 - parasitized)
test_labels = []
In [7]:
# We will run the same code for "parasitized" as well as "uninfected" folders within the "test" folder
for folder_name in ['/parasitized/', '/uninfected/']:
    
    # List of image file names in the folder
    images_path = os.listdir(test_dir + folder_name)

    for i, image_name in enumerate(images_path):

        try:
            # Opening each image using the path of that image
            image = Image.open(test_dir + folder_name + image_name)
            
            # Resizing each image to (64, 64)
            image = image.resize((SIZE, SIZE))
            
            # Converting images to arrays and appending that array to the empty list defined above
            test_images.append(np.array(image))
            
            # Creating labels for parasitized and uninfected images
            if folder_name == '/parasitized/':

                test_labels.append(1)

            else:

                test_labels.append(0)

        except Exception:

            # Skipping any file that cannot be opened as an image
            pass

# Converting lists to arrays
test_images = np.array(test_images)

test_labels = np.array(test_labels)

Checking the shape of train and test images

In [20]:
# Shape of images
print("Shape of first train image:", train_images[0].shape)

print()

print("Shape of first test image:", test_images[0].shape)
Shape of first train image: (64, 64, 3)

Shape of first test image: (64, 64, 3)

Checking the shape of train and test labels

In [18]:
# Shape of labels 
print("Shape of train labels:", train_labels.shape)

print()

print("Shape of test labels:", test_labels.shape)
Shape of train labels: (24958,)

Shape of test labels: (2600,)

Observations and insights: The images in both the train and test directories have been resized to a height and width of 64 pixels, with 3 colour channels (red, green, and blue). The shape of the labels gives the total number of images in the train and test folders, confirming the data description above.
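
As a quick sanity check (a minimal sketch using the arrays built above), we can confirm that every image, and not just the first, has the expected (64, 64, 3) shape:

In [ ]:
# Collecting the set of distinct shapes across both arrays; a single
# entry of (64, 64, 3) confirms the resize worked everywhere
print({img.shape for img in train_images})

print({img.shape for img in test_images})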

Check the minimum and maximum range of pixel values for train and test images

In [24]:
# Try to use min and max function from numpy

print("Pixel range for train images:", np.min(train_images), "-", np.max(train_images))
 
print()

print("Pixel range for test images:", np.min(test_images), "-", np.max(test_images))
Pixel range for train images: 0 - 255

Pixel range for test images: 0 - 255

Observations and insights: The pixel values for both the train and test images range from 0 to 255.

Count the number of values in both uninfected and parasitized

In [66]:
# Try to use value_counts to count the values

malaria_train = pd.Series(train_labels)
train_para = malaria_train.value_counts()[1]
train_uninf = malaria_train.value_counts()[0]
print("In train set")
print("Number of parasitized:", train_para)
print("Number of uninfected:",train_uninf, '\n')

malaria_test = pd.Series(test_labels)
test_para = malaria_test.value_counts()[1]
test_uninf = malaria_test.value_counts()[0]
print("In test set")
print("Number of parasitized:", test_para)
print("Number of uninfected:", test_uninf)
In train set
Number of parasitized: 12582
Number of uninfected: 12376 

In test set
Number of parasitized: 1300
Number of uninfected: 1300

Normalize the images

In [65]:
# Try to normalize the train and test images by dividing it by 255 and convert them to float32 using astype function
train_images = (train_images/255).astype('float32')

test_images = (test_images/255).astype('float32')

Observations and insights: The numbers of parasitized versus uninfected images are fairly evenly distributed in the train set and perfectly split in the test set. The images were normalized by dividing by 255, the maximum pixel value.
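
Why cast to float32? Dividing a uint8 array by 255 promotes it to float64 by default; casting to float32 halves the memory footprint with no practical loss of precision for pixel data. A minimal sketch on a small dummy array (hypothetical shape, for illustration only):

In [ ]:
# A dummy uint8 batch the same shape as 100 of our images
demo = np.zeros((100, 64, 64, 3), dtype = 'uint8')

print((demo / 255).dtype, (demo / 255).nbytes)        # float64, 9830400 bytes

print((demo / 255).astype('float32').nbytes)          # 4915200 bytes, half the size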

Plot to check if the data is balanced

In [133]:
# You are free to use bar plot or pie-plot or count plot, etc. to plot the labels of train and test data and check if they are balanced
fig = plt.figure()
ax = fig.add_axes([0,0,0.5,1])
count = [train_para, train_uninf]
infection = ['Parasitized', 'Uninfected']

x_pos= [0, 0.5]
plt.bar(x_pos,count, width=0.3, color=['r','b'])

plt.yticks(np.arange(0, 14000, 1000))
plt.text(x=-0.05, y=train_para +50, s = train_para)
plt.text(x=0.45, y=train_uninf +50, s = train_uninf)
plt.xticks(x_pos, infection)

ax.set_ylabel('Count')
ax.set_title('Malaria in Train Data')

plt.show()

print()

fig = plt.figure()
ax = fig.add_axes([0,0,0.5,1])
count = [test_para, test_uninf]
infection = ['Parasitized', 'Uninfected']

x_pos= [0, 0.5]
plt.bar(x_pos,count, width=0.3, color=['r','b'])

plt.yticks(np.arange(0, 14000, 1000))
plt.text(x=-0.05, y=test_para +50, s = test_para)
plt.text(x=0.45, y=test_uninf +50, s = test_uninf)
plt.xticks(x_pos, infection)

ax.set_ylabel('Count')
ax.set_title('Malaria in Test Data')

plt.show()

Observations and insights: Here we can clearly see that there are roughly equal numbers of parasitized and uninfected images in the train and test data sets. The training set is much larger than the test set, allowing the model to use as much data as possible to find meaningful patterns for detecting malaria.

Data Exploration¶

Let's visualize the images from the train data

In [140]:
# This code will help you in visualizing both the parasitized and uninfected images
np.random.seed(42)

plt.figure(1, figsize = (16 , 16))

for n in range(1, 17):

    plt.subplot(4, 4, n)

    index = int(np.random.randint(0, train_images.shape[0], 1))

    if train_labels[index] == 1: 

        plt.title('parasitized')

    else:
        plt.title('uninfected')

    plt.imshow(train_images[index])

    plt.axis('off')

Observations and insights: We can see that the parasitized images have small magenta spots indicating the damage to the red blood cells. The parasitized cells tend to be light pink in colour and the uninfected tend to be lavender, although this is not always the case. Most of the cells have a round perimeter, but again this is not universal.

Similarly visualize the images with subplot(6, 6) and figsize = (12, 12)

In [138]:
# Hint: Pay attention to how many times the for loop should iterate

np.random.seed(42)

plt.figure(1, figsize = (12 , 12))

for n in range(1, 37):

    plt.subplot(6, 6, n)

    index = int(np.random.randint(0, train_images.shape[0], 1))

    if train_labels[index] == 1: 

        plt.title('parasitized')

    else:
        plt.title('uninfected')

    plt.imshow(train_images[index])

    plt.axis('off')

Observations and insights: We can see that our observations regarding the magenta spots, lavender uninfected, light pink parasitized, and overall round cells still hold for these 36 images.

Plotting the mean images for parasitized and uninfected

In [141]:
# Function to find and display the mean image of a class
def find_mean_img(full_mat, title):

    # Calculate the average across all images; the trailing [0] drops the
    # singleton dimension added when each image was appended as [img]
    mean_img = np.mean(full_mat, axis = 0)[0]

    # Display the mean image
    plt.imshow(mean_img)

    plt.title(f'Average {title}')

    plt.axis('off')

    plt.show()

    return mean_img

Mean image for parasitized

In [142]:
# If the label = 1 then the image is parasitised and if the label = 0 then the image is uninfected
parasitized_data = []  # Create a list to store the parasitized data

for img, label in zip(train_images, train_labels):

    if label == 1:

        # Appending as [img] adds a singleton dimension; find_mean_img's [0] removes it
        parasitized_data.append([img])

parasitized_mean = find_mean_img(np.array(parasitized_data), 'Parasitized')   # find the mean

Mean image for uninfected

In [143]:
# Similarly write the code to find the mean image of uninfected
# If the label = 1 then the image is parasitised and if the label = 0 then the image is uninfected
uninfected_data = []  # Create a list to store the uninfected data

for img, label in zip(train_images, train_labels):

    if label == 0:

        uninfected_data.append([img])

uninfected_mean = find_mean_img(np.array(uninfected_data), 'Uninfected')   # find the mean

Observations and insights: The mean images further confirm that parasitized cells are more pink, uninfected cells are lavender, and the blood cells are round overall.
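
To put a rough number on the colour difference (a minimal sketch using the two mean images computed above; the exact values will depend on the data), we can compare the per-channel averages:

In [ ]:
# Per-channel (R, G, B) averages of each mean image; the gap between the
# two vectors quantifies the pink-vs-lavender observation
print("Parasitized RGB means:", parasitized_mean.mean(axis = (0, 1)))

print("Uninfected RGB means: ", uninfected_mean.mean(axis = (0, 1)))

print("Difference:           ", parasitized_mean.mean(axis = (0, 1)) - uninfected_mean.mean(axis = (0, 1)))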

Converting Images from RGB to HSV using OpenCV

Converting the train data

In [144]:
import cv2

gfx = []   # To hold the HSV image arrays (first 100 train images only, for visualization)

for i in np.arange(0, 100, 1):

  a = cv2.cvtColor(train_images[i], cv2.COLOR_BGR2HSV)
  
  gfx.append(a)

gfx = np.array(gfx)
In [145]:
viewimage = np.random.randint(1, 100, 5)

fig, ax = plt.subplots(1, 5, figsize = (18, 18))

for t, i in zip(range(5), viewimage):

  Title = train_labels[i]

  ax[t].set_title(Title)

  ax[t].imshow(gfx[i])

  ax[t].set_axis_off()
  
  fig.tight_layout()
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
(The warning repeats for each of the 5 images: imshow treats the HSV array as RGB, and the hue channel of a float HSV image ranges up to 360, outside [0..1].)

Converting the test data

In [146]:
# Similarly, converting and visualizing the images in the test data
gft = []   # To hold the HSV test image arrays (first 100 test images only)

for i in np.arange(0, 100, 1):

  a = cv2.cvtColor(test_images[i], cv2.COLOR_BGR2HSV)

  gft.append(a)

gft = np.array(gft)

viewimage = np.random.randint(1, 100, 5)

fig, ax = plt.subplots(1, 5, figsize = (18, 18))

for t, i in zip(range(5), viewimage):

  Title = test_labels[i]

  ax[t].set_title(Title)

  ax[t].imshow(gft[i])

  ax[t].set_axis_off()

  fig.tight_layout()
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
(Repeated for each of the 5 images, as above.)

Observations and insights: After converting to HSV (hue, saturation, value), the parasitized spots become more easily visible as bright yellow regions.
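
One way to exploit this contrast (a minimal sketch using the gfx HSV arrays built above; the 0.5 threshold is an arbitrary value to tune by inspection) is to threshold the saturation channel and see which pixels light up:

In [ ]:
# Saturation is channel 1 of an HSV image; for float input OpenCV keeps
# S in [0, 1], so a fixed threshold can isolate the highly saturated spots
sat = gfx[0][:, :, 1]

mask = sat > 0.5   # arbitrary threshold

plt.imshow(mask, cmap = 'gray')

plt.title('High-saturation pixels of the first train image')

plt.axis('off')

plt.show()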

Processing Images using Gaussian Blurring

Gaussian Blurring on train data

In [147]:
gbx = []  # To hold the blurred images

for i in np.arange(0, 100, 1):

  b = cv2.GaussianBlur(train_images[i], (5, 5), 0)

  gbx.append(b)

gbx = np.array(gbx)
In [150]:
viewimage = np.random.randint(1, 100, 5)

fig, ax = plt.subplots(1, 5, figsize = (18, 18))

for t, i in zip(range(5), viewimage):

  Title = train_labels[i]

  ax[t].set_title(Title)
  
  ax[t].imshow(gbx[i])
  
  ax[t].set_axis_off()
  
  fig.tight_layout()

Gaussian Blurring on test data

In [151]:
# Similarly you can apply Gaussian blurring for the images in the test data
gbt = []  # To hold the blurred images

for i in np.arange(0, 100, 1):

  b = cv2.GaussianBlur(test_images[i], (5, 5), 0)

  gbt.append(b)

gbt = np.array(gbt)
In [152]:
viewimage = np.random.randint(1, 100, 5)

fig, ax = plt.subplots(1, 5, figsize = (18, 18))

for t, i in zip(range(5), viewimage):

  Title = test_labels[i]

  ax[t].set_title(Title)

  ax[t].imshow(gbt[i])
  
  ax[t].set_axis_off()
  
  fig.tight_layout()

Observations and insights: Gaussian blur smooths the whole image, making the cells easier to identify by their colours rather than by shape.

Think About It: Would blurring help us for this problem statement in any way? What else can we try? Blurring does not necessarily help here: the colours we rely on for recognition (magenta, light pink, and lavender) all belong to the same colour family, so blurring can wash out the key spot characteristics we are trying to find. HSV conversion, by contrast, provides a sharper colour contrast for the key identifiers in the image, making it a better preprocessing filter. One more alternative is sketched below.
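
For instance, edge detection is another preprocessing filter worth exploring, since parasite spots create strong local gradients. A minimal sketch (cv2.Canny expects single-channel 8-bit input, so we convert back from the normalized float images first; the thresholds 100 and 200 are arbitrary starting values):

In [ ]:
# Recovering an 8-bit grayscale version of the first train image
img_8bit = (train_images[0] * 255).astype('uint8')

gray = cv2.cvtColor(img_8bit, cv2.COLOR_RGB2GRAY)

# Canny edge detection; spot boundaries should survive as edges
edges = cv2.Canny(gray, threshold1 = 100, threshold2 = 200)

plt.imshow(edges, cmap = 'gray')

plt.title('Canny edges of the first train image')

plt.axis('off')

plt.show()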

Proposed approach¶

Potential techniques: What different techniques should be explored?

Potential techniques that can be explored are artificial neural networks (ANNs) and convolutional neural networks (CNNs). An ANN stacks layers of nodes that mimic the way neurons process information in the brain. A CNN is designed specifically for image data and uses convolutional filter layers to extract spatial features from the input tensors. Varying the layers and their parameters lets us tune the model for the best accuracy.

Overall solution design: What is the potential solution design?
The solution design will consist of varying layers to optimize classification, such as dense layers with ReLU activation, a cross-entropy loss, and the Adam optimizer. Additionally, the number of epochs will vary to accommodate the number of nodes and layers. For the CNNs, we will also use LeakyReLU, batch normalization, and dropout to avoid overfitting.

Measures of success: What are the key measures of success to compare different techniques?
Precision, recall, and F1-score will characterize performance on the parasitized and uninfected classes, and a confusion matrix displayed as a heatmap will show the relationship between the model's predictions and the actual classifications. The formulas are given below.
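
For reference, these metrics are defined in terms of true positives (TP), false positives (FP), and false negatives (FN):

$$\text{Precision} = \frac{TP}{TP + FP}, \qquad \text{Recall} = \frac{TP}{TP + FN}, \qquad F_1 = 2\cdot\frac{\text{Precision}\cdot\text{Recall}}{\text{Precision} + \text{Recall}}$$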

Milestone 2

Mounting the Drive

In [92]:
# Mounting the drive
from google.colab import drive

drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

Loading libraries¶

In [93]:
# Importing libraries required to load the data
import zipfile

import os

from PIL import Image

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import MinMaxScaler


import tensorflow as tf

from tensorflow.keras import optimizers

from tensorflow.keras.utils import to_categorical


# To ignore warnings
import warnings

warnings.filterwarnings('ignore')

# Remove the limit from the number of displayed columns and rows. It helps to see the entire dataframe while printing it
pd.set_option("display.max_columns", None)

pd.set_option("display.max_rows", 200)

Let us load the data¶

Note:

  • You must download the dataset from the link provided on Olympus and upload the same on your Google drive before executing the code in the next cell.
  • In case of any error, please make sure that the path of the file is correct as the path may be different for you.
In [94]:
# Storing the path of the data file from the Google drive
path = '/content/drive/MyDrive/Capstone_Project/cell_images.zip'

# The data is provided as a zip file so we need to extract the files from the zip file
with zipfile.ZipFile(path, 'r') as zip_ref:

    zip_ref.extractall()

The files have been extracted to the local session of Google Colab. The extracted folder has the following structure:

[Image: Folder_Structure.PNG — diagram of the extracted folder structure]

The extracted folder contains separate train and test folders, each of which has subfolders of parasitized and uninfected cell images of varying sizes.

All images must be resized to the same dimensions and stacked into a 4D array so that they can be used as input for the convolutional neural network. We also need to create labels for both types of images to be able to train and test the model.

Let's do the same for the training data first and then we will use the same code for the test data as well.

In [95]:
# Storing the path of the extracted "train" folder 
train_dir = '/content/cell_images/train'

# Size of image so that each image has the same size
SIZE = 64

# Empty list to store the training images after they are converted to NumPy arrays
train_images = []

# Empty list to store the training labels (0 - uninfected, 1 - parasitized)
train_labels = []
In [96]:
# We will run the same code for "parasitized" as well as "uninfected" folders within the "train" folder
for folder_name in ['/parasitized/', '/uninfected/']:
    
    # List of image file names in the folder
    images_path = os.listdir(train_dir + folder_name)

    for i, image_name in enumerate(images_path):
    
        try:
    
            # Opening each image using the path of that image
            image = Image.open(train_dir + folder_name + image_name)

            # Resizing each image to (64, 64)
            image = image.resize((SIZE, SIZE))

            # Converting images to arrays and appending that array to the empty list defined above
            train_images.append(np.array(image))

            # Creating labels for parasitized and uninfected images
            if folder_name == '/parasitized/':
                
                train_labels.append(1)
            
            else:
            
                train_labels.append(0)
        
        except Exception:

            # Skipping any file that cannot be opened as an image
            pass

# Converting lists to arrays
train_images = np.array(train_images)

train_labels = np.array(train_labels)
In [97]:
# Storing the path of the extracted "test" folder 
test_dir = '/content/cell_images/test'

# Size of image so that each image has the same size (it must be same as the train image size)
SIZE = 64

# Empty list to store the testing images after they are converted to NumPy arrays
test_images = []

# Empty list to store the testing labels (0 - uninfected, 1 - parasitized)
test_labels = []
In [98]:
# We will run the same code for "parasitized" as well as "uninfected" folders within the "test" folder
for folder_name in ['/parasitized/', '/uninfected/']:
    
    # List of image file names in the folder
    images_path = os.listdir(test_dir + folder_name)

    for i, image_name in enumerate(images_path):
     
        try:
            # Opening each image using the path of that image
            image = Image.open(test_dir + folder_name + image_name)
            
            # Resizing each image to (64, 64)
            image = image.resize((SIZE, SIZE))
            
            # Converting images to arrays and appending that array to the empty list defined above
            test_images.append(np.array(image))
            
            # Creating labels for parasitized and uninfected images
            if folder_name == '/parasitized/':
                
                test_labels.append(1)
            
            else:
            
                test_labels.append(0)
        
        except Exception:

            # Skipping any file that cannot be opened as an image
            pass

# Converting lists to arrays
test_images = np.array(test_images)

test_labels = np.array(test_labels)

Normalize the images

In [99]:
# Try to normalize the train and test images by dividing it by 255 and convert them to float32 using astype function
train_images = (train_images/255).astype('float32')

test_images = (test_images/255).astype('float32')

Having completed the required preprocessing and performed some EDA to gain insights in Milestone 1, we will now build our model and evaluate its performance.

One Hot Encoding on the train and test labels

In [100]:
# Encoding Train Labels

train_labels = to_categorical(train_labels, 2)

# Similarly let us try to encode test labels
test_labels = to_categorical(test_labels, 2)
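
To see what the encoding produces (a minimal sketch), label 0 becomes [1, 0] and label 1 becomes [0, 1]:

In [ ]:
# to_categorical turns integer labels into one-hot rows
print(to_categorical([0, 1, 1], 2))
# [[1. 0.]
#  [0. 1.]
#  [0. 1.]]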

Base Model¶

Note: The Base Model has been fully built and evaluated, with all outputs shown, to illustrate the process of creating a CNN architecture and evaluating its performance. A similar process can be followed when iterating to build better-performing CNN architectures.

Importing the required libraries for building and training our Model

In [101]:
# Clearing backend
from tensorflow.keras import backend

from tensorflow.keras.utils import to_categorical

from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout  

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

from random import shuffle

backend.clear_session()

# Fixing the seed for random number generators so that we can ensure we receive the same output every time
np.random.seed(42)

import random

random.seed(42)

tf.random.set_seed(42)

Building the model

In [102]:
# Creating sequential model
model = Sequential()

model.add(Conv2D(filters = 32, kernel_size = 2, padding = "same", activation = "relu", input_shape = (64, 64, 3))) #first convolutional layer

model.add(MaxPooling2D(pool_size = 2))

model.add(Dropout(0.2))

model.add(Conv2D(filters = 32, kernel_size = 2, padding = "same", activation = "relu")) #second convolutional layer

model.add(MaxPooling2D(pool_size = 2))

model.add(Dropout(0.2))

model.add(Conv2D(filters = 32, kernel_size = 2, padding = "same", activation = "relu")) #third convolutional layer

model.add(MaxPooling2D(pool_size = 2))

model.add(Dropout(0.2))

model.add(Flatten())

model.add(Dense(512, activation = "relu"))

model.add(Dropout(0.4))

model.add(Dense(2, activation = "softmax")) # 2 represents output layer neurons 

model.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 conv2d (Conv2D)             (None, 64, 64, 32)        416       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 32, 32, 32)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 32, 32, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 32)        4128      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 16, 16, 32)       0         
 2D)                                                             
                                                                 
 dropout_1 (Dropout)         (None, 16, 16, 32)        0         
                                                                 
 conv2d_2 (Conv2D)           (None, 16, 16, 32)        4128      
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 8, 8, 32)         0         
 2D)                                                             
                                                                 
 dropout_2 (Dropout)         (None, 8, 8, 32)          0         
                                                                 
 flatten (Flatten)           (None, 2048)              0         
                                                                 
 dense (Dense)               (None, 512)               1049088   
                                                                 
 dropout_3 (Dropout)         (None, 512)               0         
                                                                 
 dense_1 (Dense)             (None, 2)                 1026      
                                                                 
=================================================================
Total params: 1,058,786
Trainable params: 1,058,786
Non-trainable params: 0
_________________________________________________________________

Compiling the model

In [103]:
model.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

Using Callbacks

In [104]:
callbacks = [EarlyStopping(monitor = 'val_loss', patience = 2),
             ModelCheckpoint('.mdl_wts.hdf5', monitor = 'val_loss', save_best_only = True)]

Fit and train our Model

In [105]:
# Fit the model with a batch size of 32; the batch size can be tuned to some power of 2
history = model.fit(train_images, train_labels, batch_size = 32, callbacks = callbacks, validation_split = 0.2, epochs = 20, verbose = 1)
Epoch 1/20
624/624 [==============================] - 101s 159ms/step - loss: 0.3921 - accuracy: 0.8123 - val_loss: 0.2029 - val_accuracy: 0.9343
Epoch 2/20
624/624 [==============================] - 92s 148ms/step - loss: 0.1278 - accuracy: 0.9529 - val_loss: 0.2228 - val_accuracy: 0.9275
Epoch 3/20
624/624 [==============================] - 93s 148ms/step - loss: 0.1056 - accuracy: 0.9654 - val_loss: 0.1169 - val_accuracy: 0.9826
Epoch 4/20
624/624 [==============================] - 93s 149ms/step - loss: 0.0868 - accuracy: 0.9720 - val_loss: 0.0799 - val_accuracy: 0.9850
Epoch 5/20
624/624 [==============================] - 93s 149ms/step - loss: 0.0769 - accuracy: 0.9754 - val_loss: 0.0643 - val_accuracy: 0.9870
Epoch 6/20
624/624 [==============================] - 92s 148ms/step - loss: 0.0719 - accuracy: 0.9765 - val_loss: 0.0841 - val_accuracy: 0.9836
Epoch 7/20
624/624 [==============================] - 95s 152ms/step - loss: 0.0700 - accuracy: 0.9778 - val_loss: 0.0622 - val_accuracy: 0.9878
Epoch 8/20
624/624 [==============================] - 94s 151ms/step - loss: 0.0653 - accuracy: 0.9777 - val_loss: 0.0857 - val_accuracy: 0.9808
Epoch 9/20
624/624 [==============================] - 94s 150ms/step - loss: 0.0648 - accuracy: 0.9770 - val_loss: 0.0721 - val_accuracy: 0.9810

Evaluating the model on test data

In [106]:
accuracy = model.evaluate(test_images, test_labels, verbose = 1)
print('\n', 'Test_Accuracy:-', accuracy[1])
82/82 [==============================] - 3s 39ms/step - loss: 0.0821 - accuracy: 0.9788

 Test_Accuracy:- 0.9788461327552795

Plotting the confusion matrix

In [107]:
from sklearn.metrics import classification_report

from sklearn.metrics import confusion_matrix

pred = model.predict(test_images)

pred = np.argmax(pred, axis = 1) 

y_true = np.argmax(test_labels, axis = 1)

# Printing the classification report

class_rep = classification_report(y_true, pred)

print(class_rep)

class_rep = classification_report(y_true, pred, output_dict=True)


# Plotting the heatmap using confusion matrix
cm = confusion_matrix(y_true, pred)

plt.figure(figsize = (8, 5))

sns.heatmap(cm, annot = True,  fmt = '.0f', xticklabels = ['Uninfected', 'Parasitized'], yticklabels = ['Uninfected', 'Parasitized'])

plt.ylabel('Actual')

plt.xlabel('Predicted')

plt.show()
              precision    recall  f1-score   support

           0       0.98      0.97      0.98      1300
           1       0.97      0.98      0.98      1300

    accuracy                           0.98      2600
   macro avg       0.98      0.98      0.98      2600
weighted avg       0.98      0.98      0.98      2600

Plotting the train and validation curves

In [108]:
# Function to plot train and validation accuracy 
def plot_accuracy(history):

    N = len(history.history["accuracy"])

    plt.figure(figsize = (7, 7))

    plt.plot(np.arange(0, N), history.history["accuracy"], label = "train_accuracy", ls = '--')

    plt.plot(np.arange(0, N), history.history["val_accuracy"], label = "val_accuracy", ls = '--')

    plt.title("Accuracy vs Epoch")
    
    plt.xlabel("Epochs")
    
    plt.ylabel("Accuracy")
    
    plt.legend(loc = "lower right")
In [109]:
plot_accuracy(history)
  • Here we can clearly observe that the training and validation accuracy are increasing
  • We can also notice that the validation accuracy is slightly higher than the train accuracy

Now let's build another model with a few more layers and check whether we can improve performance. Try adding a few layers where required and altering the activation functions.

Model 1

Trying to improve the performance of our model by adding new layers

In [110]:
backend.clear_session() # Clearing the backend for new model

Building the Model

In [111]:
# Creating sequential model
model1 = Sequential()

model1.add(Conv2D(filters = 32, kernel_size = 2, padding = "same", activation = "relu", input_shape = (64, 64, 3))) #first convolutional layer

model1.add(MaxPooling2D(pool_size = 2))

model1.add(Dropout(0.2))

model1.add(Conv2D(filters = 32, kernel_size = 2, padding = "same", activation = "relu")) #second convolutional layer

model1.add(MaxPooling2D(pool_size = 2))

model1.add(Dropout(0.2))

model1.add(Conv2D(filters = 32, kernel_size = 2, padding = "same", activation = "relu")) #third convolutional layer

model1.add(MaxPooling2D(pool_size = 2))

model1.add(Dropout(0.2))

model1.add(Conv2D(filters = 32, kernel_size = 2, padding = "same", activation = "relu")) #fourth convolutional layer

model1.add(MaxPooling2D(pool_size = 2))

model1.add(Dropout(0.2))

model1.add(Conv2D(filters = 32, kernel_size = 2, padding = "same", activation = "relu")) #fifth convolutional layer

model1.add(MaxPooling2D(pool_size = 2))

model1.add(Dropout(0.2))

model1.add(Flatten())

model1.add(Dense(512, activation = "relu"))

model1.add(Dropout(0.4))

model1.add(Dense(2, activation = "softmax")) # 2 represents output layer neurons 

model1.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 conv2d (Conv2D)             (None, 64, 64, 32)        416       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 32, 32, 32)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 32, 32, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 32)        4128      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 16, 16, 32)       0         
 2D)                                                             
                                                                 
 dropout_1 (Dropout)         (None, 16, 16, 32)        0         
                                                                 
 conv2d_2 (Conv2D)           (None, 16, 16, 32)        4128      
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 8, 8, 32)         0         
 2D)                                                             
                                                                 
 dropout_2 (Dropout)         (None, 8, 8, 32)          0         
                                                                 
 conv2d_3 (Conv2D)           (None, 8, 8, 32)          4128      
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 4, 4, 32)         0         
 2D)                                                             
                                                                 
 dropout_3 (Dropout)         (None, 4, 4, 32)          0         
                                                                 
 conv2d_4 (Conv2D)           (None, 4, 4, 32)          4128      
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 2, 2, 32)         0         
 2D)                                                             
                                                                 
 dropout_4 (Dropout)         (None, 2, 2, 32)          0         
                                                                 
 flatten (Flatten)           (None, 128)               0         
                                                                 
 dense (Dense)               (None, 512)               66048     
                                                                 
 dropout_5 (Dropout)         (None, 512)               0         
                                                                 
 dense_1 (Dense)             (None, 2)                 1026      
                                                                 
=================================================================
Total params: 84,002
Trainable params: 84,002
Non-trainable params: 0
_________________________________________________________________

Compiling the model

In [112]:
model1.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

Using Callbacks

In [113]:
callbacks = [EarlyStopping(monitor = 'val_loss', patience = 2),
             ModelCheckpoint('.mdl_wts.hdf5', monitor = 'val_loss', save_best_only = True)]

Fit and Train the model

In [114]:
history1 = model1.fit(train_images, train_labels, batch_size = 32, callbacks = callbacks,  validation_split = 0.2, epochs = 20, verbose = 1)
Epoch 1/20
624/624 [==============================] - 93s 148ms/step - loss: 0.3486 - accuracy: 0.8212 - val_loss: 0.0963 - val_accuracy: 0.9834
Epoch 2/20
624/624 [==============================] - 90s 145ms/step - loss: 0.0861 - accuracy: 0.9714 - val_loss: 0.1016 - val_accuracy: 0.9712
Epoch 3/20
624/624 [==============================] - 90s 145ms/step - loss: 0.0803 - accuracy: 0.9730 - val_loss: 0.0573 - val_accuracy: 0.9804
Epoch 4/20
624/624 [==============================] - 90s 144ms/step - loss: 0.0749 - accuracy: 0.9761 - val_loss: 0.0539 - val_accuracy: 0.9826
Epoch 5/20
624/624 [==============================] - 90s 145ms/step - loss: 0.0716 - accuracy: 0.9758 - val_loss: 0.0618 - val_accuracy: 0.9766
Epoch 6/20
624/624 [==============================] - 90s 145ms/step - loss: 0.0738 - accuracy: 0.9756 - val_loss: 0.0696 - val_accuracy: 0.9750

Evaluating the model

In [115]:
accuracy1 = model1.evaluate(test_images, test_labels, verbose = 1)

print('\n', 'Test_Accuracy:-', accuracy1[1])
82/82 [==============================] - 3s 38ms/step - loss: 0.0571 - accuracy: 0.9823

 Test_Accuracy:- 0.9823076725006104

Plotting the confusion matrix

In [116]:
pred1 = model1.predict(test_images)

pred1 = np.argmax(pred1, axis = 1) 

y_true = np.argmax(test_labels, axis = 1)

# Printing the classification report
class_rep1 = classification_report(y_true, pred1)

print(class_rep1)

class_rep1 = classification_report(y_true, pred1, output_dict=True)


# Plotting the heatmap using confusion matrix
cm1 = confusion_matrix(y_true, pred1)

plt.figure(figsize = (8, 5))

sns.heatmap(cm1, annot = True,  fmt = '.0f', xticklabels = ['Uninfected', 'Parasitized'], yticklabels = ['Uninfected', 'Parasitized'])

plt.ylabel('Actual')

plt.xlabel('Predicted')

plt.show()
              precision    recall  f1-score   support

           0       0.99      0.98      0.98      1300
           1       0.98      0.99      0.98      1300

    accuracy                           0.98      2600
   macro avg       0.98      0.98      0.98      2600
weighted avg       0.98      0.98      0.98      2600

Plotting the train and the validation curves

In [117]:
# Function to plot train and validation accuracy 

plot_accuracy(history1)

Think about it:

Now let's build a model with LeakyReLU as the activation function

  • Can the model performance be improved if we change our activation function to LeakyReLU?
  • Can BatchNormalization improve our model?

Let us try to build a model using BatchNormalization and LeakyReLU as our activation function.
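
As a quick illustration of the difference (a minimal sketch; the 0.1 slope matches the LeakyReLU(0.1) used below), ReLU zeroes out all negative inputs while LeakyReLU lets a small gradient flow through them, which helps avoid 'dead' neurons:

In [ ]:
x = np.array([-2.0, -0.5, 0.0, 1.5])

print("ReLU:     ", np.maximum(0, x))              # [0.   0.   0.   1.5]

print("LeakyReLU:", np.where(x > 0, x, 0.1 * x))   # [-0.2  -0.05  0.   1.5]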

Model 2 with Batch Normalization

In [118]:
backend.clear_session() # Clearing the backend for new model

Building the Model

In [119]:
from tensorflow.keras.layers import BatchNormalization, LeakyReLU

model2 = Sequential()

model2.add(Conv2D(32, (3, 3), input_shape = (64, 64, 3), padding = 'same')) #first convolutional layer

model2.add(LeakyReLU(0.1))

model2.add(MaxPooling2D(pool_size = 2))

model2.add(Dropout(0.2))

model2.add(Conv2D(32, (3, 3), input_shape = (64, 64, 3), padding = 'same')) #second convolutional layer

model2.add(LeakyReLU(0.1))

model2.add(MaxPooling2D(pool_size = 2))

model2.add(BatchNormalization())

model2.add(Dropout(0.2))

model2.add(Conv2D(32, (3, 3), input_shape = (64, 64, 3), padding = 'same')) #third convolutional layer

model2.add(LeakyReLU(0.1))

model2.add(MaxPooling2D(pool_size = 2))

model2.add(Dropout(0.2))

model2.add(Conv2D(32, (3, 3), input_shape = (64, 64, 3), padding = 'same')) #fourth convolutional layer

model2.add(LeakyReLU(0.1))

model2.add(MaxPooling2D(pool_size = 2))

model2.add(BatchNormalization())

model2.add(Dropout(0.2))

model2.add(Conv2D(32, (3, 3), input_shape = (64, 64, 3), padding = 'same')) #fifth convolutional layer

model2.add(LeakyReLU(0.1))

model2.add(MaxPooling2D(pool_size = 2))

model2.add(Dropout(0.2))

model2.add(Flatten())

model2.add(Dense(512, activation = "relu"))

model2.add(Dropout(0.4))

model2.add(Dense(2, activation = "softmax")) # 2 represents output layer neurons 

adam = optimizers.Adam(learning_rate = 0.001)

model2.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 conv2d (Conv2D)             (None, 64, 64, 32)        896       
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 64, 64, 32)        0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 32, 32, 32)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 32, 32, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 32)        9248      
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 32, 32, 32)        0         
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 16, 16, 32)       0         
 2D)                                                             
                                                                 
 batch_normalization (BatchN  (None, 16, 16, 32)       128       
 ormalization)                                                   
                                                                 
 dropout_1 (Dropout)         (None, 16, 16, 32)        0         
                                                                 
 conv2d_2 (Conv2D)           (None, 16, 16, 32)        9248      
                                                                 
 leaky_re_lu_2 (LeakyReLU)   (None, 16, 16, 32)        0         
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 8, 8, 32)         0         
 2D)                                                             
                                                                 
 dropout_2 (Dropout)         (None, 8, 8, 32)          0         
                                                                 
 conv2d_3 (Conv2D)           (None, 8, 8, 32)          9248      
                                                                 
 leaky_re_lu_3 (LeakyReLU)   (None, 8, 8, 32)          0         
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 4, 4, 32)         0         
 2D)                                                             
                                                                 
 batch_normalization_1 (Batc  (None, 4, 4, 32)         128       
 hNormalization)                                                 
                                                                 
 dropout_3 (Dropout)         (None, 4, 4, 32)          0         
                                                                 
 conv2d_4 (Conv2D)           (None, 4, 4, 32)          9248      
                                                                 
 leaky_re_lu_4 (LeakyReLU)   (None, 4, 4, 32)          0         
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 2, 2, 32)         0         
 2D)                                                             
                                                                 
 dropout_4 (Dropout)         (None, 2, 2, 32)          0         
                                                                 
 flatten (Flatten)           (None, 128)               0         
                                                                 
 dense (Dense)               (None, 512)               66048     
                                                                 
 dropout_5 (Dropout)         (None, 512)               0         
                                                                 
 dense_1 (Dense)             (None, 2)                 1026      
                                                                 
=================================================================
Total params: 105,218
Trainable params: 105,090
Non-trainable params: 128
_________________________________________________________________

Compiling the model

In [120]:
model2.compile(loss = "binary_crossentropy", optimizer = adam, metrics = ['accuracy'])

Using callbacks

In [121]:
callbacks = [EarlyStopping(monitor = 'val_loss', patience = 2),
             ModelCheckpoint('.mdl_wts.hdf5', monitor = 'val_loss', save_best_only = True)]

Fit and train the model

In [122]:
history2 = model2.fit(train_images, train_labels, batch_size = 32, callbacks = callbacks, validation_split = 0.2, epochs = 20, verbose = 1)
Epoch 1/20
624/624 [==============================] - 144s 228ms/step - loss: 0.3217 - accuracy: 0.8457 - val_loss: 0.0306 - val_accuracy: 0.9954
Epoch 2/20
624/624 [==============================] - 142s 228ms/step - loss: 0.0940 - accuracy: 0.9703 - val_loss: 0.1043 - val_accuracy: 0.9788
Epoch 3/20
624/624 [==============================] - 142s 227ms/step - loss: 0.0844 - accuracy: 0.9744 - val_loss: 0.0423 - val_accuracy: 0.9910

Plotting the train and validation accuracy

In [123]:
# Plotting the accuracies

plot_accuracy(history2)

Evaluating the model

In [124]:
# Evaluate the model to calculate the accuracy

accuracy2 = model2.evaluate(test_images , test_labels, verbose = 1)

print('\n', 'Test_Accuracy:-', accuracy2[1])
82/82 [==============================] - 5s 63ms/step - loss: 0.0748 - accuracy: 0.9815

 Test_Accuracy:- 0.9815384745597839

Observations and insights: Train and validation accuracy converge at around 98%. Validation accuracy starts off much higher, suggesting that model2 with LeakyReLU and batch normalization learns useful features very early in training.

Generate the classification report and confusion matrix

In [125]:
from sklearn.metrics import classification_report

from sklearn.metrics import confusion_matrix

pred2 = model2.predict(test_images)

pred2 = np.argmax(pred2, axis = 1) 

y_true = np.argmax(test_labels, axis = 1)

# Printing the classification report
class_rep2 = classification_report(y_true, pred2)

print(class_rep2)

class_rep2 = classification_report(y_true, pred2, output_dict=True)

# Plotting the heatmap using confusion matrix

cm2 = confusion_matrix(y_true, pred2)

plt.figure(figsize = (8, 5))

sns.heatmap(cm2, annot = True,  fmt = '.0f', xticklabels = ['Uninfected', 'Parasitized'], yticklabels = ['Uninfected', 'Parasitized'])

plt.ylabel('Actual')

plt.xlabel('Predicted')

plt.show()
              precision    recall  f1-score   support

           0       0.98      0.99      0.98      1300
           1       0.99      0.98      0.98      1300

    accuracy                           0.98      2600
   macro avg       0.98      0.98      0.98      2600
weighted avg       0.98      0.98      0.98      2600

Think About It :

  • Can we improve the model with Image Data Augmentation?
  • References to image data augmentation can be seen below:
    • Image Augmentation for Computer Vision
    • How to Configure Image Data Augmentation in Keras?

Model 3 with Data Augmentation

In [126]:
backend.clear_session() # Clearing backend for new model

Using image data generator

In [127]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(train_images, train_labels, test_size = 0.2, random_state = 42)

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Using ImageDataGenerator to generate augmented images
train_datagen = ImageDataGenerator(horizontal_flip = True, 
                                  zoom_range = 0.5, rotation_range = 30)

val_datagen  = ImageDataGenerator()

# Flowing training images using the train_datagen generator
train_generator = train_datagen.flow(x = train_images, y = train_labels, batch_size = 64, seed = 42, shuffle = True)

# Flowing validation images using the val_datagen generator
# Note: this run flows the test images as validation data; to keep the test set
# fully unseen, X_val/y_val from the split above could be used here instead
val_generator =  val_datagen.flow(x = test_images, y = test_labels, batch_size = 64, seed = 42, shuffle = True)

Think About It :

  • Check if the performance of the model can be improved by changing different parameters in the ImageDataGenerator; one alternative configuration is sketched below.
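
For example (a minimal sketch; these particular ranges are arbitrary starting points to experiment with, not tuned values), shifts and a milder zoom could be tried in place of the settings above:

In [ ]:
# Hypothetical alternative augmentation settings to compare against the ones above
alt_datagen = ImageDataGenerator(horizontal_flip = True,
                                 vertical_flip = True,
                                 width_shift_range = 0.1,
                                 height_shift_range = 0.1,
                                 zoom_range = 0.2,
                                 rotation_range = 15)

alt_generator = alt_datagen.flow(x = train_images, y = train_labels, batch_size = 64, seed = 42, shuffle = True)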

Visualizing Augmented images

In [128]:
# Creating an iterable for images and labels from the training data
images, labels = next(train_generator)

# Plotting 16 images from the training data
fig, axes = plt.subplots(4, 4, figsize = (16, 16))

for (image, label, ax) in zip(images, labels, axes.flatten()):

    ax.imshow(image)

    if label[1] == 1: 

        ax.set_title('parasitized')

    else:

        ax.set_title('uninfected')

    ax.axis('off')

Observations and insights: The sites of the parasites get distorted, either losing a defined spot boundary or becoming vague magenta regions.

Building the Model

In [129]:
model3 = Sequential()

model3.add(Conv2D(32, (3, 3), input_shape = (64, 64, 3), padding = 'same')) #first convolutional layer

model3.add(LeakyReLU(0.1))

model3.add(MaxPooling2D(pool_size = 2))

model3.add(Dropout(0.2))

model3.add(Conv2D(32, (3, 3), input_shape = (64, 64, 3), padding = 'same')) #second convolutional layer

model3.add(LeakyReLU(0.1))

model3.add(MaxPooling2D(pool_size = 2))

model3.add(BatchNormalization())

model3.add(Dropout(0.2))

model3.add(Conv2D(32, (3, 3), input_shape = (64, 64, 3), padding = 'same')) #third convolutional layer

model3.add(LeakyReLU(0.1))

model3.add(MaxPooling2D(pool_size = 2))

model3.add(Dropout(0.2))

model3.add(Conv2D(32, (3, 3), input_shape = (64, 64, 3), padding = 'same')) #fourth convolutional layer

model3.add(LeakyReLU(0.1))

model3.add(MaxPooling2D(pool_size = 2))

model3.add(BatchNormalization())

model3.add(Dropout(0.2))

model3.add(Conv2D(32, (3, 3), input_shape = (64, 64, 3), padding = 'same')) #fifth convolutional layer

model3.add(LeakyReLU(0.1))

model3.add(MaxPooling2D(pool_size = 2))

model3.add(Dropout(0.2))

model3.add(Flatten())

model3.add(Dense(512, activation = "relu"))

model3.add(Dropout(0.4))

model3.add(Dense(2, activation = "softmax")) # 2 represents output layer neurons 

adam = optimizers.Adam(learning_rate = 0.001)

model3.compile(loss = 'binary_crossentropy', optimizer = adam, metrics = ['accuracy'])

model3.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 conv2d (Conv2D)             (None, 64, 64, 32)        896       
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 64, 64, 32)        0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 32, 32, 32)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 32, 32, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 32)        9248      
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 32, 32, 32)        0         
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 16, 16, 32)       0         
 2D)                                                             
                                                                 
 batch_normalization (BatchN  (None, 16, 16, 32)       128       
 ormalization)                                                   
                                                                 
 dropout_1 (Dropout)         (None, 16, 16, 32)        0         
                                                                 
 conv2d_2 (Conv2D)           (None, 16, 16, 32)        9248      
                                                                 
 leaky_re_lu_2 (LeakyReLU)   (None, 16, 16, 32)        0         
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 8, 8, 32)         0         
 2D)                                                             
                                                                 
 dropout_2 (Dropout)         (None, 8, 8, 32)          0         
                                                                 
 conv2d_3 (Conv2D)           (None, 8, 8, 32)          9248      
                                                                 
 leaky_re_lu_3 (LeakyReLU)   (None, 8, 8, 32)          0         
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 4, 4, 32)         0         
 2D)                                                             
                                                                 
 batch_normalization_1 (Batc  (None, 4, 4, 32)         128       
 hNormalization)                                                 
                                                                 
 dropout_3 (Dropout)         (None, 4, 4, 32)          0         
                                                                 
 conv2d_4 (Conv2D)           (None, 4, 4, 32)          9248      
                                                                 
 leaky_re_lu_4 (LeakyReLU)   (None, 4, 4, 32)          0         
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 2, 2, 32)         0         
 2D)                                                             
                                                                 
 dropout_4 (Dropout)         (None, 2, 2, 32)          0         
                                                                 
 flatten (Flatten)           (None, 128)               0         
                                                                 
 dense (Dense)               (None, 512)               66048     
                                                                 
 dropout_5 (Dropout)         (None, 512)               0         
                                                                 
 dense_1 (Dense)             (None, 2)                 1026      
                                                                 
=================================================================
Total params: 105,218
Trainable params: 105,090
Non-trainable params: 128
_________________________________________________________________

Using Callbacks

In [130]:
callbacks = [EarlyStopping(monitor = 'val_loss', patience = 2),
             ModelCheckpoint('.mdl_wts.hdf5', monitor = 'val_loss', save_best_only = True)]

Fit and Train the model

In [131]:
history3 = model3.fit(train_generator, 
                      validation_data = val_generator,
                      callbacks = callbacks,  # batch size comes from the generator (64), so batch_size is not passed here
                      epochs = 20, verbose = 1)
Epoch 1/20
390/390 [==============================] - 225s 574ms/step - loss: 0.6354 - accuracy: 0.6434 - val_loss: 0.6222 - val_accuracy: 0.6204
Epoch 2/20
390/390 [==============================] - 224s 574ms/step - loss: 0.2275 - accuracy: 0.9169 - val_loss: 0.1100 - val_accuracy: 0.9615
Epoch 3/20
390/390 [==============================] - 193s 494ms/step - loss: 0.1670 - accuracy: 0.9433 - val_loss: 0.0848 - val_accuracy: 0.9792
Epoch 4/20
390/390 [==============================] - 192s 493ms/step - loss: 0.1567 - accuracy: 0.9478 - val_loss: 0.0882 - val_accuracy: 0.9804
Epoch 5/20
390/390 [==============================] - 199s 511ms/step - loss: 0.1509 - accuracy: 0.9504 - val_loss: 0.0735 - val_accuracy: 0.9808
Epoch 6/20
390/390 [==============================] - 185s 475ms/step - loss: 0.1460 - accuracy: 0.9518 - val_loss: 0.0688 - val_accuracy: 0.9804
Epoch 7/20
390/390 [==============================] - 182s 468ms/step - loss: 0.1493 - accuracy: 0.9515 - val_loss: 0.0635 - val_accuracy: 0.9831
Epoch 8/20
390/390 [==============================] - 182s 467ms/step - loss: 0.1449 - accuracy: 0.9532 - val_loss: 0.0897 - val_accuracy: 0.9762
Epoch 9/20
390/390 [==============================] - 181s 465ms/step - loss: 0.1409 - accuracy: 0.9544 - val_loss: 0.0584 - val_accuracy: 0.9800
Epoch 10/20
390/390 [==============================] - 184s 473ms/step - loss: 0.1365 - accuracy: 0.9557 - val_loss: 0.0717 - val_accuracy: 0.9800
Epoch 11/20
390/390 [==============================] - 183s 468ms/step - loss: 0.1392 - accuracy: 0.9540 - val_loss: 0.0575 - val_accuracy: 0.9819
Epoch 12/20
390/390 [==============================] - 184s 473ms/step - loss: 0.1349 - accuracy: 0.9553 - val_loss: 0.0768 - val_accuracy: 0.9762
Epoch 13/20
390/390 [==============================] - 181s 465ms/step - loss: 0.1369 - accuracy: 0.9554 - val_loss: 0.0608 - val_accuracy: 0.9835

Evaluating the model

Plot the train and validation accuracy

In [132]:
# Plotting the accuracies
plot_accuracy(history3)
In [133]:
# Evaluating the model on test data
accuracy3 = model3.evaluate(test_images, test_labels, verbose = 1)

print('\n', 'Test_Accuracy:-', accuracy3[1])
82/82 [==============================] - 5s 59ms/step - loss: 0.0608 - accuracy: 0.9835

 Test_Accuracy:- 0.9834615588188171

Plotting the classification report and confusion matrix

In [134]:
pred3 = model3.predict(test_images)

pred3 = np.argmax(pred3, axis = 1) 

y_true = np.argmax(test_labels, axis = 1)

# Printing the classification report
class_rep3 = classification_report(y_true, pred3)

print(class_rep3)

class_rep3 = classification_report(y_true, pred3, output_dict=True)

# Plotting the heatmap using confusion matrix

cm3 = confusion_matrix(y_true, pred3)

plt.figure(figsize = (8, 5))

sns.heatmap(cm3, annot = True,  fmt = '.0f', xticklabels = ['Uninfected', 'Parasitized'], yticklabels = ['Uninfected', 'Parasitized'])

plt.ylabel('Actual')

plt.xlabel('Predicted')

plt.show()
              precision    recall  f1-score   support

           0       0.99      0.98      0.98      1300
           1       0.98      0.99      0.98      1300

    accuracy                           0.98      2600
   macro avg       0.98      0.98      0.98      2600
weighted avg       0.98      0.98      0.98      2600

Now, let us try to use a pretrained model like VGG16 and check how it performs on our data.

Pre-trained model (VGG16)¶

In [135]:
# Clearing backend
from tensorflow.keras import backend

backend.clear_session()

# Fixing the seed for random number generators
np.random.seed(42)

import random

random.seed(42)

tf.random.set_seed(42)
In [136]:
from tensorflow.keras.applications.vgg16 import VGG16

from tensorflow.keras import Model

vgg = VGG16(include_top = False, weights = 'imagenet', input_shape = (64, 64, 3))

vgg.summary()
Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_1 (InputLayer)        [(None, 64, 64, 3)]       0         
                                                                 
 block1_conv1 (Conv2D)       (None, 64, 64, 64)        1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 64, 64, 64)        36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 32, 32, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 32, 32, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 32, 32, 128)       147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 16, 16, 128)       0         
                                                                 
 block3_conv1 (Conv2D)       (None, 16, 16, 256)       295168    
                                                                 
 block3_conv2 (Conv2D)       (None, 16, 16, 256)       590080    
                                                                 
 block3_conv3 (Conv2D)       (None, 16, 16, 256)       590080    
                                                                 
 block3_pool (MaxPooling2D)  (None, 8, 8, 256)         0         
                                                                 
 block4_conv1 (Conv2D)       (None, 8, 8, 512)         1180160   
                                                                 
 block4_conv2 (Conv2D)       (None, 8, 8, 512)         2359808   
                                                                 
 block4_conv3 (Conv2D)       (None, 8, 8, 512)         2359808   
                                                                 
 block4_pool (MaxPooling2D)  (None, 4, 4, 512)         0         
                                                                 
 block5_conv1 (Conv2D)       (None, 4, 4, 512)         2359808   
                                                                 
 block5_conv2 (Conv2D)       (None, 4, 4, 512)         2359808   
                                                                 
 block5_conv3 (Conv2D)       (None, 4, 4, 512)         2359808   
                                                                 
 block5_pool (MaxPooling2D)  (None, 2, 2, 512)         0         
                                                                 
=================================================================
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________
In [137]:
transfer_layer = vgg.get_layer('block5_pool')

vgg.trainable = False

# Add classification layers on top of it
x = Flatten()(transfer_layer.output)  # Flatten the output of the last pooling layer (block5_pool) of VGG16

x = Dense(256, activation = 'relu')(x)  # Dense layer with 256 neurons

x = Dropout(0.3)(x)  # Dropout for regularization

x = BatchNormalization()(x)  # Batch normalization before the output layer

pred = Dense(2, activation = 'softmax')(x)

model4 = Model(vgg.input, pred) # Initializing the model

Compiling the model

In [138]:
# Compiling the model 
adam = optimizers.Adam(learning_rate = 0.001)

model4.compile(loss = 'binary_crossentropy', optimizer = adam, metrics = ['accuracy'])

Using Callbacks

In [139]:
# Adding Callbacks to the model
callbacks = [EarlyStopping(monitor = 'val_loss', patience = 2),
             ModelCheckpoint('.mdl_wts.hdf5', monitor = 'val_loss', save_best_only = True)]

Fit and Train the model

In [140]:
# Fitting the model for up to 20 epochs (early stopping may end training sooner)
history4 = model4.fit(
            train_images, train_labels,
            epochs = 20,
            callbacks = callbacks,
            batch_size = 32,
            validation_split = 0.2,
            verbose = 1
)
Epoch 1/20
624/624 [==============================] - 1043s 2s/step - loss: 0.2318 - accuracy: 0.9117 - val_loss: 0.3740 - val_accuracy: 0.8429
Epoch 2/20
624/624 [==============================] - 1039s 2s/step - loss: 0.1873 - accuracy: 0.9265 - val_loss: 0.2471 - val_accuracy: 0.9042
Epoch 3/20
624/624 [==============================] - 1041s 2s/step - loss: 0.1796 - accuracy: 0.9306 - val_loss: 0.0692 - val_accuracy: 0.9890
Epoch 4/20
624/624 [==============================] - 1037s 2s/step - loss: 0.1713 - accuracy: 0.9345 - val_loss: 0.1832 - val_accuracy: 0.9367
Epoch 5/20
624/624 [==============================] - 1043s 2s/step - loss: 0.1630 - accuracy: 0.9369 - val_loss: 0.1350 - val_accuracy: 0.9561

Plot the train and validation accuracy

In [141]:
# plotting the accuracies
plot_accuracy(history4)

Observations and insights: In several epochs the validation accuracy is higher than the training accuracy, and the validation loss fluctuates noticeably from epoch to epoch. Since dropout acts only during training, the regularized training passes can score lower than the unregularized validation passes; the fluctuation suggests the classifier head has not converged smoothly.¶

  • What can be observed from the validation and train curves?

Evaluating the model

In [142]:
# Evaluating the model on test data

accuracy4 = model4.evaluate(test_images, test_labels, verbose = 1)

print('\n', 'Test_Accuracy:-', accuracy4[1])
82/82 [==============================] - 109s 1s/step - loss: 0.1508 - accuracy: 0.9419

 Test_Accuracy:- 0.9419230818748474

Plotting the classification report and confusion matrix

In [143]:
# Plot the confusion matrix and generate a classification report for the model
pred4 = model4.predict(test_images)

pred4 = np.argmax(pred4, axis = 1) 

y_true = np.argmax(test_labels, axis = 1)

# Printing the classification report
class_rep4 = classification_report(y_true, pred4)

print(class_rep4)

class_rep4 = classification_report(y_true, pred4, output_dict=True)

# Plotting the heatmap using confusion matrix

cm4 = confusion_matrix(y_true, pred4)

plt.figure(figsize = (8, 5))

sns.heatmap(cm4, annot = True,  fmt = '.0f', xticklabels = ['Uninfected', 'Parasitized'], yticklabels = ['Uninfected', 'Parasitized'])

plt.ylabel('Actual')

plt.xlabel('Predicted')

plt.show()
              precision    recall  f1-score   support

           0       0.96      0.92      0.94      1300
           1       0.92      0.96      0.94      1300

    accuracy                           0.94      2600
   macro avg       0.94      0.94      0.94      2600
weighted avg       0.94      0.94      0.94      2600

Think about it:¶

  • What observations and insights can be drawn from the confusion matrix and classification report?
  • Choose the model with the best accuracy scores from all the above models and save it as a final model.
In [144]:
# Generating a summary table of the accuracies of all the models

# Converting the classification reports (dicts) to DataFrames so their values can be indexed
reports = [class_rep, class_rep1, class_rep2, class_rep3, class_rep4]

# Building the report names df_class_rep0 ... df_class_rep4
dfs = ['df_class_rep' + str(i) for i in range(len(reports))]

print('Report names are:', dfs)

# Mapping each report name to its DataFrame
collection = {name: pd.DataFrame(report).transpose() for name, report in zip(dfs, reports)}

print('Collection of Classification Report DataFrames: \n', collection)
Report names are: ['df_class_rep0', 'df_class_rep1', 'df_class_rep2', 'df_class_rep3', 'df_class_rep4']
Collection of Classification Report DataFrames: 
 {'df_class_rep0':               precision    recall  f1-score      support
0              0.983683  0.973846  0.978740  1300.000000
1              0.974105  0.983846  0.978951  1300.000000
accuracy       0.978846  0.978846  0.978846     0.978846
macro avg      0.978894  0.978846  0.978846  2600.000000
weighted avg   0.978894  0.978846  0.978846  2600.000000, 'df_class_rep1':               precision    recall  f1-score      support
0              0.987558  0.976923  0.982212  1300.000000
1              0.977169  0.987692  0.982402  1300.000000
accuracy       0.982308  0.982308  0.982308     0.982308
macro avg      0.982364  0.982308  0.982307  2600.000000
weighted avg   0.982364  0.982308  0.982307  2600.000000, 'df_class_rep2':               precision    recall  f1-score      support
0              0.977134  0.986154  0.981623  1300.000000
1              0.986025  0.976923  0.981453  1300.000000
accuracy       0.981538  0.981538  0.981538     0.981538
macro avg      0.981579  0.981538  0.981538  2600.000000
weighted avg   0.981579  0.981538  0.981538  2600.000000, 'df_class_rep3':               precision    recall  f1-score      support
0              0.989867  0.976923  0.983353  1300.000000
1              0.977221  0.990000  0.983569  1300.000000
accuracy       0.983462  0.983462  0.983462     0.983462
macro avg      0.983544  0.983462  0.983461  2600.000000
weighted avg   0.983544  0.983462  0.983461  2600.000000, 'df_class_rep4':               precision    recall  f1-score      support
0              0.962188  0.920000  0.940621  1300.000000
1              0.923360  0.963846  0.943169  1300.000000
accuracy       0.941923  0.941923  0.941923     0.941923
macro avg      0.942774  0.941923  0.941895  2600.000000
weighted avg   0.942774  0.941923  0.941895  2600.000000}
In [145]:
# Obtaining TN / FP / FN / TP counts from the confusion matrices

tn, fp, fn, tp = cm.ravel()
tn1, fp1, fn1, tp1 = cm1.ravel()
tn2, fp2, fn2, tp2 = cm2.ravel()
tn3, fp3, fn3, tp3 = cm3.ravel()
tn4, fp4, fn4, tp4 = cm4.ravel()
In [146]:
# Number of epochs each model actually ran before early stopping
epochs_ex = [len(history.history['loss']), len(history1.history['loss']), len(history2.history['loss']),
             len(history3.history['loss']), len(history4.history['loss'])]

test_acc = [accuracy[1], accuracy1[1], accuracy2[1], accuracy3[1], accuracy4[1]]

# Weighted-average precision, recall and F1-score for each model
precs = [collection['df_class_rep' + str(i)]['precision']['weighted avg'] for i in range(5)]

recalls = [collection['df_class_rep' + str(i)]['recall']['weighted avg'] for i in range(5)]

f1scores = [collection['df_class_rep' + str(i)]['f1-score']['weighted avg'] for i in range(5)]

falsepos = [fp, fp1, fp2, fp3, fp4]

falsenegs = [fn, fn1, fn2, fn3, fn4]

headers = ['Model Base', 'Model 1', 'Model 2', 'Model 3', 'Model 4']

indices = ['Epochs Executed', 'Test Accuracy', 'Precision', 'Recall', 'F1-Score', 'False Positives', 'False Negatives']

lists = [epochs_ex, test_acc, precs, recalls, f1scores, falsepos, falsenegs]

df = pd.concat([pd.Series(x) for x in lists], axis = 1).T

df.columns = headers

df.index = indices
print(df)
                 Model Base    Model 1    Model 2    Model 3    Model 4 
Epochs Executed    9.000000   6.000000   3.000000  13.000000    5.000000
Test Accuracy      0.978846   0.982308   0.981538   0.983462    0.941923
Precision          0.978894   0.982364   0.981579   0.983544    0.942774
Recall             0.978846   0.982308   0.981538   0.983462    0.941923
F1-Score           0.978846   0.982307   0.981538   0.983461    0.941895
False Positives   34.000000  30.000000  18.000000  30.000000  104.000000
False Negatives   21.000000  16.000000  30.000000  13.000000   47.000000
In [150]:
plt.figure(figsize=(8,6))
plt.scatter(epochs_ex,falsenegs,s=100,color="red")
plt.xlabel("Epochs Executed")
plt.ylabel("False Negatives")
plt.title("Epochs Executed vs False Negatives",fontsize=15)
for i, label in enumerate(headers):
    plt.annotate(label, (epochs_ex[i], falsenegs[i]))

plt.show()

Observations and Conclusions drawn from the final model: _

Improvements that can be done:

  • Can the model performance be improved using other pre-trained models or a different CNN architecture? (One such swap is sketched below.)
  • You can try to build a model using these HSV images and compare them with your other models.
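
As an optional illustration of the first point, here is a hedged sketch of swapping in a different pre-trained backbone. VGG19 is an assumption, chosen only because it accepts the same 64x64 inputs; the classifier head mirrors the one used for model4, and nothing here is tuned.

In [ ]:
# A minimal sketch, not part of the graded solution: a frozen VGG19 backbone
# with the same classifier head as model4
from tensorflow.keras.applications.vgg19 import VGG19

base = VGG19(include_top = False, weights = 'imagenet', input_shape = (64, 64, 3))

base.trainable = False  # Freezing the pre-trained convolutional layers

x = Flatten()(base.output)

x = Dense(256, activation = 'relu')(x)

x = Dropout(0.3)(x)

x = BatchNormalization()(x)

pred_alt = Dense(2, activation = 'softmax')(x)

model_alt = Model(base.input, pred_alt)

model_alt.compile(loss = 'binary_crossentropy',
                  optimizer = optimizers.Adam(learning_rate = 0.001),
                  metrics = ['accuracy'])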
In [147]:
model.save("/content/drive/MyDrive/Capstone_Project/model.h5")
model1.save("/content/drive/MyDrive/Capstone_Project/model1.h5")
model2.save("/content/drive/MyDrive/Capstone_Project/model2.h5")
model3.save("/content/drive/MyDrive/Capstone_Project/model3.h5")
model4.save("/content/drive/MyDrive/Capstone_Project/model4.h5")

Insights¶

Refined insights:¶

  • What are the most meaningful insights from the data relevant to the problem?

In order to build the most optimized deep learning model to detect malaria, several models were explored:

  • Model Base
  • Model 1: Extra Layers
  • Model 2: LeakyReLU
  • Model 3: Data Augmentation
  • Model 4: VGG16

Model Base: This model consisted of 3 convolutional layers with ReLU activation, each followed by max pooling and dropout layers, plus a flatten layer and a dense layer with ReLU activation before the output layer. It yielded a fairly high test accuracy. In the Accuracy vs Epoch graph, the training and validation accuracy both increase, with validation slightly higher than train.

Model 1 (Extra Layers): This model extended the base model with 2 additional convolutional layers with ReLU activation, each followed by max pooling and dropout layers. It also yielded a high test accuracy. The number of epochs executed was on par with the base model, indicating a similar level of fitting to the training data. In the Accuracy vs Epoch graph, the training accuracy rises and plateaus, while the validation accuracy fluctuates around that level.

Model 2 (LeakyReLU): This model extended Model 1 by converting all 5 ReLU activations to LeakyReLU and adding BatchNormalization layers. It yielded a test accuracy similar to the previous models. Fewer epochs were executed, indicating that the validation loss stopped improving sooner. In the Accuracy vs Epoch graph, the training accuracy starts much lower and the validation accuracy starts higher, and the two meet at a high accuracy.

Model 3 (Data Augmentation): This model used the same network as Model 2, with data augmentation (horizontal flip, 0.5 zoom range and 30-degree rotation) applied to the training set. It also yielded a similar test accuracy. More epochs were executed, since the augmented training images differ from the unaugmented validation images. The Accuracy vs Epoch graph shows the training and validation accuracy increasing in a synchronous manner, with validation ending slightly higher, indicating the model performs well on regular images despite being trained on augmented ones.

Model 4 (VGG16): This model used the frozen VGG16 convolutional base with a new classifier head (flatten, a 256-neuron dense layer, dropout and batch normalization). It yielded the lowest test accuracy of the five models, and its validation accuracy fluctuated considerably from epoch to epoch in the Accuracy vs Epoch graph.

Comparison of various techniques and their relative performance:¶

  • How do different techniques perform? Which one is performing relatively better? Is there scope to improve the performance further?

Referencing the table generated in cell In [146]:

From the table above, the main difference between the models' performance lies in the number of epochs executed and in the false negatives and false positives. Although the classification report captures this, the size of the test set makes the differences hard to see as percentages: all the models have high test accuracy, precision, recall and F1-score. Between false positives and false negatives, the latter is far more important, as a false negative tells a patient they do not have malaria when they really do, whereas a false positive can be caught by retesting. Models that run for more epochs make more passes through the network before the validation loss stops improving, i.e. it takes longer before early stopping indicates that the model has begun overfitting the training data.
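
To make the false-negative point concrete, here is a small sketch reusing the TN/FP/FN/TP counts unpacked in cell In [145]; it computes each model's false-negative rate (the share of parasitized cells reported as uninfected, i.e. 1 minus the recall of class 1).

In [ ]:
# A minimal sketch: false-negative rate per model
for name, (fn_i, tp_i) in zip(['Model Base', 'Model 1', 'Model 2', 'Model 3', 'Model 4'],
                              [(fn, tp), (fn1, tp1), (fn2, tp2), (fn3, tp3), (fn4, tp4)]):

    print(f"{name}: false-negative rate = {fn_i / (fn_i + tp_i):.4f}")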

Referencing the graph generated in cell In [150]:

From this graph we can quickly see the relationship between the number of epochs executed before early stopping and the number of false negatives in the model's output. It is also clear that Model 3 shows the best overall performance on the test data set. To improve the performance further, Model 3 could be retrained on HSV-converted images, which may make the colour differences between parasitized and uninfected cells easier to recognize.

Proposal for the final solution design:¶

  • What model do you propose to be adopted? Why is this the best solution to adopt?

As such, the best solution to adopt is Model 3 (Data Augmentation). Model 3 combines the features of the previously tried models that enhanced accuracy: multiple layers, LeakyReLU, batch normalization and dropout. It also applies flip, zoom and rotation to the training images, which increases accuracy on both the validation and the test set by reducing overfitting to the training images. For the final model, Model 3 will be rerun on HSV-converted images to enhance the image colours and, as a result, potentially improve classification.

Malaria Detection: HSV and Model 3

Mounting the Drive

In [3]:
# Mounting the drive
from google.colab import drive

drive.mount('/content/drive')
Mounted at /content/drive

Loading libraries¶

In [4]:
# Importing libraries required to load the data
import zipfile

import os

from PIL import Image

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import MinMaxScaler


import tensorflow as tf

from tensorflow.keras import optimizers

from tensorflow.keras.utils import to_categorical


# To ignore warnings
import warnings

warnings.filterwarnings('ignore')

# Remove the limit from the number of displayed columns and rows. It helps to see the entire dataframe while printing it
pd.set_option("display.max_columns", None)

pd.set_option("display.max_rows", 200)

Let us load the data¶

In [10]:
# Storing the path of the data file from the Google drive
path = '/content/drive/MyDrive/Capstone_Project/cell_images.zip'

# The data is provided as a zip file so we need to extract the files from the zip file
with zipfile.ZipFile(path, 'r') as zip_ref:

    zip_ref.extractall()

The extracted folder has different folders for train and test data which further contains the different sizes of images for parasitized and uninfected cells within the respective folder name.

The size of all images must be the same and should be converted to 4D arrays so that they can be used as an input for the convolutional neural network. Also, we need to create the labels for both types of images to be able to train and test the model.

Let's do the same for the training data first and then we will use the same code for the test data as well.

In [11]:
# Storing the path of the extracted "train" folder 
train_dir = '/content/cell_images/train'

# Size of image so that each image has the same size
SIZE = 64

# Empty list to store the training images after they are converted to NumPy arrays
train_images = []

# Empty list to store the training labels (0 - uninfected, 1 - parasitized)
train_labels = []
In [12]:
# We will run the same code for "parasitized" as well as "uninfected" folders within the "train" folder
for folder_name in ['/parasitized/', '/uninfected/']:
    
    # Path of the folder
    images_path = os.listdir(train_dir + folder_name)

    for i, image_name in enumerate(images_path):
    
        try:
    
            # Opening each image using the path of that image
            image = Image.open(train_dir + folder_name + image_name)

            # Resizing each image to (64, 64)
            image = image.resize((SIZE, SIZE))

            # Converting images to arrays and appending that array to the empty list defined above
            train_images.append(np.array(image))

            # Creating labels for parasitized and uninfected images
            if folder_name == '/parasitized/':
                
                train_labels.append(1)
            
            else:
            
                train_labels.append(0)
        
        except Exception:
        
            pass       

# Converting lists to arrays
train_images = np.array(train_images)

train_labels = np.array(train_labels)
In [13]:
# Storing the path of the extracted "test" folder 
test_dir = '/content/cell_images/test'

# Size of image so that each image has the same size (it must be same as the train image size)
SIZE = 64

# Empty list to store the testing images after they are converted to NumPy arrays
test_images = []

# Empty list to store the testing labels (0 - uninfected, 1 - parasitized)
test_labels = []
In [14]:
# We will run the same code for "parasitized" as well as "uninfected" folders within the "test" folder
for folder_name in ['/parasitized/', '/uninfected/']:
    
    # Path of the folder
    images_path = os.listdir(test_dir + folder_name)

    for i, image_name in enumerate(images_path):
     
        try:
            # Opening each image using the path of that image
            image = Image.open(test_dir + folder_name + image_name)
            
            # Resizing each image to (64, 64)
            image = image.resize((SIZE, SIZE))
            
            # Converting images to arrays and appending that array to the empty list defined above
            test_images.append(np.array(image))
            
            # Creating labels for parasitized and uninfected images
            if folder_name == '/parasitized/':
                
                test_labels.append(1)
            
            else:
            
                test_labels.append(0)
        
        except Exception:
        
            pass       

# Converting lists to arrays
test_images = np.array(test_images)

test_labels = np.array(test_labels)

Normalize the images

In [15]:
# Try to normalize the train and test images by dividing it by 255 and convert them to float32 using astype function
train_images = (train_images/255).astype('float32')

test_images = (test_images/255).astype('float32')

Having completed the required preprocessing and the EDA from Milestone 1, we will now build the model and evaluate its performance.

Converting the images from RGB to HSV using OpenCV

In [16]:
import cv2

gfx = []   # To hold the HSV train image arrays

# Note: PIL loads images in RGB order, so cv2.COLOR_RGB2HSV is strictly the
# matching conversion code; COLOR_BGR2HSV first swaps the red and blue channels,
# but the transform remains consistent as long as train and test use the same code
for i in range(len(train_images)):

  a = cv2.cvtColor(train_images[i], cv2.COLOR_BGR2HSV)

  gfx.append(a)

gfx = np.array(gfx)
In [17]:
gft = []   # To hold the HSV test image arrays

# Using the same conversion code as for the train images, for consistency
for i in range(len(test_images)):

  a = cv2.cvtColor(test_images[i], cv2.COLOR_BGR2HSV)

  gft.append(a)

gft = np.array(gft)

Visualizing the converted train data

In [18]:
viewimage = np.random.randint(1, 24958, 5)

fig, ax = plt.subplots(1, 5, figsize = (18, 18))

for t, i in zip(range(5), viewimage):

  Title = train_labels[i]

  ax[t].set_title(Title)

  ax[t].imshow(gfx[i])

  ax[t].set_axis_off()
  
  fig.tight_layout()
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). (repeated once per image)

Note: for float32 input, OpenCV returns hue in [0, 360] while saturation and value stay in [0, 1], so imshow clips the hue channel; the plots are only a rough visualization of the HSV arrays.
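
If a faithful rendering is wanted, one option (a small sketch reusing gfx and train_labels from above) is to invert the HSV conversion purely for display:

In [ ]:
# A minimal sketch: HSV2BGR inverts the BGR2HSV conversion used above, returning
# the channels to their original order with all values back in [0, 1], so the
# clipping warning disappears
fig, ax = plt.subplots(1, 5, figsize = (18, 4))

for t, i in zip(range(5), np.random.randint(1, len(gfx), 5)):

  ax[t].set_title(train_labels[i])

  ax[t].imshow(cv2.cvtColor(gfx[i], cv2.COLOR_HSV2BGR))

  ax[t].set_axis_off()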

Visualizing the converted test data

In [19]:
# Similarly, visualizing images from the test data
viewimage = np.random.randint(1, len(gft), 5)   # Sampling across the whole test set, not just the first 100 images

fig, ax = plt.subplots(1, 5, figsize = (18, 18))

for t, i in zip(range(5), viewimage):

  Title = test_labels[i]

  ax[t].set_title(Title)

  ax[t].imshow(gft[i])

  ax[t].set_axis_off()
  
  fig.tight_layout()
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers). (repeated once per image)

Model 5 with Data Augmentation and HSV

One Hot Encoding on the train and test labels

In [20]:
# Encoding Train Labels

train_labels = to_categorical(train_labels, 2)

# Similarly let us try to encode test labels
test_labels = to_categorical(test_labels, 2)

Importing the required libraries for building and training our Model

In [21]:
# Clearing backend
from tensorflow.keras import backend

from tensorflow.keras.utils import to_categorical

from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout  

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

from random import shuffle

backend.clear_session()

# Fixing the seed for random number generators so that we can ensure we receive the same output every time
np.random.seed(42)

import random

random.seed(42)

tf.random.set_seed(42)

Using image data generator

In [22]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(gfx, train_labels, test_size = 0.2, random_state = 42)

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Using ImageDataGenerator to generate images
train_datagen = ImageDataGenerator(horizontal_flip = True, 
                                  zoom_range = 0.5, rotation_range = 30)

val_datagen  = ImageDataGenerator()

# Flowing training images using train_datagen generator
train_generator = train_datagen.flow(x = gfx, y = train_labels, batch_size = 64, seed = 42, shuffle = True)


# Flowing validation images using val_datagen generator
val_generator =  val_datagen.flow(x = gft, y = test_labels, batch_size = 64, seed = 42, shuffle = True)

Think About It :

  • Check if the performance of the model can be improved by changing different parameters in the ImageDataGenerator (one alternative configuration is sketched below).
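
As an illustration, here is a hedged sketch of an alternative augmentation configuration; the parameter values are assumptions to experiment with, not tuned settings.

In [ ]:
# A hedged sketch: milder zoom and rotation plus vertical flips and small
# shifts. Cell orientation carries no diagnostic meaning, so flips are safe.
alt_datagen = ImageDataGenerator(horizontal_flip = True,
                                 vertical_flip = True,
                                 zoom_range = 0.2,
                                 rotation_range = 15,
                                 width_shift_range = 0.1,
                                 height_shift_range = 0.1)

alt_train_generator = alt_datagen.flow(x = X_train, y = y_train, batch_size = 64, seed = 42, shuffle = True)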

Visualizing Augmented images

In [23]:
# Creating an iterable for images and labels from the training data
images, labels = next(train_generator)

# Plotting 16 images from the training data
fig, axes = plt.subplots(4, 4, figsize = (16, 8))

fig.set_size_inches(16, 16)
for (image, label, ax) in zip(images, labels, axes.flatten()):

    ax.imshow(image)

    if label[1] == 1: 

        ax.set_title('parasitized')

    else:

        ax.set_title('uninfected')

    ax.axis('off')
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
WARNING:matplotlib.image:Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

Observations and insights: The parasite sites get distorted by the augmentation: they either lose their defined spot boundaries or become vague yellow regions.

Building the Final Model

In [24]:
from tensorflow.keras.layers import BatchNormalization, LeakyReLU

model5 = Sequential()

model5.add(Conv2D(32, (3, 3), input_shape = (64, 64, 3), padding = 'same')) #first convolutional layer

model5.add(LeakyReLU(0.1))

model5.add(MaxPooling2D(pool_size = 2))

model5.add(Dropout(0.2))

model5.add(Conv2D(32, (3, 3), padding = 'same')) # second convolutional layer (input_shape is only needed on the first layer)

model5.add(LeakyReLU(0.1))

model5.add(MaxPooling2D(pool_size = 2))

model5.add(BatchNormalization())

model5.add(Dropout(0.2))

model5.add(Conv2D(32, (3, 3), padding = 'same')) # third convolutional layer

model5.add(LeakyReLU(0.1))

model5.add(MaxPooling2D(pool_size = 2))

model5.add(Dropout(0.2))

model5.add(Conv2D(32, (3, 3), padding = 'same')) # fourth convolutional layer

model5.add(LeakyReLU(0.1))

model5.add(MaxPooling2D(pool_size = 2))

model5.add(BatchNormalization())

model5.add(Dropout(0.2))

model5.add(Conv2D(32, (3, 3), padding = 'same')) # fifth convolutional layer

model5.add(LeakyReLU(0.1))

model5.add(MaxPooling2D(pool_size = 2))

model5.add(Dropout(0.2))

model5.add(Flatten())

model5.add(Dense(512, activation = "relu"))

model5.add(Dropout(0.4))

model5.add(Dense(2, activation = "softmax")) # 2 represents output layer neurons 

adam = optimizers.Adam(learning_rate = 0.001)

model5.compile(loss = 'binary_crossentropy', optimizer = adam, metrics = ['accuracy'])

model5.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 conv2d (Conv2D)             (None, 64, 64, 32)        896       
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 64, 64, 32)        0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 32, 32, 32)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 32, 32, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 32)        9248      
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 32, 32, 32)        0         
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 16, 16, 32)       0         
 2D)                                                             
                                                                 
 batch_normalization (BatchN  (None, 16, 16, 32)       128       
 ormalization)                                                   
                                                                 
 dropout_1 (Dropout)         (None, 16, 16, 32)        0         
                                                                 
 conv2d_2 (Conv2D)           (None, 16, 16, 32)        9248      
                                                                 
 leaky_re_lu_2 (LeakyReLU)   (None, 16, 16, 32)        0         
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 8, 8, 32)         0         
 2D)                                                             
                                                                 
 dropout_2 (Dropout)         (None, 8, 8, 32)          0         
                                                                 
 conv2d_3 (Conv2D)           (None, 8, 8, 32)          9248      
                                                                 
 leaky_re_lu_3 (LeakyReLU)   (None, 8, 8, 32)          0         
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 4, 4, 32)         0         
 2D)                                                             
                                                                 
 batch_normalization_1 (Batc  (None, 4, 4, 32)         128       
 hNormalization)                                                 
                                                                 
 dropout_3 (Dropout)         (None, 4, 4, 32)          0         
                                                                 
 conv2d_4 (Conv2D)           (None, 4, 4, 32)          9248      
                                                                 
 leaky_re_lu_4 (LeakyReLU)   (None, 4, 4, 32)          0         
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 2, 2, 32)         0         
 2D)                                                             
                                                                 
 dropout_4 (Dropout)         (None, 2, 2, 32)          0         
                                                                 
 flatten (Flatten)           (None, 128)               0         
                                                                 
 dense (Dense)               (None, 512)               66048     
                                                                 
 dropout_5 (Dropout)         (None, 512)               0         
                                                                 
 dense_1 (Dense)             (None, 2)                 1026      
                                                                 
=================================================================
Total params: 105,218
Trainable params: 105,090
Non-trainable params: 128
_________________________________________________________________

Using Callbacks

In [25]:
callbacks = [EarlyStopping(monitor = 'val_loss', patience = 2),
             ModelCheckpoint('.mdl_wts.hdf5', monitor = 'val_loss', save_best_only = True)]

Fit and Train the model

In [26]:
history5 = model5.fit(train_generator, 
                      validation_data = val_generator,
                      callbacks = callbacks,  # batch size comes from the generator (64), so batch_size is not passed here
                      epochs = 20, verbose = 1)
Epoch 1/20
390/390 [==============================] - 36s 68ms/step - loss: 0.6948 - accuracy: 0.5569 - val_loss: 0.9884 - val_accuracy: 0.5035
Epoch 2/20
390/390 [==============================] - 26s 67ms/step - loss: 0.6673 - accuracy: 0.6021 - val_loss: 1.2571 - val_accuracy: 0.5096
Epoch 3/20
390/390 [==============================] - 26s 67ms/step - loss: 0.6510 - accuracy: 0.6210 - val_loss: 0.7437 - val_accuracy: 0.4023
Epoch 4/20
390/390 [==============================] - 36s 93ms/step - loss: 0.5726 - accuracy: 0.6842 - val_loss: 1.6657 - val_accuracy: 0.5000
Epoch 5/20
390/390 [==============================] - 36s 92ms/step - loss: 0.3633 - accuracy: 0.8362 - val_loss: 0.2881 - val_accuracy: 0.8885
Epoch 6/20
390/390 [==============================] - 48s 123ms/step - loss: 0.2512 - accuracy: 0.9058 - val_loss: 0.0875 - val_accuracy: 0.9762
Epoch 7/20
390/390 [==============================] - 32s 83ms/step - loss: 0.2086 - accuracy: 0.9276 - val_loss: 0.1041 - val_accuracy: 0.9704
Epoch 8/20
390/390 [==============================] - 36s 92ms/step - loss: 0.1979 - accuracy: 0.9324 - val_loss: 0.0913 - val_accuracy: 0.9742

Evaluating the model

Plot the train and validation accuracy

In [27]:
# Function to plot train and validation accuracy 
def plot_accuracy(history):

    N = len(history.history["accuracy"])

    plt.figure(figsize = (7, 7))

    plt.plot(np.arange(0, N), history.history["accuracy"], label = "train_accuracy", ls = '--')

    plt.plot(np.arange(0, N), history.history["val_accuracy"], label = "val_accuracy", ls = '--')

    plt.title("Accuracy vs Epoch")
    
    plt.xlabel("Epochs")
    
    plt.ylabel("Accuracy")
    
    plt.legend(loc = "lower right")  # "lower right" is the valid matplotlib name for this corner
In [28]:
# Plotting the accuracies
plot_accuracy(history5)
In [29]:
# Evaluating the model on test data
accuracy5 = model5.evaluate(gft, test_labels, verbose = 1)

print('\n', 'Test_Accuracy:-', accuracy5[1])
82/82 [==============================] - 1s 9ms/step - loss: 0.0913 - accuracy: 0.9742

 Test_Accuracy:- 0.9742307662963867

Plotting the classification report and confusion matrix

In [30]:
from sklearn.metrics import classification_report

from sklearn.metrics import confusion_matrix

pred5 = model5.predict(gft)

pred5 = np.argmax(pred5, axis = 1) 

y_true = np.argmax(test_labels, axis = 1)

# Printing the classification report
class_rep5 = classification_report(y_true, pred5)

print(class_rep5)

class_rep5 = classification_report(y_true, pred5, output_dict=True)

# Plotting the heatmap using confusion matrix

cm5 = confusion_matrix(y_true, pred5)

plt.figure(figsize = (8, 5))

sns.heatmap(cm5, annot = True,  fmt = '.0f', xticklabels = ['Uninfected', 'Parasitized'], yticklabels = ['Uninfected', 'Parasitized'])

plt.ylabel('Actual')

plt.xlabel('Predicted')

plt.show()
              precision    recall  f1-score   support

           0       0.96      0.99      0.97      1300
           1       0.99      0.96      0.97      1300

    accuracy                           0.97      2600
   macro avg       0.97      0.97      0.97      2600
weighted avg       0.97      0.97      0.97      2600

Think about it:¶

  • What observations and insights can be drawn from the confusion matrix and classification report?
  • Choose the model with the best accuracy scores from all the above models and save it as a final model (a small sketch below picks it programmatically).
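
Acting on the second bullet, here is a small sketch that compares the stored test accuracies programmatically; it assumes the accuracy variables from the earlier evaluation cells (accuracy through accuracy4, from the first notebook session) are still in scope alongside accuracy5.

In [ ]:
# A minimal sketch: reporting the model with the highest test accuracy
test_accs = {'model': accuracy[1], 'model1': accuracy1[1], 'model2': accuracy2[1],
             'model3': accuracy3[1], 'model4': accuracy4[1], 'model5': accuracy5[1]}

best = max(test_accs, key = test_accs.get)

print('Best model by test accuracy:', best, '->', round(test_accs[best], 4))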

Observations and Conclusions drawn from the final model: We can see from the confusion matrix that the number of false negatives is much higher than in most of the earlier models. This suggests that although humans may distinguish two bright colours more easily, the model separates the classes more effectively using the dark magenta parasite spots against the light pink/lavender background of the original RGB cell images.

In [31]:
model5.save("/content/drive/MyDrive/Capstone_Project/model5.h5")
In [5]:
# Loading the best model and converting it to TensorFlow Lite for deployment
model = tf.keras.models.load_model('/content/drive/MyDrive/Capstone_Project/model3.h5')

converter = tf.lite.TFLiteConverter.from_keras_model(model)

tflmodel = converter.convert()

# Writing the converted model to disk; the with-statement ensures the file is
# closed and flushed (the converted file is about 427,576 bytes)
with open('/content/drive/MyDrive/Capstone_Project/finalmalariamodel3.tflite', 'wb') as file:

    file.write(tflmodel)
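
To sanity-check the converted model, here is a minimal sketch (an illustration, assuming the normalized test_images from the cells above are still in scope) of running inference with the TFLite interpreter:

In [ ]:
# A minimal sketch: running the converted TFLite model on one test image
interpreter = tf.lite.Interpreter(model_path = '/content/drive/MyDrive/Capstone_Project/finalmalariamodel3.tflite')

interpreter.allocate_tensors()

input_details = interpreter.get_input_details()

output_details = interpreter.get_output_details()

# One test image with a batch dimension of 1, as float32 (matching the training input)
sample = test_images[0:1].astype('float32')

interpreter.set_tensor(input_details[0]['index'], sample)

interpreter.invoke()

probs = interpreter.get_tensor(output_details[0]['index'])

print('Predicted class:', np.argmax(probs, axis = 1))  # 0 - uninfected, 1 - parasitized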
In [41]:
# Appending Model 5 stats to the model performance table

tn5, fp5, fn5, tp5 = cm5.ravel()

m5 = [len(history5.history['loss']), accuracy5[1],
      class_rep5['weighted avg']['precision'], class_rep5['weighted avg']['recall'],
      class_rep5['weighted avg']['f1-score'], fp5, fn5]
In [44]:
df = pd.read_csv('/content/drive/MyDrive/Capstone_Project/model_performance_MS2.csv')
In [45]:
df.head()
Out[45]:
        Unnamed: 0  Model Base  Model 1  Model 2  Model 3  Model 4   Unnamed: 6
0  Epochs Executed      9.0000   6.0000   3.0000  13.0000   5.0000          NaN
1    Test Accuracy      0.9788   0.9823   0.9815   0.9835   0.9419          NaN
2        Precision      0.9789   0.9824   0.9816   0.9835   0.9428          NaN
3           Recall      0.9788   0.9823   0.9815   0.9835   0.9419          NaN
4         F1-Score      0.9788   0.9823   0.9815   0.9835   0.9419          NaN
In [56]:
del df['Unnamed: 6']
In [59]:
df=df.assign(Model5=m5)
In [60]:
df.to_csv('/content/drive/MyDrive/Capstone_Project/app_model_performance_MS2.csv')
In [5]:
df = pd.read_csv('/content/drive/MyDrive/Capstone_Project/app_model_performance_MS2.csv')
In [19]:
# Extracting the epochs-executed (row 0) and false-negatives (row 6) rows
epochs = df.loc[0].values.tolist()
del epochs[0:2]   # Dropping the integer-index and metric-name entries

FN = df.loc[6].values.tolist()
del FN[0:2]
In [21]:
headers = ['Model Base', 'Model 1', 'Model 2', 'Model 3', 'Model 4', 'Model 5']


plt.figure(figsize=(8,6))
plt.scatter(epochs,FN,s=100,color="red")
plt.xlabel("Epochs Executed")
plt.ylabel("False Negatives")
plt.title("Epochs Executed vs False Negatives",fontsize=15)
for i, label in enumerate(headers):
    plt.annotate(label, (epochs[i], FN[i]))

plt.show()