I am new to this field and I am trying to build a face recognition algorithm using triplet loss and a Siamese network. The problem is that the loss value never decreases below the margin of the triplet loss. I've tried 4 different networks that might solve the problem, including ResNet50, and I had the same issue. I tried changing the learning rate to lots of different values and tried regularization like dropout, and nothing changed: the loss does not decrease, and when I compute the L2 distance between any 2 images the distance is almost 0. I used this dataset for the images: https://www.kaggle.com/datasets/stoicstatic/face-recognition-dataset
First I ran this code to read the images from the hard disk, then a second piece of code to transform the dataset into anchors, positives and negatives. This is the first code, which reads the images:
import os
import cv2

# dataset_path is the full path to the dataset on my local hard drive
imgs = {}
lst_imgs = []
# iterations is the number of different people in the dataset
iterations = len(os.listdir(dataset_path))
for main_path in range(iterations):
    # store the current path to use later when reading the individual images
    current_path = os.path.join(dataset_path, str(main_path))
    # each person in the main path has 72 different/positive images
    for sub_path in range(len(os.listdir(current_path))):
        full_img_path = os.path.join(current_path, str(sub_path))
        # after getting the full path of the image, read it using cv2
        img = cv2.imread(full_img_path + ".png")
        # some images in my previous dataset had the wrong dimensions, so this
        # if statement makes sure all of the dataset images have the same dims
        if img.shape == (112, 112, 3):
            rgb_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            resized_im = cv2.resize(rgb_image, (round(128/2), round(128/2)))
            lst_imgs.append(resized_im)
    imgs[main_path] = lst_imgs.copy()
    lst_imgs.clear()
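As a quick sanity check after loading, I print what ended up in imgs (just an illustrative check; the exact counts depend on my local copy of the dataset):

# quick look at what was loaded
print("people loaded:", len(imgs))
first_person = next(iter(imgs))
print("images for the first person:", len(imgs[first_person]))   # around 72 per person in this dataset
print("single image shape:", imgs[first_person][0].shape)        # (64, 64, 3) after resizing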
After storing the images in a dictionary called imgs, where each key is the main path (one person) and the value is a list of the 72 images of that person,
I used this code to transform the data into anchors, positives and negatives:
import random
import itertools

anchors = []
positives = []
negatives = []
for main, sub in imgs.items():
    # for each key/person in the imgs dict, take the positive images and shuffle them
    choices = sub.copy()
    random.shuffle(choices)
    for choice in choices:
        # append the first image as the anchor for the other 72 positive images
        anchors.append(sub[0])
        # for each one of the 72 shuffled choices, append the shuffled one as a positive image
        positives.append(choice)
        # to get the negative image, take a copy of the dataset
        negative__images = imgs.copy()
        # exclude the current person, because it would be a positive if I used
        # the current person as a negative image for that person
        del negative__images[main]
        # chain all remaining images into one list of length (72 * len(imgs) - 72),
        # then pick one of them at random
        neg_choices = list(itertools.chain(*(value for value in negative__images.values())))
        rand_negative_choice = random.choice(neg_choices)
        negatives.append(rand_negative_choice)
Then I converted the lists to NumPy arrays:

import numpy as np

anchors = np.array(anchors)
positives = np.array(positives)
negatives = np.array(negatives)
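I also print the shapes to make sure the three arrays line up (the first dimension is the number of triplets, which has to match the size of the labels array used in fit() further down):

# the three arrays must have the same length (one triplet per row)
print(anchors.shape, positives.shape, negatives.shape)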
This is the triplet loss that I used in all of the tests. I also tried changing it and taking someone else's implementation, and I still face the same issue:
import tensorflow as tf

def triplet_loss(y_true, y_pred, alpha=0.2):
    anchor, positive, negative = y_pred[0], y_pred[1], y_pred[2]
    pos_dist = tf.reduce_sum(tf.square(anchor - positive), axis=-1)
    neg_dist = tf.reduce_sum(tf.square(anchor - negative), axis=-1)
    loss = tf.maximum(pos_dist - neg_dist + alpha, 0.0)
    return tf.reduce_mean(loss)
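To make the symptom concrete: with this loss, if the network maps every image to (almost) the same embedding, pos_dist and neg_dist are both ~0 and the loss sits exactly at alpha, which matches what I see (the loss plateaus at the margin and the pairwise L2 distances are almost 0). A tiny standalone check with fake collapsed embeddings (illustrative values only, not my real data):

# three identical fake "embeddings" standing in for a collapsed network
fake = tf.ones((1, 128))
collapsed_pred = tf.stack([fake, fake, fake])       # same indexing my loss uses: [0]=anchor, [1]=positive, [2]=negative
print(triplet_loss(None, collapsed_pred).numpy())   # prints 0.2, i.e. exactly the margin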
This is the model that I used:
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

input_shape = (anchors.shape[1], anchors.shape[2], 3)
input_anchor = Input(shape=input_shape)
input_positive = Input(shape=input_shape)
input_negative = Input(shape=input_shape)

def create_siamese_network(input_shape):
    input_image = Input(shape=input_shape)
    x = Conv2D(64, (3, 3), activation='relu')(input_image)
    x = MaxPooling2D()(x)
    x = Conv2D(32, (3, 3), activation='relu')(x)
    x = MaxPooling2D()(x)
    x = Flatten()(x)
    x = Dense(4069, activation='relu')(x)
    x = Dense(2048, activation='relu')(x)
    x = Dense(1024, activation='relu')(x)
    x = Dense(512, activation='relu')(x)
    x = Dense(265, activation='relu')(x)
    output = Dense(128)(x)
    return Model(inputs=input_image, outputs=output)

siamese_network = create_siamese_network(input_shape)
embedding_anchor = siamese_network(input_anchor)
embedding_positive = siamese_network(input_positive)
embedding_negative = siamese_network(input_negative)
output = tf.keras.layers.concatenate([embedding_anchor, embedding_positive, embedding_negative], axis=1)
I've tried changing the model and changing the number of neurons in the output layer (the embedding size), and the issue did not get solved.
siamese_model_2 = Model(inputs=[input_anchor, input_positive, input_negative], outputs=output)
siamese_model_2.compile(optimizer=Adam(learning_rate=0.00001), loss=triplet_loss)
labels = np.zeros((6103,))  # dummy labels, one per triplet; the loss only uses y_pred
siamese_model_2.fit([anchors, positives, negatives], y=labels, epochs=250, batch_size=40)
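After training, this is roughly how I check the distance between two images (a sketch: I take any two of the loaded images, push them through the shared embedding network, and compute the L2 distance), and the value comes out almost 0 regardless of whether the two images show the same person or not:

# embed two arbitrary images with the shared embedding network and compare them
img_a = anchors[0:1].astype('float32')      # first anchor image, shape (1, 64, 64, 3)
img_b = negatives[0:1].astype('float32')    # a negative image of a different person
emb_a = siamese_network(img_a)
emb_b = siamese_network(img_b)
l2_distance = tf.norm(emb_a - emb_b, axis=-1)
print(l2_distance.numpy())                  # comes out close to 0 for me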
Any solution to this issue, please?
In other attempts I've tried a more complex model like this:
def create_siamese_network(input_shape):
    inputs = tf.keras.Input(shape=input_shape)
    x = tf.keras.layers.Conv2D(64, (10, 10), padding='same', activation='relu')(inputs)
    x = tf.keras.layers.MaxPooling2D((2, 2))(x)
    x = tf.keras.layers.Dropout(0.3)(x)
    x = tf.keras.layers.Conv2D(128, (7, 7), padding='same', activation='relu')(x)
    x = tf.keras.layers.MaxPooling2D((2, 2))(x)
    x = tf.keras.layers.Dropout(0.3)(x)
    x = tf.keras.layers.Conv2D(128, (4, 4), padding='same', activation='relu')(x)
    x = tf.keras.layers.MaxPooling2D((2, 2))(x)
    x = tf.keras.layers.Dropout(0.3)(x)
    x = tf.keras.layers.Conv2D(256, (4, 4), padding='same', activation='relu')(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(4096, activation='relu')(x)
    # embedding = tf.keras.layers.Dense(1024, activation='sigmoid')(x)
    embedding = tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1))(x)  # L2-normalize embeddings
    model = tf.keras.Model(inputs=inputs, outputs=embedding)
    return model

siamese_network = create_siamese_network(input_shape)
I've also tried this other, more complex network, and the issue is still the same:
model = tf.keras.applications.ResNet50(weights='imagenet', include_top=False)
# mark the layers from index 120 onward as trainable
for layer in model.layers[120:]:
    layer.trainable = True

# embedding head on top of the ResNet50 backbone
new_model_1 = tf.keras.Sequential([
    model,
    Dense(4069, activation='relu'),
    Dense(2048, activation='relu'),
    Dense(1024, activation='relu'),
    Dense(512, activation='relu'),
    Dense(256, activation='relu'),
    Dense(128, activation='linear')])

input_shape = (anchors.shape[1], anchors.shape[2], 3)
input_anchor = Input(shape=input_shape)
input_positive = Input(shape=input_shape)
input_negative = Input(shape=input_shape)
embedding_anchor = new_model_1(input_anchor)
embedding_positive = new_model_1(input_positive)
embedding_negative = new_model_1(input_negative)
output = tf.keras.layers.concatenate([embedding_anchor, embedding_positive, embedding_negative], axis=1)
new_model_1 = Model(inputs=[input_anchor, input_positive, input_negative], outputs=output)
new_model_1.compile(optimizer=Adam(learning_rate=0.000002), loss=triplet_loss)
labels = np.zeros((8200,))
new_model_1.fit([anchors, positives, negatives], y=labels, epochs=5, batch_size=30)