I'm making agents that play Gomoku using supervised learning and reinforcement learning. The problem occurred during supervised learning: the accuracy and loss stop increasing/decreasing. (Loss and accuracy graphs: pink is Adam, yellow is SGD.)
My model and source code:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization, Dropout, Flatten
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import ModelCheckpoint

def InYeongGoModel(input_shape, is_policy_net=True):
    model = Sequential()
    # Block 1: two 7x7 convolutions, 64 filters each
    model.add(Conv2D(64, (7, 7), input_shape=input_shape, padding='same',
                     activation='relu', data_format='channels_first'))
    model.add(BatchNormalization())
    model.add(Conv2D(64, (7, 7), padding='same', activation='relu',
                     data_format='channels_first'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    # Block 2: two 5x5 convolutions, 128 filters each
    model.add(Conv2D(128, (5, 5), padding='same', activation='relu',
                     data_format='channels_first'))
    model.add(BatchNormalization())
    model.add(Conv2D(128, (5, 5), padding='same', activation='relu',
                     data_format='channels_first'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    # Block 3: two 3x3 convolutions, 64 filters each
    model.add(Conv2D(64, (3, 3), padding='same', activation='relu',
                     data_format='channels_first'))
    model.add(BatchNormalization())
    model.add(Conv2D(64, (3, 3), padding='same', activation='relu',
                     data_format='channels_first'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    if is_policy_net:
        # 1x1 convolution down to a single plane, then flatten to one output per board point
        model.add(Conv2D(filters=1, kernel_size=1, padding='same',
                         data_format='channels_first', activation='softmax'))
        model.add(Flatten())
    return model
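
A minimal usage sketch for the builder above. The 15x15 board and single-plane encoding here are assumptions for illustration, not necessarily my actual encoder:

# Assumed shapes for illustration: one input plane on a 15x15 board.
input_shape = (1, 15, 15)  # (planes, rows, cols) for channels_first
model = InYeongGoModel(input_shape, is_policy_net=True)
model.summary()  # the final Flatten yields 15 * 15 = 225 outputs, one per board point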
def data_generator(self, states, actions, rewards, batch_size):
    n = states.shape[0]
    num_moves = len(actions)
    indices = np.arange(n)
    while True:
        # Reshuffle the whole dataset at the start of every pass
        np.random.shuffle(indices)
        for start_idx in range(0, n, batch_size):
            end_idx = min(start_idx + batch_size, n)
            batch_indices = indices[start_idx:end_idx]
            batch_states = states[batch_indices]
            batch_actions = actions[batch_indices]
            batch_rewards = rewards[batch_indices]
            # Target: one-hot vector of the played move, scaled by the reward
            y = np.zeros((batch_indices.shape[0], num_moves))
            for i, action in enumerate(batch_actions):
                reward = batch_rewards[i]
                y[i][action] = reward
            yield batch_states, y
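
To make the target encoding concrete, here is a tiny standalone sketch of what y looks like (the 15x15 board, i.e. 225 possible move indices, is an assumption):

import numpy as np

# Sketch of the target encoding used by the generator; 225 assumes a 15x15 board.
num_moves = 225
actions = np.array([7, 42])      # flattened board indices of the played moves
rewards = np.array([1.0, -1.0])  # e.g. +1 from a won game, -1 from a lost one

y = np.zeros((len(actions), num_moves))
for i, action in enumerate(actions):
    y[i][action] = rewards[i]    # one nonzero entry per row: the played move
print(y.shape)  # (2, 225)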
def train(self, experience, lr=0.00001, clipnorm=1.0, batch_size: int = 256, epochs: int = 1):
    opt = SGD(learning_rate=lr, clipnorm=clipnorm)
    # opt = Adam()
    self.model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    n = experience.states.shape[0]
    # current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    # tensorboard_callback = TensorBoard(log_dir=f"./logs_{current_time}")
    checkpoint_callback = ModelCheckpoint(
        filepath=f"{self.encoder.name()}_alphago_sl_checkpoint_{{epoch:02d}}",
        save_best_only=False,
        period=10  # save the model every 10 epochs
    )
    generator = self.data_generator(experience.states, experience.actions,
                                    experience.rewards, batch_size)
    self.model.fit(
        generator,
        steps_per_epoch=n // batch_size,
        epochs=epochs,
        # callbacks=[tensorboard_callback, checkpoint_callback]
    )
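
For context, this is roughly how I invoke training (`agent` and `experience` stand in for my own objects; the names are illustrative):

# Illustrative call; `agent` wraps the model above and `experience`
# holds the states/actions/rewards arrays described below.
agent.train(experience, lr=1e-4, clipnorm=1.0, batch_size=256, epochs=100)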
I used Adam and SGD with learning rates of 0.01, 0.001, and 1e-4, but no learning rate solved the problem. The dataset has 1,280,000 samples, and I varied the batch size from 256 to 512. Training ran for 100 epochs and took about 4 hours (~2.4 minutes per epoch). Is there a solution, or a faster way to train?