Multi-class macro-average ROC plots not looking good


I am trying to plot the macro-average ROC curve for multi-class data. However, the plots do not start at (0,0). I am fairly sure the code works, because the problem does not occur when I plot the micro-average ROC curves. I attach an example of the two plots (micro- and macro-average) for the same dataset and classifier (Random Forest Classifier). The three curves compared are the baseline and two oversampling methods applied to the same dataset. I notice that the problem occurs most often with Random Forests, less often with Naive Bayes, and not at all with Decision Trees.

[Image: macro-average ROC curves] [Image: micro-average ROC curves]

The code I use for the multi-class dataset, plotting classifier performance on 3 differently oversampled datasets, is below. d_probabilities is a dictionary mapping model names to predicted probabilities.
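
Roughly, the inputs look like this (a minimal sketch with placeholder values, not my real data; each probability array is the (n_samples, n_classes) output of predict_proba, and the accuracies here are made up):

import numpy as np

# illustrative placeholders only: random "probabilities" and labels, made-up accuracies
rng = np.random.default_rng(0)
n_samples, n_classes = 100, 3

def fake_proba():
    p = rng.random((n_samples, n_classes))
    return p / p.sum(axis=1, keepdims=True)   # rows sum to 1, like predict_proba output

y_test = rng.integers(0, n_classes, size=n_samples)
d_probabilities = {'Raw': fake_proba(), 'SMOTE': fake_proba(), 'KDE': fake_proba()}
d_accuracies = {'Raw': 0.81, 'SMOTE': 0.84, 'KDE': 0.83}   # made-up example values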

import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.preprocessing import label_binarize


def result_render_multiclass(d_probabilities, d_accuracies, y_test, title_set, model):
  table_multi_micro = pd.DataFrame(columns = ['Classifier','fpr','tpr','auc'])
  table_multi_macro = pd.DataFrame(columns = ['Classifier','fpr','tpr','auc'])
  imcp_scores, auc_micro, auc_macro, accuracy = [], [], [], []
  list_metrics = []

  n_classes = len(np.unique(y_test))
  y_test_binarize = label_binarize(y_test, classes=np.arange(n_classes))
  scores = {}

  for model_name, model_proba in d_probabilities.items():  #iterating over 3 probabilities of 3 models
    y_pred = model_proba
    scores[model_name] = model_proba

    fpr, tpr, roc_auc, thresholds = dict(), dict(), dict(), dict()
    # per-class ROC curves (one-vs-rest)
    for i in range(n_classes):
      fpr[i], tpr[i], thresholds[i] = roc_curve(y_test_binarize[:, i], y_pred[:, i], drop_intermediate=False)
      roc_auc[i] = metrics.auc(fpr[i], tpr[i])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test_binarize.ravel(), y_pred.ravel())
    roc_auc["micro"] = metrics.auc(fpr["micro"], tpr["micro"])

    # macro-average: aggregate the per-class curves on a common grid of false positive rates
    #all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
    fpr_grid = np.linspace(0.0, 1.0, 1000)

    # Then interpolate all ROC curves at these points
    #mean_tpr = np.zeros_like(all_fpr)
    mean_tpr = np.zeros_like(fpr_grid)
    for i in range(n_classes):
      mean_tpr += np.interp(fpr_grid, fpr[i], tpr[i])
    # Finally average it and compute AUC
    mean_tpr /= n_classes
    fpr["macro"] = fpr_grid
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = metrics.auc(fpr["macro"], tpr["macro"])

    # storing average-micro fpr, tpr, auc 
    row_micro = {'Classifier': model_name, 'fpr': fpr['micro'],'tpr':tpr['micro'],'auc':roc_auc['micro']}
    table_multi_micro.loc[len(table_multi_micro)] = row_micro

    # storing average-macro fpr, tpr, auc 
    row_macro = {'Classifier': model_name,'fpr':fpr['macro'],'tpr':tpr['macro'],'auc':roc_auc['macro']}
    table_multi_macro.loc[len(table_multi_macro)] = row_macro

    #appending AUC(ROC) for micro and macro average
    auc_micro.append(roc_auc_score(y_test, y_pred, multi_class='ovr',average = 'micro' ))
    auc_macro.append(roc_auc_score(y_test, y_pred, multi_class='ovr',average = 'macro' ))
    #appending aimcp for (raw,smote,kde)

  for acc in d_accuracies.values():  #appending average accuracies (over 10)  for raw,smote,kde to list:  3 accuracies
      accuracy.append(acc)
  # imcp_scores is not populated in this snippet, so zip only the lists filled here:
  # a flat list [accuracy, auc_micro, auc_macro] for each method, in order
  for acc_score, auc_mi, auc_ma in zip(accuracy, auc_micro, auc_macro):
      list_metrics.append(float(f'{acc_score:.3f}'))
      list_metrics.append(float(f'{auc_mi:.3f}'))
      list_metrics.append(float(f'{auc_ma:.3f}'))

  return list_metrics, table_multi_macro, table_multi_micro
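
To show the macro-averaging step in isolation, this is essentially what the interpolation onto the common FPR grid does (a standalone sketch on made-up one-vs-rest labels and scores, not my data):

import numpy as np
from sklearn.metrics import roc_curve, auc

rng = np.random.default_rng(1)
n_samples, n_classes = 200, 3
y_bin = rng.integers(0, 2, size=(n_samples, n_classes))   # fake binarized labels
y_score = rng.random((n_samples, n_classes))              # fake per-class scores

fpr_grid = np.linspace(0.0, 1.0, 1000)
mean_tpr = np.zeros_like(fpr_grid)
for i in range(n_classes):
    fpr_i, tpr_i, _ = roc_curve(y_bin[:, i], y_score[:, i], drop_intermediate=False)
    mean_tpr += np.interp(fpr_grid, fpr_i, tpr_i)          # per-class TPR on the shared grid
mean_tpr /= n_classes
print('macro-average AUC:', auc(fpr_grid, mean_tpr))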




def multi_class_roc_save(title_set, table, model, save_folder, name=''):
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)

    plt.figure(figsize=(8,6))
    table.set_index('Classifier', inplace = True)
    colors = ['navy','orange','green']
    for i,color in zip(table.index,colors):
      plt.plot(table.loc[i]['fpr'], 
            table.loc[i]['tpr'], 
            label="{}, AUC={:.3f}".format(i, table.loc[i]['auc']),color = color)
    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel("Flase Positive Rate", fontsize=15)
    plt.ylabel("True Positive Rate", fontsize=15)
    plt.title('{}-average ROC curve  - {}'.format(name, title_set), fontweight='bold', fontsize=15)
    plt.legend(prop={'size':13}, loc='lower right')
    file_name_macro = os.path.join(save_folder, '{}_{}_{}'.format(title_set,model,name))
    plt.savefig(file_name_macro)
    plt.close()
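
For completeness, this is roughly how I call the two functions (assuming d_probabilities, d_accuracies and y_test as sketched above; the dataset title, model tag and folder name are just placeholders):

list_metrics, table_macro, table_micro = result_render_multiclass(
    d_probabilities, d_accuracies, y_test, title_set='example_set', model='RFC')

multi_class_roc_save('example_set', table_macro, 'RFC', save_folder='plots', name='macro')
multi_class_roc_save('example_set', table_micro, 'RFC', save_folder='plots', name='micro')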

I was expecting the macro-average ROC curves to start from (0,0).

An example with Naive Bayes: [Image: NB macro-average ROC curves] [Image: NB micro-average ROC curves]

How can I fix this problem?
