Why does the Confusion Matrix in Soft Voting vary across multiple iterations?

19 views.

Why does the Confusion Matrix in Soft Voting vary across multiple iterations? Does probability=True have an influence? How can the Confusion Matrix results be consistent in Soft Voting?

from sklearn.model_selection import cross_val_score, KFold
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
import numpy as np
from sklearn.metrics import classification_report

# Split features / target.  dataset_baru is assumed to be a pandas DataFrame
# defined earlier, with the label in the last column -- TODO confirm.
X = dataset_baru.iloc[:, :-1].values
y = dataset_baru.iloc[:, -1].values

# WHY the soft-voting confusion matrix varied between runs:
# SVC(probability=True) fits its probability estimates with an *internal*
# cross-validation (Platt scaling) whose data shuffling is random.  Without a
# fixed random_state the calibrated probabilities -- and therefore the
# soft-voting predictions and confusion matrix -- differ on every run.
# Seeding random_state makes the results reproducible.
svm_kernel1_soft = SVC(kernel='linear', probability=True, random_state=42)
svm_kernel2_soft = SVC(kernel='poly', probability=True, random_state=42)
svm_kernel3_soft = SVC(kernel='rbf', probability=True, random_state=42)

# Hard voting uses decision labels only; SVC training itself is deterministic,
# so no random_state is required here.
svm_kernel1_hard = SVC(kernel='linear')
svm_kernel2_hard = SVC(kernel='poly')
svm_kernel3_hard = SVC(kernel='rbf')

# K-Fold cross-validation (seeded so the fold assignment is reproducible too).
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

confusion_matrices_hard = []
confusion_matrices_soft = []
# BUG FIX: the original printed classification reports from the loop-leftover
# y_test / y_pred variables, so every "Fold i" report was actually the LAST
# fold's report.  Store each fold's (y_test, y_pred) pair instead.
fold_results_hard = []  # list of (y_test, y_pred_hard) per fold
fold_results_soft = []  # list of (y_test, y_pred_soft) per fold

for train_index, test_index in kfold.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Hard voting: majority vote over the predicted class labels.
    voting_clf_hard = VotingClassifier(
        estimators=[('svm1', svm_kernel1_hard), ('svm2', svm_kernel2_hard), ('svm3', svm_kernel3_hard)],
        voting='hard'
    )
    voting_clf_hard.fit(X_train, y_train)
    y_pred_hard = voting_clf_hard.predict(X_test)

    # Soft voting: argmax of the averaged class probabilities
    # (requires probability=True on every base estimator).
    voting_clf_soft = VotingClassifier(
        estimators=[('svm1', svm_kernel1_soft), ('svm2', svm_kernel2_soft), ('svm3', svm_kernel3_soft)],
        voting='soft'
    )
    voting_clf_soft.fit(X_train, y_train)
    y_pred_soft = voting_clf_soft.predict(X_test)

    # Record this fold's confusion matrices and raw predictions.
    confusion_matrices_hard.append(confusion_matrix(y_test, y_pred_hard))
    confusion_matrices_soft.append(confusion_matrix(y_test, y_pred_soft))
    fold_results_hard.append((y_test, y_pred_hard))
    fold_results_soft.append((y_test, y_pred_soft))

# Per-fold classification reports for hard voting (now using each fold's own
# labels/predictions rather than the last fold's).
print("Classification Report - Hard Voting:")
for i, (yt, yp) in enumerate(fold_results_hard):
    print(f"Fold {i+1}:")
    print(classification_report(yt, yp))

# Per-fold classification reports for soft voting.
print("Classification Report - Soft Voting:")
for i, (yt, yp) in enumerate(fold_results_soft):
    print(f"Fold {i+1}:")
    print(classification_report(yt, yp))

# Summary reports for the final fold (kept from the original script, now
# explicitly labelled so they are not mistaken for an overall average).
print("Classification Report - Hard Voting (last fold):")
print(classification_report(y_test, y_pred_hard))

print("Classification Report - Soft Voting (last fold):")
print(classification_report(y_test, y_pred_soft))

When I tried to remove probability=True from SVM, an error occurred.

AttributeError: predict_proba is not available when probability=False

0 votes.

There are 0 answers.