I'm new to ML and I have a problem with my confusion matrix. Unfortunately, I get the following error when generating the confusion matrix:
`data` and `reference` should be factors with the same levels.
Here is my code:
library(caret)
library(fastAdaboost)

# Read the semicolon-separated CSV and keep the selected columns (by position).
data <- read.csv("~/Desktop/test1.csv", sep = ";")
data1 <- subset(data, select = c(4, 5, 6, 7, 8, 12, 15, 16))

# Reproducible 70/30 split on the target column.
set.seed(1234)
parts <- createDataPartition(data1$Status.szkody, p = 0.7, list = FALSE)
train <- data1[parts, ]
test <- data1[-parts, ]

# Fit AdaBoost with 6 boosting iterations.
model <- adaboost(Status.szkody ~ ., data = train, nIter = 6)

# predict() on this model returns a list (formula, votes, class, prob, error),
# not a factor. The predicted labels are in the `class` element.
a <- predict(model, train, type = "class")

# The reference must be a factor with the same levels as the predictions.
train$Status.szkody <- as.factor(train$Status.szkody)

# Pass the predicted factor (a$class), not the whole list `a`; passing the list
# is what triggers "`data` and `reference` should be factors with the same
# levels", because levels(a) is NULL.
confusionMatrix(a$class, train$Status.szkody, mode = "everything")
I see that `train$Status.szkody` has levels and `a` does not, but how do I deal with that?
> str(a)
List of 5
$ formula:Class 'formula' language Status.szkody ~ .
.. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
$ votes : num [1:40845, 1:2] 1.14 1.77 1.59 1.35 1.77 ...
$ class : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ prob : num [1:40845, 1:2] 0.644 1 0.9 0.762 1 ...
$ error : num 0.234
> str(train$Status.szkody)
Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
> levels(a)
NULL
> levels(train$Status.szkody)
[1] "0" "1"
I also tried `cvms::confusion_matrix(train$Status.szkody, a)`, but that fails with a different error: 'targets' and 'predictions' must have same length.
Any help would be greatly appreciated, because I do not know how to deal with it. Thanks in advance.
Edit1:
dput(head(data1,30))
structure(list(Miesiąc = c("styczeń", "luty", "styczeń", "styczeń",
"styczeń", "styczeń", "styczeń", "styczeń", "styczeń", "styczeń",
"styczeń", "styczeń", "styczeń", "styczeń", "styczeń", "styczeń",
"styczeń", "styczeń", "styczeń", "styczeń", "styczeń", "styczeń",
"styczeń", "styczeń", "styczeń", "styczeń", "styczeń", "styczeń",
"styczeń", "styczeń"), Kwartał = c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Terminal = c("Katowice", "Legnica",
"Katowice", "Legnica", "Sosnowiec", "Wrocław", "Legnica", "Katowice",
"Katowice", "Legnica", "Gliwice", "Wrocław", "Wrocław", "Legnica",
"Wrocław", "Legnica", "Sosnowiec", "Wrocław", "Katowice", "Gliwice",
"Gliwice", "Gliwice", "Katowice", "Wrocław", "Legnica", "Legnica",
"Gliwice", "Legnica", "Katowice", "Legnica"), Towar = c("RTV",
"RTV", "Telefony", "AGD", "Komputery", "AGD małe", "AGD do zabudowy",
"Telefony", "RTV", "AGD małe", "AGD", "RTV", "Komputery", "AGD małe",
"RTV", "AGD do zabudowy", "RTV", "Komputery", "Telefony", "Komputery",
"RTV", "AGD małe", "AGD małe", "AGD", "Telefony", "Telefony",
"AGD małe", "AGD do zabudowy", "AGD do zabudowy", "AGD do zabudowy"
), Status.szkody = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L,
1L, 0L, 0L, 0L), Kraj = c("PL", "PL", "PL", "PL", "PL", "PL",
"PL", "PL", "PL", "DE", "DE", "DE", "DE", "PL", "PL", "PL", "PL",
"PL", "PL", "DE", "DE", "DE", "DE", "DE", "DE", "AT", "DE", "DE",
"AT", "DE"), Usługa = c("Express", "Express", "Express", "Express",
"Express", "Express", "Express", "Express", "Express", "Express",
"Express", "Express", "Express", "Express", "Express", "Express",
"Express", "Express", "Express", "Express", "Express", "Express",
"Express", "Express", "Express", "Express", "Express", "Express",
"Express", "Express"), Partner = c("Partner D", "Partner A",
"Partner D", "Partner A", "Partner C", "Partner D", "Partner D",
"Partner A", "Partner D", "Partner B", "Partner C", "Partner A",
"Partner C", "Partner B", "Partner D", "Partner B", "Partner D",
"Partner E", "Partner B", "Partner D", "Partner E", "Partner D",
"Partner E", "Partner B", "Partner D", "Partner D", "Partner C",
"Partner A", "Partner E", "Partner B")), row.names = c(NA, 30L
), class = "data.frame")
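You should use `a$class`, which holds your model's predictions as a factor vector (`a` itself is a list, which is why `levels(a)` is `NULL`). You can use the following code:

    confusionMatrix(a$class, train$Status.szkody, mode = "everything")

Created on 2022-07-23 by the reprex package (v2.0.1)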