What I'm trying to do is apply a model that analyses DICOM images with segmentations. For the segmentations I simply used ITK to "paint" a certain zone of the DICOM images (512x512x84). When I try to use metrics to see how well the model is trained, I get this error:

RuntimeError: Predictions and targets are expected to have the same shape, but got torch.Size([1, 84]) and torch.Size([1, 1, 128, 128, 84])

I have tried resizing the labels, but it doesn't work; besides, from what I read in torchmetrics, the tensors must have the same shape. I just started using MONAI last week, and I'll be very grateful if someone can guide me to resolve the issue. Thanks in advance.

Code:
import logging
import os
import sys
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter
from torchmetrics import Accuracy

import monai
from monai.data import DataLoader, Dataset
from monai.networks.nets import DenseNet121
from monai.transforms import Compose, LoadImaged, RandRotate90d, Resized, ScaleIntensityd
from monai.utils import first
def main():
    monai.config.print_config()
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    data_dir = "C:/Users/ACER/Desktop/POSGRADO VIU/Prácticas/Datasets/archive/files/aneurismamonai"
    # Build the file paths for each folder of images.
    train_images = sorted(glob(os.path.join(data_dir, "TrainData", "*.dcm")))
    train_labels = sorted(glob(os.path.join(data_dir, "TrainLabel", "*.dcm")))
    val_images = sorted(glob(os.path.join(data_dir, "ValData", "*.dcm")))
    val_labels = sorted(glob(os.path.join(data_dir, "ValLabel", "*.dcm")))
    # Build the image/label dictionaries consumed by the dictionary transforms.
    train_files = [{"image": image, "label": label} for image, label in zip(train_images, train_labels)]
    val_files = [{"image": image, "label": label} for image, label in zip(val_images, val_labels)]
    # train_files is a plain list of dicts, e.g. print(type(train_files)), print(len(train_files))
    train_transforms = Compose(
        [
            LoadImaged(keys=["image", "label"], ensure_channel_first=True),
            RandRotate90d(keys=["image", "label"], prob=0.5, spatial_axes=[0, 1]),
            Resized(keys=["image", "label"], spatial_size=(128, 128, 84)),
        ]
    )
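    # Aside (something I have not tried yet): Resized defaults to mode="area", which
    # interpolates and so can blur a binary mask; MONAI dictionary transforms accept
    # one mode per key, so something like the line below should keep the label crisp:
    # Resized(keys=["image", "label"], spatial_size=(128, 128, 84), mode=("area", "nearest"))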
    val_transforms = Compose(
        [
            LoadImaged(keys=["image", "label"], ensure_channel_first=True),
            ScaleIntensityd(keys=["image", "label"]),  # note: this rescales the label values as well
            Resized(keys=["image", "label"], spatial_size=(128, 128, 84)),
            # ToTensord(keys=["image", "label"]),
        ]
    )
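    # Sanity check I could run here (my own addition, assumes val_files is non-empty):
    # after Resized, image and label should both come out as (1, 128, 128, 84).
    # check = val_transforms(val_files[0])
    # print(check["image"].shape, check["label"].shape)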
    # post_pred = Compose([Activations(softmax=True)])
    # post_label = Compose([AsDiscrete(to_onehot=2)])
    train_ds = Dataset(data=train_files, transform=train_transforms)
    # batch_size is the number of images used per iteration
    train_loader = DataLoader(train_ds, batch_size=1, num_workers=1, pin_memory=torch.cuda.is_available())
    # Get the first sample of the first batch to inspect its shape.
    first_batch = next(iter(train_loader))
    primer_dato = first_batch["image"]
    print("Shape of the first sample:", primer_dato.shape)
    test_patient = first(train_loader)
    val_ds = Dataset(data=val_files, transform=val_transforms)
    val_loader = DataLoader(val_ds, batch_size=1)
    print(test_patient["label"].shape)  # expect (1, 1, 128, 128, 84) after Resized
    # for idx, sample in enumerate(train_loader):
    #     print(f"Element {idx + 1}: {sample['image'].shape}")
    #     print(type(sample['image']))
    # THE PROBLEM IS IN THE LABEL DIMENSIONS: torch.Size([1, 1, 96, 96, 96]) vs torch.Size([1, 1, 512, 512, 1])
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = DenseNet121(spatial_dims=3, in_channels=1, out_channels=84).to(device)
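    # My suspicion (unverified): DenseNet121 is a classification network, so it returns
    # logits of shape (batch, out_channels) = (1, 84), which can never match a
    # (1, 1, 128, 128, 84) voxel mask. A segmentation network keeps the spatial
    # dimensions, for example something like:
    # from monai.networks.nets import UNet
    # model = UNet(spatial_dims=3, in_channels=1, out_channels=2,
    #              channels=(16, 32, 64, 128), strides=(2, 2, 2)).to(device)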
    # loss_function = torch.nn.CrossEntropyLoss()  # no idea about this one; it raised:
    # RuntimeError: 0D or 1D target tensor expected, multi-target not supported
    loss_function = torch.nn.MSELoss()
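    # Another assumption on my part: MSELoss on raw logits against an integer mask is
    # unusual for segmentation; MONAI's DiceLoss seems to be the common choice, e.g.:
    # from monai.losses import DiceLoss
    # loss_function = DiceLoss(to_onehot_y=True, softmax=True)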
    optimizer = torch.optim.Adam(model.parameters(), 1e-5)
    # auc_metric = ROCAUCMetric()
    # PyTorch training loop for the DenseNet121 model
    val_interval = 2
    best_metric = -1
    best_metric_epoch = -1
    epoch_loss_values = []
    metric_values = []
    writer = SummaryWriter()
    max_epochs = 1  # note: with max_epochs=1 and val_interval=2, the validation block below never runs
    for epoch in range(max_epochs):
        print("-" * 10)
        print(f"epoch {epoch + 1}/{max_epochs}")
        model.train()
        epoch_loss = 0
        step = 0
        for batch_data in train_loader:
            step += 1
            inputs, labels = batch_data["image"].to(device), batch_data["label"].to(device)
            print("inputs", inputs.shape)    # (1, 1, 128, 128, 84)
            optimizer.zero_grad()
            outputs = model(inputs)
            print("outputs", outputs.shape)  # (1, 84) from the DenseNet classifier
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            # Things I already tried here without success:
            # outputs2 = F.interpolate(outputs, size=labels.shape[2:], mode='trilinear', align_corners=True)
            # labels2 = labels.view(1, 1)
            labels = labels.long()
            # torchmetrics expects the task name in lowercase ("binary", not "Binary").
            accuracy = Accuracy(task="binary")
            # This is the line that raises the RuntimeError quoted above:
            # preds are (1, 84) while the target is (1, 1, 128, 128, 84).
            accuracy(outputs, labels)
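            # If I switched to a segmentation network (see the UNet sketch above), my
            # understanding is that MONAI's DiceMetric, fed with discretized one-hot
            # tensors like the commented-out post_pred/post_label at the top, would be
            # shape-compatible, e.g.:
            # from monai.metrics import DiceMetric
            # from monai.transforms import AsDiscrete
            # dice_metric = DiceMetric(include_background=False)
            # post_pred = AsDiscrete(argmax=True, to_onehot=2)
            # post_label = AsDiscrete(to_onehot=2)
            # dice_metric(y_pred=post_pred(outputs[0])[None], y=post_label(labels[0])[None])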
            epoch_len = len(train_ds) // train_loader.batch_size
            print(f"{step}/{epoch_len}, train_loss: {loss.item():.4f}")
            writer.add_scalar("train_loss", loss.item(), epoch_len * epoch + step)
        epoch_loss /= step
        epoch_loss_values.append(epoch_loss)  # evaluate training through the average loss per epoch
        print(f"epoch {epoch + 1} average loss: {epoch_loss:.4f}")
        # The epoch loss indicates how much is lost while learning, but it is not 100% reliable on its own.
        # MODEL VALIDATION
        if (epoch + 1) % val_interval == 0:
            model.eval()
            num_correct = 0.0
            metric_count = 0
            for val_data in val_loader:
                # renamed from val_images/val_labels so they don't shadow the file lists above
                val_inputs, val_masks = val_data["image"].to(device), val_data["label"].to(device)
                with torch.no_grad():
                    val_outputs = model(val_inputs)
                    value = torch.eq(val_outputs.argmax(dim=1), val_masks.argmax(dim=1))
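                    # Note on the line above: val_masks is (1, 1, 128, 128, 84), so argmax
                    # over the size-1 channel axis returns all zeros of shape (1, 128, 128, 84),
                    # while val_outputs.argmax(dim=1) is shape (1); the comparison only goes
                    # through by broadcasting, so this accuracy is probably not measuring
                    # what I want either.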
                    metric_count += len(value)
                    num_correct += value.sum().item()
            # metric computation: the image may or may not have eyes; confusion matrix
            metric = num_correct / metric_count
            metric_values.append(metric)
            if metric > best_metric:
                best_metric = metric
                best_metric_epoch = epoch + 1
                torch.save(model.state_dict(), "best_metric_model_classification3d_array.pth")
                print("saved new best metric model")
            print(f"Current epoch: {epoch + 1} current accuracy: {metric:.4f}")
            print(f"Best accuracy: {best_metric:.4f} at epoch {best_metric_epoch}")
            writer.add_scalar("val_accuracy", metric, epoch + 1)
    print(f"Training completed, best_metric: {best_metric:.4f} at epoch: {best_metric_epoch}")
    # Plot the average loss per epoch.
    valx = [i + 1 for i in range(len(epoch_loss_values))]
    valy = epoch_loss_values
    plt.xlabel("Epoch")
    plt.plot(valx, valy)
    plt.show()
    writer.close()
if __name__ == "__main__":
    main()