I'm working on creating a dataset for Yolo model training. Currently the frames and annotation files are present for multiple videos in their separate folders.
Here's the code I wrote to split the images and annotation file from the different folders.
The images to be added in test/val folders are selected randomly and moved.
I've tried various copy methods (including copy2, copyfileobj from shutils etc) but only a few images are copied properly, the rest are empty files with size 38kb.
#Assume other libraries are imported
for video_folder in video_folders:
os.chdir(os.path.join(video_folder, "obj_train_data"))
print(f"Currently in {os.getcwd()}")
sample_files = os.listdir("./")
for image_file in sample_files:
label_file = os.path.join(str(image_file[:-3] + "txt"))
print(f"Image file name = {image_file}")
print(f"label file name = {label_file}")
#Generate full path.
image_file = os.path.join(os.getcwd(), image_file)
label_file = os.path.join(os.getcwd(), label_file)
assert(os.path.exists(image_file))
assert(os.path.exists(label_file))
#rename the iamge and label file to avoid overwriting
new_image_file_name = f"frame_{sample_moved}.PNG"
new_label_file_name = f"frame_{sample_moved}.txt"
# Copy the background file
if not (os.path.exists(label_file)):
os.system(f"cp {image_file} {os.path.join(__IMAGE_TRAIN_FOLDER__,new_image_file_name)}")
continue
if(sample_moved == random_indices[0]):
try:
random_indices.pop(0)
except IndexError as e:
print("array is empty!!")
#Move file to test folder
if(test_counter < max_test_samples_count):
print("moving to test folder")
os.system(f"cp {image_file} {os.path.join(__IMAGE_TEST_FOLDER__,new_image_file_name)}")
os.system(f"cp {label_file} {os.path.join(__LABEL_TEST_FOLDER__,new_label_file_name)}")
sleep(0.1)
test_counter+=1
#Move file to validation counter
elif(val_counter < max_val_samples_count):
print("moving to validation folder")
os.system(f"cp {image_file} {os.path.join(__IMAGE_VAL_FOLDER__,new_image_file_name)}")
os.system(f"cp {label_file} {os.path.join(__LABEL_VAL_FOLDER__,new_label_file_name)}")
sleep(0.1)
val_counter+=1
else:
print("Moving to training folder")
os.system(f"cp {image_file} {os.path.join(__IMAGE_TRAIN_FOLDER__,new_image_file_name)}")
os.system(f"cp {label_file} {os.path.join(__LABEL_TRAIN_FOLDER__,new_label_file_name)}")
sleep(0.1)
sample_moved+=1
# Move to base directory.
os.chdir("../../")
print(f"Total samples moved = {sample_moved}")
Following is the dir strucure:
-Images
-Labels
-video_1
-obj_train_data
-video_2
--obj_train_data
-video_3
--obj_train_data
I utilized the following methods:
os.system("cp src dest")shutil.copy(" src dest")
What I expect:
- All the images and respective annotation files are copied in respective folders.
What is happening:
- Only some images are copied that are proper in size and can be viewed with image viewer applications. Most of the image files are only 38 kb in size and are empty.
The mistake in code is highlighted here:
This generates a list of "all" the files in the directory, which also includes
.txtfiles. These files are then copied as image files later on in code.How to prevent this from happening:
create a list of images only using:
This will ensure that loop only runs on images and not on
.txtfiles.