I have a folder that contains a lot of images, a mixture of horizontal and vertical images. I'm trying to filter out the images that look horizontal and that have a lot of white space on the top and bottom of the image. Here are examples:
while i have other images that look like this
All the images are the same size, 400x400. I'm trying to filter the images based on the amount of white space on the top and bottom of the image.
here is what i have tried
from PIL import Image
import os
import shutil
def has_white_borders(
    image_path,
    threshold_top_bottom=0.2,
    strip_fraction=0.05,
    white_level=200,
):
    """Report whether an image has bright bands along its top and bottom edges.

    One horizontal strip is cut from the top edge and one from the bottom
    edge. Each strip is converted to grayscale and the share of its pixels
    brighter than ``white_level`` is measured. The image qualifies only when
    both shares are strictly greater than ``threshold_top_bottom``.

    Args:
        image_path: Path of the image file to inspect.
        threshold_top_bottom: Share (0-1) of bright pixels that each strip
            must exceed. With the default of 0.2 a strip passes as soon as
            just over a fifth of its pixels are bright; pass a value close
            to 1.0 to require an almost solid white band.
        strip_fraction: Height of each strip as a fraction of the image
            height (default 0.05, i.e. 5%).
        white_level: Grayscale value (0-255) a pixel must exceed to count
            as white.

    Returns:
        True when both the top and the bottom strip exceed the threshold,
        otherwise False.
    """
    # Use the image as a context manager so the underlying file is closed
    # as soon as the measurement is done.
    with Image.open(image_path) as img:
        width, height = img.size

        # Always inspect at least one row (a zero-row strip would make the
        # ratio below divide by zero) and never more rows than the image has.
        rows_to_check = min(height, max(1, int(height * strip_fraction)))
        strip_area = width * rows_to_check

        def white_ratio(box):
            # Map every grayscale pixel to 1 (white) or 0, then sum the flags.
            strip = img.crop(box).convert("L")
            flags = strip.point(lambda p: 1 if p > white_level else 0)
            return sum(flags.getdata()) / strip_area

        top_ratio = white_ratio((0, 0, width, rows_to_check))
        bottom_ratio = white_ratio((0, height - rows_to_check, width, height))

    return top_ratio > threshold_top_bottom and bottom_ratio > threshold_top_bottom
def find_images_with_white_borders(input_folder, output_folder):
    """Copy the white-bordered images found in one folder into another.

    Every regular file directly inside ``input_folder`` whose name ends with
    a known image extension is tested with ``has_white_borders`` (using that
    function's default settings). The files that pass are copied, with their
    metadata, into ``output_folder``, which is created if it is missing.

    Args:
        input_folder: Folder to scan; sub-folders are not searched.
        output_folder: Folder that receives the matching images.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff')

    images_with_white_borders = []
    for file_name in os.listdir(input_folder):
        if not file_name.lower().endswith(image_extensions):
            continue
        source_path = os.path.join(input_folder, file_name)
        # os.listdir also returns directories; one named like an image
        # would otherwise be handed to the image opener and abort the run.
        if not os.path.isfile(source_path):
            continue
        if has_white_borders(source_path):
            images_with_white_borders.append(file_name)

    # Create the destination only after the scan, then copy the matches.
    os.makedirs(output_folder, exist_ok=True)
    for img_name in images_with_white_borders:
        shutil.copy2(
            os.path.join(input_folder, img_name),
            os.path.join(output_folder, img_name),
        )
if __name__ == "__main__":
    # Folder that will receive the filtered images
    output_image_folder = r'folder/path'
    # Folder holding the images to be examined
    input_image_folder = r'folder/path'

    # Scan the input folder, copy the matching images, then report the destination
    find_images_with_white_borders(
        input_folder=input_image_folder,
        output_folder=output_image_folder,
    )
    print(f"Images with more white borders on top and bottom copied to: {output_image_folder}")
When I run the code, it still shows me images with very little to no white border on the top and bottom, or it shows me images with a white border on the sides. I want to find the images that are horizontal, like the first image.



You could check whether the first row and last row are both all white: