I have Python code that finds and replaces an image in PDFs, but I'm having a hard time adjusting the size of the new image. What the code does is find the old image and use its size and position to apply the new image. I want to be able to set the size of the new image to whatever height or width I want, but still place it in the same spot where the old one was. The goal is to replace a logo in multiple PDFs at the same time. Thanks for any suggestions.
from pikepdf import Pdf, PdfImage, Name
from PIL import Image
import zlib
import os
# Folder that is scanned for input PDF files (placeholder value; edit before running)
input_folder = r'C:\input folder'
# Folder that receives the modified PDFs; each output keeps its input file name
output_folder = r'C:\output folder'
# Image file that is opened with Pillow and written in place of the old image
replacement_image_path = r'C:\new image to replace'
def replace_images_in_pdf(input_pdf_path, output_pdf_path, image_path):
    """Replace the first image found in a PDF with the image at *image_path*.

    Pages are scanned in order and only the first image XObject encountered
    is overwritten; every other image is left untouched. The replacement is
    resampled to the pixel dimensions of the image it replaces and stored as
    8-bit DeviceRGB data compressed with Flate.

    Only the image's pixel data is rewritten here. This function does not
    edit the page content stream, so the place and size at which the image
    is drawn on the page stay exactly as they were for the old image.

    Args:
        input_pdf_path: Path of the PDF to read.
        output_pdf_path: Path the resulting PDF is saved to. It may be the
            same file as *input_pdf_path*.
        image_path: Path of the replacement image (any format Pillow reads).

    The output file is written even when the PDF contains no image.
    """
    # Entries that described the *old* pixel data and would corrupt or
    # wrongly mask the new RGB samples if they were left in place.
    stale_keys = ("/SMask", "/Mask", "/ImageMask", "/Decode")

    # Context managers make sure both files are closed even if an error
    # occurs part-way through.
    with Pdf.open(input_pdf_path, allow_overwriting_input=True) as pdf:
        with Image.open(image_path) as replacement_image:
            for page in pdf.pages:
                image_keys = list(page.images.keys())
                if not image_keys:
                    continue  # no image on this page, try the next one

                raw_image = PdfImage(page.images[image_keys[0]]).obj

                # Read the dimensions from the image dictionary rather than
                # decoding the old image, which can fail for encodings that
                # cannot be converted to a Pillow image.
                width = int(raw_image.Width)
                height = int(raw_image.Height)

                # The stream is declared as DeviceRGB below, so the samples
                # must really be 3 x 8-bit RGB. Converting handles RGBA,
                # palette, greyscale, ... replacement images.
                resized = replacement_image.convert("RGB").resize((width, height))

                raw_image.write(
                    zlib.compress(resized.tobytes()),
                    filter=Name("/FlateDecode"),
                )
                raw_image.ColorSpace = Name("/DeviceRGB")
                raw_image.BitsPerComponent = 8
                raw_image.Width, raw_image.Height = width, height
                for key in stale_keys:
                    if key in raw_image:
                        del raw_image[key]

                break  # only the first image is replaced

            pdf.save(output_pdf_path)
def process_folder(input_folder, output_folder, replacement_image_path):
    """Run ``replace_images_in_pdf`` on every PDF file in *input_folder*.

    Args:
        input_folder: Directory whose entries are scanned (not recursively).
        output_folder: Directory the processed PDFs are written to, each
            under the same file name as its input. Created if missing.
        replacement_image_path: Path of the image handed to
            ``replace_images_in_pdf`` for every file.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    for input_file in os.listdir(input_folder):
        # Match the extension case-insensitively (".pdf", ".PDF", ...).
        if not input_file.lower().endswith('.pdf'):
            continue
        input_pdf_path = os.path.join(input_folder, input_file)
        # A directory whose name happens to end in ".pdf" is not a PDF file.
        if not os.path.isfile(input_pdf_path):
            continue
        output_pdf_path = os.path.join(output_folder, input_file)
        replace_images_in_pdf(input_pdf_path, output_pdf_path, replacement_image_path)
# Run the process: handle every PDF in input_folder and write the results to output_folder
process_folder(input_folder, output_folder, replacement_image_path)
Taking KJ's idea to use redaction annotations for resizing the display area of the image with PyMuPDF: