Find, replace and resize an image in PDFs using Python

53 views Asked by At

I have Python code that finds and replaces an image in PDFs, but I'm having a hard time adjusting the size of the new image. Currently the code finds the old image and uses its size and position to apply the new image. I want to be able to set the new image to whatever height or width I want, while still placing it in the same spot where the old one was. The goal is to replace a logo in multiple PDFs at the same time. Thanks for any suggestions.

from pikepdf import Pdf, PdfImage, Name
from PIL import Image
import zlib
import os

# Folder that is scanned for input PDF files
# (presumably a placeholder path — adjust before running)
input_folder = r'C:\input folder'
# Folder that receives the modified PDF files
# (presumably a placeholder path — adjust before running)
output_folder = r'C:\output folder'
# Image file used as the replacement; it is opened with PIL, so it should
# point at an actual image file (presumably a placeholder — adjust before running)
replacement_image_path = r'C:\new image to replace'

def replace_images_in_pdf(input_pdf_path, output_pdf_path, image_path):
    """Replace the first image found in a PDF and save the result.

    Pages are scanned in order; the first image of the first page that has
    any images is overwritten with the picture at *image_path*. All other
    images are left untouched. The document is saved even when no image was
    found, so the output is then an unchanged copy.

    NOTE: only the pixel data of the existing image object is swapped. The
    on-page position and displayed size are defined by the page's content
    stream, not by the pixel dimensions written here, so changing the pixel
    size alone does not change how large the image appears on the page.

    Args:
        input_pdf_path: Path of the PDF to read.
        output_pdf_path: Path the modified PDF is written to (may equal
            *input_pdf_path*, because the input is opened with
            ``allow_overwriting_input=True``).
        image_path: Path of the replacement image (any format PIL can open).
    """
    # Context managers guarantee both files are closed even if a step raises.
    with Pdf.open(input_pdf_path, allow_overwriting_input=True) as pdf:
        with Image.open(image_path) as replacement_image:
            # The stream is declared /DeviceRGB with 8 bits per component
            # below, so the raw bytes must be exactly 3 bytes per pixel.
            # Without this conversion an RGBA, palette or grayscale source
            # image would produce a corrupt stream.
            replacement_rgb = replacement_image.convert("RGB")

        for page in pdf.pages:
            image_keys = list(page.images.keys())
            if not image_keys:
                continue  # nothing to replace on this page

            pdf_image = PdfImage(page.images[image_keys[0]])
            raw_image = pdf_image.obj

            # Read the dimensions from the image dictionary instead of
            # decoding the whole original image just to measure it.
            width, height = pdf_image.width, pdf_image.height

            # Resize the replacement to the original image's pixel dimensions
            resized = replacement_rgb.resize((width, height))

            # Replace the original image data and describe the new encoding
            raw_image.write(
                zlib.compress(resized.tobytes()),
                filter=Name("/FlateDecode"),
            )
            raw_image.ColorSpace = Name("/DeviceRGB")
            # The original may have been 1-bit or 4-bit; the new data is 8-bit.
            raw_image.BitsPerComponent = 8
            raw_image.Width, raw_image.Height = width, height
            # A /Decode array belongs to the old colour space (e.g. 8 entries
            # for CMYK) and would be invalid for the new RGB data.
            if "/Decode" in raw_image:
                del raw_image["/Decode"]
            # NOTE(review): an existing /SMask or /Mask is left in place, so
            # the old image's transparency still applies — confirm this is
            # what you want.

            break  # only the first image in the document is replaced

        pdf.save(output_pdf_path)

def process_folder(input_folder, output_folder, replacement_image_path):
    """Run the image replacement on every PDF file in a folder.

    Each regular file in *input_folder* whose name ends in ``.pdf``
    (case-insensitive) is passed to ``replace_images_in_pdf`` and written to
    *output_folder* under the same file name. Sub-folders are not scanned.

    Args:
        input_folder: Folder containing the PDFs to process.
        output_folder: Folder for the modified PDFs; created if missing.
        replacement_image_path: Path of the replacement image.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_folder, exist_ok=True)

    for input_file in os.listdir(input_folder):
        if not input_file.lower().endswith('.pdf'):
            continue
        input_pdf_path = os.path.join(input_folder, input_file)
        # A directory may also be named "something.pdf"; only real files
        # can be opened as PDFs.
        if not os.path.isfile(input_pdf_path):
            continue
        output_pdf_path = os.path.join(output_folder, input_file)
        replace_images_in_pdf(input_pdf_path, output_pdf_path, replacement_image_path)

# Run the process only when executed as a script, so that importing this
# module does not start the batch job as a side effect
if __name__ == "__main__":
    process_folder(input_folder, output_folder, replacement_image_path)
1

There is 1 answer

1
Jorj McKie On BEST ANSWER

Taking KJ's idea to use redaction annotations for resizing the display area of the image with PyMuPDF:

# Assumption: we already know which image we want to deal with.
# For example:

import fitz  # PyMuPDF

doc = fitz.open("input.pdf")
page = doc[0]

# List the images used by the page. In an interactive session this echoes e.g.
#   [(3, 0, 1280, 720, 8, 'DeviceRGB', '', 'Img3', 'DCTDecode')]
# The first item of the tuple is the xref, so we have an image at xref 3.
page.get_images()

# remove it and re-insert it in a different rectangle, 90° rotated.
img_bbox = page.get_image_rects(3)[0]  # old display location

img = doc.extract_image(3)  # extract image
# mark image area for removal; in an interactive session this echoes e.g.
#   'Redact' annotation on page 0 of landscape.pdf
page.add_redact_annot(img_bbox)

# remove image
page.apply_redactions(images=fitz.PDF_REDACT_IMAGE_REMOVE)

# re-insert in new rectangle 90° rotated anti-clockwise
# (the rectangle decides where and how large the image is displayed)
page.insert_image((100, 100, 400, 500), stream=img["image"], rotate=90)

# save at max compression to new file
doc.ez_save("output.pdf")