#   pdf-segmented: Generate PDFs using separate compression for foreground and background
#   Copyright (C) 2025  Lee Yingtong Li
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU Affero General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU Affero General Public License for more details.
#
#   You should have received a copy of the GNU Affero General Public License
#   along with this program.  If not, see <https://www.gnu.org/licenses/>.

from ..compression import CompressedLayer, CompressedPage
from ..compression.jbig2 import JBIG2Layer
from ..compression.jp2 import JP2Layer
from ..compression.jpeg import JPEGLayer
from ..compression.png import PNGLayer
from ..input import InputPages

from pikepdf import ContentStreamInstruction, Dictionary, Name, Operator, Page, Pdf, Stream, unparse_content_stream

from typing import Generator


def pdf_write_pages(
    input_pages: InputPages,
    # All three type arguments are spelled out: the single-argument form
    # Generator[X] only works on Python 3.13+ and raises TypeError earlier.
    compressed_pages: Generator[CompressedPage, None, None],
    output_file: str
) -> None:
    """Write every compressed page to a new PDF and save it to *output_file*.

    Each page gets its background layer painted first and its foreground
    layer painted on top. All pages share a single page size computed from
    ``input_pages.width``, ``input_pages.height`` and ``input_pages.dpi``.

    Args:
        input_pages: Source of the pixel dimensions and resolution.
        compressed_pages: Pages to write; each must expose ``bg`` and ``fg``
            layers.
        output_file: Path passed to ``Pdf.save``.

    Raises:
        NotImplementedError: If a layer is of a type ``pdf_write_layer``
            does not handle.
    """
    # Get size of image in PostScript points (72 points per inch)
    width_pt = input_pages.width / input_pages.dpi * 72
    height_pt = input_pages.height / input_pages.dpi * 72

    # Build PDF
    pdf = Pdf.new()

    # Write each page
    for compressed_page in compressed_pages:
        page = pdf.add_blank_page(page_size=(width_pt, height_pt))

        # Write each layer to the page: background first, foreground on top
        content_instructions = []
        pdf_write_layer(
            input_pages=input_pages,
            pdf=pdf,
            page=page,
            layer=compressed_page.bg,
            is_foreground=False,
            content_instructions=content_instructions
        )
        pdf_write_layer(
            input_pages=input_pages,
            pdf=pdf,
            page=page,
            layer=compressed_page.fg,
            is_foreground=True,
            content_instructions=content_instructions
        )

        # Generate content stream: save the graphics state, scale the
        # coordinate system to the page size, paint the layers, then
        # restore the graphics state
        wrapped_instructions = [
            ContentStreamInstruction([], Operator('q')),
            ContentStreamInstruction([width_pt, 0, 0, height_pt, 0, 0], Operator('cm'))
        ] + content_instructions + [
            ContentStreamInstruction([], Operator('Q')),
        ]
        content_stream = unparse_content_stream(wrapped_instructions)
        page.Contents.write(content_stream)

    # Save PDF
    pdf.save(output_file)


def pdf_write_layer(
    input_pages: InputPages,
    pdf: Pdf,
    page: Page,
    layer: CompressedLayer,
    is_foreground: bool,
    content_instructions: list,
) -> None:
    """Embed one compressed layer in *page* as an image.

    The layer's concrete type selects the PDF stream filter and image
    attributes; the actual embedding is delegated to ``pdf_write_image``.

    Args:
        input_pages: Source of the pixel dimensions.
        pdf: PDF that will own the image stream.
        page: Page whose resources receive the image.
        layer: Compressed layer to embed.
        is_foreground: Only consulted for PNG layers, where it selects
            between the 1-bit masked form and the 8-bit RGB form.
        content_instructions: List of ``ContentStreamInstruction`` that the
            paint instruction is appended to (mutated in place).

    Raises:
        NotImplementedError: If *layer* is not a JBIG2, JP2, JPEG or PNG
            layer.
    """
    # Pick the stream payload and image dictionary entries for this layer type
    if isinstance(layer, JBIG2Layer):
        value = layer.data
        image_attrs = dict(
            ColorSpace=Name.DeviceGray,
            Filter=Name.JBIG2Decode,
            BitsPerComponent=1,
            Mask=[1, 1]  # Layer mask
        )
    elif isinstance(layer, JP2Layer):
        value = layer.data
        image_attrs = dict(
            ColorSpace=Name.DeviceRGB,
            Filter=Name.JPXDecode,
            BitsPerComponent=8
        )
    elif isinstance(layer, JPEGLayer):
        value = layer.data
        image_attrs = dict(
            ColorSpace=Name.DeviceRGB,
            Filter=Name.DCTDecode,
            BitsPerComponent=8
        )
    elif isinstance(layer, PNGLayer):
        value = layer.get_flate_data()
        if is_foreground:
            # See PDF 1.7 section 7.4.4.3
            # See also the implementation in img2pdf
            image_attrs = dict(
                ColorSpace=Name.DeviceGray,
                Filter=Name.FlateDecode,
                BitsPerComponent=1,
                Mask=[1, 1],  # Layer mask
                DecodeParms=Dictionary(
                    Predictor=15,  # PNG prediction (on encoding, PNG optimum) - this is the only allowed value in a PNG file
                    BitsPerComponent=1,  # Default is 8 so must set this here
                    Columns=input_pages.width
                )
            )
        else:
            image_attrs = dict(
                ColorSpace=Name.DeviceRGB,
                Filter=Name.FlateDecode,
                BitsPerComponent=8,
                DecodeParms=Dictionary(
                    Predictor=15,
                    Colors=3,  # Default is 1 so must set this here
                    Columns=input_pages.width
                )
            )
    else:
        # Name the offending type so the failure is diagnosable
        raise NotImplementedError(
            f'Unsupported layer type: {type(layer).__name__}'
        )

    # Write the layer to PDF
    pdf_write_image(
        input_pages=input_pages,
        pdf=pdf,
        page=page,
        value=value,
        content_instructions=content_instructions,
        **image_attrs
    )


def pdf_write_image(
    input_pages: InputPages,
    pdf: Pdf,
    page: Page,
    value: bytes,
    content_instructions: list,
    **kwargs
) -> None:
    """Add *value* to *page* as an Image XObject and queue its paint operator.

    Args:
        input_pages: Supplies the image ``Width`` and ``Height`` in pixels.
        pdf: PDF that will own the new stream.
        page: Page whose ``/XObject`` resources receive the image.
        value: Encoded image data stored as the stream contents.
        content_instructions: List that the ``Do`` instruction is appended
            to (mutated in place).
        **kwargs: Extra entries for the image stream dictionary, such as
            ``ColorSpace``, ``Filter`` and ``BitsPerComponent``.
    """
    # Write the layer as an Image
    # Insert the Image as an XObject resource
    xobj = Stream(
        pdf,
        value,
        Type=Name.XObject,
        Subtype=Name.Image,
        Width=input_pages.width,
        Height=input_pages.height,
        **kwargs
    )
    xobj_name = page.add_resource(xobj, '/XObject')

    # Add render instruction to the content stream
    content_instructions.append(
        ContentStreamInstruction([xobj_name], Operator('Do'))
    )