# pdf-segmented: Generate PDFs using separate compression for foreground and background
# Copyright (C) 2025 Lee Yingtong Li
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from ..compression import CompressedLayer, CompressedPage
from ..compression.jbig2 import JBIG2Layer
from ..compression.jpeg import JPEGLayer
from ..input import InputPages

from pikepdf import ContentStreamInstruction, Name, Operator, Page, Pdf, Stream, unparse_content_stream

from typing import Generator


def pdf_write_pages(
    input_pages: InputPages,
    compressed_pages: Generator[CompressedPage, None, None],
    output_file: str
) -> None:
    """Write every compressed page into a new PDF and save it to *output_file*.

    All pages share one size, derived from ``input_pages.width``,
    ``input_pages.height`` and ``input_pages.dpi``. For each page the
    background layer (``bg``) is painted first and the foreground layer
    (``fg``) second.

    Args:
        input_pages: Source of the pixel dimensions and DPI of the pages.
        compressed_pages: Pages to write; each is consumed exactly once.
        output_file: Path the finished PDF is saved to.

    Raises:
        NotImplementedError: If a layer is neither a JBIG2Layer nor a JPEGLayer.
    """

    # Get size of image in PostScript points (72 points per inch)
    width_pt = input_pages.width / input_pages.dpi * 72
    height_pt = input_pages.height / input_pages.dpi * 72

    # Build PDF
    pdf = Pdf.new()

    # Write each page
    for compressed_page in compressed_pages:
        page = pdf.add_blank_page(page_size=(width_pt, height_pt))

        # Write each layer to the page; background first so the foreground is painted over it
        content_instructions: list[ContentStreamInstruction] = []
        pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.bg, content_instructions=content_instructions)
        pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.fg, content_instructions=content_instructions)

        # Generate content stream
        # The layer instructions are wrapped in q ... Q (save/restore graphics state), after a
        # cm that scales the coordinate system by the page width and height
        wrapped_instructions = [
            ContentStreamInstruction([], Operator('q')),
            ContentStreamInstruction([width_pt, 0, 0, height_pt, 0, 0], Operator('cm'))
        ] + content_instructions + [
            ContentStreamInstruction([], Operator('Q')),
        ]
        content_stream = unparse_content_stream(wrapped_instructions)
        page.Contents.write(content_stream)

    # Save PDF
    pdf.save(output_file)


def pdf_write_layer(
    input_pages: InputPages,
    pdf: Pdf,
    page: Page,
    layer: CompressedLayer,
    content_instructions: list[ContentStreamInstruction],
) -> None:
    """Embed one compressed layer in *page* according to its compression type.

    Args:
        input_pages: Source of the pixel dimensions of the layer image.
        pdf: PDF that owns the new image stream.
        page: Page whose resources receive the image.
        layer: Layer to embed; its ``data`` is used as the image stream.
        content_instructions: List that the paint instruction is appended to.

    Raises:
        NotImplementedError: If *layer* is neither a JBIG2Layer nor a JPEGLayer.
    """

    # Write the layer to PDF
    if isinstance(layer, JBIG2Layer):
        pdf_write_image(
            input_pages=input_pages,
            pdf=pdf,
            page=page,
            value=layer.data,
            content_instructions=content_instructions,
            ColorSpace=Name.DeviceGray,
            Filter=Name.JBIG2Decode,
            BitsPerComponent=1,
            Mask=[1, 1]  # Layer mask: colour-key range, samples with value 1 are not painted
        )
    elif isinstance(layer, JPEGLayer):
        pdf_write_image(
            input_pages=input_pages,
            pdf=pdf,
            page=page,
            value=layer.data,
            content_instructions=content_instructions,
            ColorSpace=Name.DeviceRGB,
            Filter=Name.DCTDecode,
            BitsPerComponent=8
        )
    else:
        # Name the offending type so an unsupported compressor is easy to diagnose
        raise NotImplementedError(f'Unsupported layer type: {type(layer).__name__}')


def pdf_write_image(
    input_pages: InputPages,
    pdf: Pdf,
    page: Page,
    value: bytes,
    content_instructions: list[ContentStreamInstruction],
    **kwargs
) -> None:
    """Add *value* to *page* as an Image XObject and queue an instruction to paint it.

    Args:
        input_pages: Source of the ``Width`` and ``Height`` entries of the image.
        pdf: PDF that owns the new image stream.
        page: Page whose ``/XObject`` resources receive the image.
        value: Already-encoded image data, stored as the stream content.
        content_instructions: List that the ``Do`` instruction is appended to.
        **kwargs: Extra entries for the image stream dictionary (e.g. ``ColorSpace``,
            ``Filter``, ``BitsPerComponent``, ``Mask``).
    """

    # Write the layer as an Image

    # Insert the Image as an XObject resource
    xobj = Stream(
        pdf,
        value,
        Type=Name.XObject,
        Subtype=Name.Image,
        Width=input_pages.width,
        Height=input_pages.height,
        **kwargs
    )
    xobj_name = page.add_resource(xobj, '/XObject')

    # Add render instruction to the content stream
    content_instructions.append(
        ContentStreamInstruction([xobj_name], Operator('Do'))
    )