2025-05-08 19:27:05 +10:00

122 lines
3.5 KiB
Python

# pdf-segmented: Generate PDFs using separate compression for foreground and background
# Copyright (C) 2025 Lee Yingtong Li
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from ..compression import CompressedLayer, CompressedPage
from ..compression.jbig2 import JBIG2Layer
from ..compression.jpeg import JPEGLayer
from ..input import InputPages
from pikepdf import ContentStreamInstruction, Name, Operator, Page, Pdf, Stream, unparse_content_stream
from typing import Generator
def pdf_write_pages(
    input_pages: InputPages,
    compressed_pages: Generator[CompressedPage, None, None],
    output_file: str
) -> None:
    """Assemble the compressed pages into a PDF and save it to output_file.

    Every page is created with the same size, derived from the pixel
    dimensions and DPI of input_pages. For each compressed page the
    background layer is written first and the foreground layer second, so
    the foreground is painted over the background.

    Args:
        input_pages: Source of the pixel width, height and DPI of the pages.
        compressed_pages: Yields one CompressedPage (with .bg and .fg
            layers) per output page. It is only iterated once with a
            plain for loop.
        output_file: Path the finished PDF is saved to.

    Raises:
        NotImplementedError: Propagated from pdf_write_layer when a layer
            is of an unsupported type.
    """
    # NOTE: Generator is given all three type parameters explicitly because
    # the single-parameter form is only accepted from Python 3.13 onwards;
    # on earlier versions it raises TypeError when this def is evaluated.

    # Get size of image in PostScript points (72 points per inch)
    width_pt = input_pages.width / input_pages.dpi * 72
    height_pt = input_pages.height / input_pages.dpi * 72

    # Build PDF
    pdf = Pdf.new()

    # Write each page
    for compressed_page in compressed_pages:
        page = pdf.add_blank_page(page_size=(width_pt, height_pt))

        # Write each layer to the page - background first, then foreground
        content_instructions = []
        pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.bg, content_instructions=content_instructions)
        pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.fg, content_instructions=content_instructions)

        # Generate content stream
        # The layers are wrapped in q ... Q (save/restore graphics state),
        # and cm scales the coordinate system so that the images drawn by
        # the layers' Do operators span the full page
        wrapped_instructions = [
            ContentStreamInstruction([], Operator('q')),
            ContentStreamInstruction([width_pt, 0, 0, height_pt, 0, 0], Operator('cm'))
        ] + content_instructions + [
            ContentStreamInstruction([], Operator('Q')),
        ]

        content_stream = unparse_content_stream(wrapped_instructions)
        page.Contents.write(content_stream)

    # Save PDF
    pdf.save(output_file)
def pdf_write_layer(
    input_pages: InputPages,
    pdf: Pdf,
    page: Page,
    layer: CompressedLayer,
    content_instructions,
) -> None:
    """Write one compressed layer to the page as an image.

    Chooses the image dictionary entries matching the layer's compression
    and hands the layer data to pdf_write_image, which appends the drawing
    instruction to content_instructions.

    Raises:
        NotImplementedError: If the layer is neither a JBIG2Layer nor a
            JPEGLayer.
    """
    # Pick the image dictionary entries for this kind of layer
    if isinstance(layer, JBIG2Layer):
        image_entries = {
            'ColorSpace': Name.DeviceGray,
            'Filter': Name.JBIG2Decode,
            'BitsPerComponent': 1,
            'Mask': [1, 1],  # Layer mask
        }
    elif isinstance(layer, JPEGLayer):
        image_entries = {
            'ColorSpace': Name.DeviceRGB,
            'Filter': Name.DCTDecode,
            'BitsPerComponent': 8,
        }
    else:
        raise NotImplementedError()

    # Write the layer to PDF
    pdf_write_image(
        input_pages=input_pages,
        pdf=pdf,
        page=page,
        value=layer.data,
        content_instructions=content_instructions,
        **image_entries
    )
def pdf_write_image(
    input_pages: InputPages,
    pdf: Pdf,
    page: Page,
    value: bytes,
    content_instructions,
    **kwargs
) -> None:
    """Add encoded image data to the page and queue an instruction to draw it.

    The data in value becomes an Image XObject whose Width and Height are
    the pixel dimensions of input_pages. Any extra keyword arguments are
    passed through as further entries of the image dictionary. A Do
    instruction naming the new resource is appended to
    content_instructions; the page's content stream itself is not touched
    here.
    """
    width_px = input_pages.width
    height_px = input_pages.height

    # Build the Image XObject holding the encoded data
    image_xobj = Stream(
        pdf,
        value,
        Type=Name.XObject,
        Subtype=Name.Image,
        Width=width_px,
        Height=height_px,
        **kwargs
    )

    # Register it in the page's XObject resources; this gives us its name
    resource_name = page.add_resource(image_xobj, '/XObject')

    # Queue the render instruction for the content stream
    draw_instruction = ContentStreamInstruction([resource_name], Operator('Do'))
    content_instructions.append(draw_instruction)