Support different-sized pages within the same document

This commit is contained in:
RunasSudo 2025-10-22 14:53:09 +11:00
parent 6fbd9de2c9
commit 76e50c0d5e
Signed by: RunasSudo
GPG Key ID: 7234E476BF21C61A
6 changed files with 44 additions and 26 deletions

View File

@ -43,6 +43,8 @@ class CompressionOptions:
@dataclass
class CompressedPage:
width: int
height: int
fg: Optional[CompressedLayer]
bg: Optional[CompressedLayer]
@ -77,6 +79,8 @@ def compress_page(
# Compress foreground and background layers
return CompressedPage(
width=segmented_page.width,
height=segmented_page.height,
fg=compress_layer(
input_pages=input_pages,
layer=segmented_page.fg,

View File

@ -23,7 +23,5 @@ from typing import Generator
class InputPages:
file_name: str
num_pages: int
width: int
height: int
dpi: float
pages: Generator[Image]

View File

@ -36,19 +36,21 @@ def xcf_get_pages(input_file: str) -> InputPages:
# Read metadata
proc = subprocess.run(['magick', 'identify', '-format', '%n;%W;%H;%x;%y;%U|', input_file], capture_output=True, encoding='utf-8', check=True)
magick_layers_data = proc.stdout.strip('|').split('|')
magick_layers_data = [l.split(';') for l in proc.stdout.strip('|').split('|')]
if any(d != magick_layers_data[0] for d in magick_layers_data[1:]):
raise Exception('Different metadata between XCF layers')
if any(d[0] != magick_layers_data[0][0] for d in magick_layers_data[1:]):
raise Exception('Different number of layers reported between XCF layers')
if any(d[3:6] != magick_layers_data[0][3:6] for d in magick_layers_data[1:]):
raise Exception('Different resolution reported between XCF layers')
magick_layer_data = magick_layers_data[0].split(';')
magick_layer_data = magick_layers_data[0]
num_layers = int(magick_layer_data[0])
width = int(magick_layer_data[1])
height = int(magick_layer_data[2])
resolution_x = float(magick_layer_data[3])
resolution_y = float(magick_layer_data[4])
resolution_units = magick_layer_data[5]
if num_layers != len(magick_layers_data):
raise Exception('Different number of layers reported between metadata and layer data')
if resolution_units != 'PixelsPerInch':
raise Exception('Unexpected resolution units (expected PixelsPerInch, got {})'.format(resolution_units))
if resolution_x != resolution_y:
@ -57,14 +59,16 @@ def xcf_get_pages(input_file: str) -> InputPages:
return InputPages(
file_name=input_file,
num_pages=num_layers,
width=width,
height=height,
dpi=resolution_x,
pages=_do_get_pages(input_file, num_layers)
pages=_do_get_pages(input_file, magick_layers_data)
)
def _do_get_pages(input_file: str, num_layers: int) -> Generator[Image]:
for layer_num in range(num_layers):
def _do_get_pages(input_file: str, magick_layers_data: List[List[str]]) -> Generator[Image]:
for layer_num, magick_layer_data in enumerate(magick_layers_data):
# Read metadata
width = int(magick_layer_data[1])
height = int(magick_layer_data[2])
# Extract layer as PNG (to proc.stdout)
proc = subprocess.run(['magick', '{}[{}]'.format(input_file, layer_num), 'png:-'], capture_output=True)
@ -75,4 +79,7 @@ def _do_get_pages(input_file: str, num_layers: int) -> Generator[Image]:
png_data = io.BytesIO(proc.stdout)
image = Image.open(png_data)
if image.width != width or image.height != height:
raise Exception('Different resolution reported between metadata and layer data')
yield image

View File

@ -42,7 +42,7 @@ def djvu_write_pages(
# Combine foreground and background
_, page_djvu_file = tempfile.mkstemp(suffix='.djvu', dir=tempdir)
args = ['djvumake', page_djvu_file, 'INFO={},{},{}'.format(input_pages.width, input_pages.height, round(input_pages.dpi))]
args = ['djvumake', page_djvu_file, 'INFO={},{},{}'.format(compressed_page.width, compressed_page.height, round(input_pages.dpi))]
if compressed_page.fg:
args.append('Sjbz={}'.format(compressed_page.fg.filename))
if compressed_page.bg:

View File

@ -30,21 +30,21 @@ def pdf_write_pages(
compressed_pages: Generator[CompressedPage],
output_file: str
) -> None:
# Get size of image in PostScript points
width_pt = input_pages.width / input_pages.dpi * 72
height_pt = input_pages.height / input_pages.dpi * 72
# Build PDF
pdf = Pdf.new()
# Write each page
for compressed_page in compressed_pages:
# Get size of image in PostScript points
width_pt = compressed_page.width / input_pages.dpi * 72
height_pt = compressed_page.height / input_pages.dpi * 72
page = pdf.add_blank_page(page_size=(width_pt, height_pt))
# Write each layer to the page
content_instructions = []
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.bg, is_foreground=False, content_instructions=content_instructions)
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.fg, is_foreground=True, content_instructions=content_instructions)
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, compressed_page=compressed_page, layer=compressed_page.bg, is_foreground=False, content_instructions=content_instructions)
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, compressed_page=compressed_page, layer=compressed_page.fg, is_foreground=True, content_instructions=content_instructions)
# Clean up
if compressed_page.bg:
@ -69,6 +69,7 @@ def pdf_write_layer(
input_pages: InputPages,
pdf: Pdf,
page: Page,
compressed_page: CompressedPage,
layer: CompressedLayer,
is_foreground: bool,
content_instructions,
@ -83,6 +84,7 @@ def pdf_write_layer(
input_pages=input_pages,
pdf=pdf,
page=page,
compressed_page=compressed_page,
value=layer.data,
content_instructions=content_instructions,
ColorSpace=Name.DeviceGray,
@ -95,6 +97,7 @@ def pdf_write_layer(
input_pages=input_pages,
pdf=pdf,
page=page,
compressed_page=compressed_page,
value=layer.data,
content_instructions=content_instructions,
ColorSpace=Name.DeviceRGB,
@ -106,6 +109,7 @@ def pdf_write_layer(
input_pages=input_pages,
pdf=pdf,
page=page,
compressed_page=compressed_page,
value=layer.data,
content_instructions=content_instructions,
ColorSpace=Name.DeviceRGB,
@ -120,6 +124,7 @@ def pdf_write_layer(
input_pages=input_pages,
pdf=pdf,
page=page,
compressed_page=compressed_page,
value=layer.get_flate_data(),
content_instructions=content_instructions,
ColorSpace=Name.DeviceGray,
@ -129,7 +134,7 @@ def pdf_write_layer(
DecodeParms=Dictionary(
Predictor=15, # PNG prediction (on encoding, PNG optimum) - this is the only allowed value in a PNG file
BitsPerComponent=1, # Default is 8 so must set this here
Columns=input_pages.width
Columns=compressed_page.width
)
)
else:
@ -137,6 +142,7 @@ def pdf_write_layer(
input_pages=input_pages,
pdf=pdf,
page=page,
compressed_page=compressed_page,
value=layer.get_flate_data(),
content_instructions=content_instructions,
ColorSpace=Name.DeviceRGB,
@ -145,7 +151,7 @@ def pdf_write_layer(
DecodeParms=Dictionary(
Predictor=15,
Colors=3, # Default is 1 so must set this here
Columns=input_pages.width
Columns=compressed_page.width
)
)
else:
@ -155,6 +161,7 @@ def pdf_write_image(
input_pages: InputPages,
pdf: Pdf,
page: Page,
compressed_page: CompressedPage,
value: bytes,
content_instructions,
**kwargs
@ -168,8 +175,8 @@ def pdf_write_image(
value,
Type=Name.XObject,
Subtype=Name.Image,
Width=input_pages.width,
Height=input_pages.height,
Width=compressed_page.width,
Height=compressed_page.height,
**kwargs
)
xobj_name = page.add_resource(xobj, '/XObject')

View File

@ -24,6 +24,8 @@ from typing import Generator, List, Optional
@dataclass
class SegmentedPage:
width: int
height: int
fg: Optional[Image]
bg: Optional[Image]
@ -83,6 +85,6 @@ def segment_page(input_page: Image) -> SegmentedPage:
# Handle case where empty background or foreground
if numpy.any(fg_pixels):
return SegmentedPage(fg=image_fg, bg=image_bg)
return SegmentedPage(width=input_page.width, height=input_page.height, fg=image_fg, bg=image_bg)
else:
return SegmentedPage(fg=None, bg=image_bg)
return SegmentedPage(width=input_page.width, height=input_page.height, fg=None, bg=image_bg)