Support different-sized pages within the same document
This commit is contained in:
parent
6fbd9de2c9
commit
76e50c0d5e
@ -43,6 +43,8 @@ class CompressionOptions:
|
||||
|
||||
@dataclass
|
||||
class CompressedPage:
|
||||
width: int
|
||||
height: int
|
||||
fg: Optional[CompressedLayer]
|
||||
bg: Optional[CompressedLayer]
|
||||
|
||||
@ -77,6 +79,8 @@ def compress_page(
|
||||
|
||||
# Compress foreground and background layers
|
||||
return CompressedPage(
|
||||
width=segmented_page.width,
|
||||
height=segmented_page.height,
|
||||
fg=compress_layer(
|
||||
input_pages=input_pages,
|
||||
layer=segmented_page.fg,
|
||||
|
||||
@ -23,7 +23,5 @@ from typing import Generator
|
||||
class InputPages:
|
||||
file_name: str
|
||||
num_pages: int
|
||||
width: int
|
||||
height: int
|
||||
dpi: float
|
||||
pages: Generator[Image]
|
||||
|
||||
@ -36,19 +36,21 @@ def xcf_get_pages(input_file: str) -> InputPages:
|
||||
|
||||
# Read metadata
|
||||
proc = subprocess.run(['magick', 'identify', '-format', '%n;%W;%H;%x;%y;%U|', input_file], capture_output=True, encoding='utf-8', check=True)
|
||||
magick_layers_data = proc.stdout.strip('|').split('|')
|
||||
magick_layers_data = [l.split(';') for l in proc.stdout.strip('|').split('|')]
|
||||
|
||||
if any(d != magick_layers_data[0] for d in magick_layers_data[1:]):
|
||||
raise Exception('Different metadata between XCF layers')
|
||||
if any(d[0] != magick_layers_data[0][0] for d in magick_layers_data[1:]):
|
||||
raise Exception('Different number of layers reported between XCF layers')
|
||||
if any(d[3:6] != magick_layers_data[0][3:6] for d in magick_layers_data[1:]):
|
||||
raise Exception('Different resolution reported between XCF layers')
|
||||
|
||||
magick_layer_data = magick_layers_data[0].split(';')
|
||||
magick_layer_data = magick_layers_data[0]
|
||||
num_layers = int(magick_layer_data[0])
|
||||
width = int(magick_layer_data[1])
|
||||
height = int(magick_layer_data[2])
|
||||
resolution_x = float(magick_layer_data[3])
|
||||
resolution_y = float(magick_layer_data[4])
|
||||
resolution_units = magick_layer_data[5]
|
||||
|
||||
if num_layers != len(magick_layers_data):
|
||||
raise Exception('Different number of layers reported between metadata and layer data')
|
||||
if resolution_units != 'PixelsPerInch':
|
||||
raise Exception('Unexpected resolution units (expected PixelsPerInch, got {})'.format(resolution_units))
|
||||
if resolution_x != resolution_y:
|
||||
@ -57,14 +59,16 @@ def xcf_get_pages(input_file: str) -> InputPages:
|
||||
return InputPages(
|
||||
file_name=input_file,
|
||||
num_pages=num_layers,
|
||||
width=width,
|
||||
height=height,
|
||||
dpi=resolution_x,
|
||||
pages=_do_get_pages(input_file, num_layers)
|
||||
pages=_do_get_pages(input_file, magick_layers_data)
|
||||
)
|
||||
|
||||
def _do_get_pages(input_file: str, num_layers: int) -> Generator[Image]:
|
||||
for layer_num in range(num_layers):
|
||||
def _do_get_pages(input_file: str, magick_layers_data: List[List[str]]) -> Generator[Image]:
|
||||
for layer_num, magick_layer_data in enumerate(magick_layers_data):
|
||||
# Read metadata
|
||||
width = int(magick_layer_data[1])
|
||||
height = int(magick_layer_data[2])
|
||||
|
||||
# Extract layer as PNG (to proc.stdout)
|
||||
proc = subprocess.run(['magick', '{}[{}]'.format(input_file, layer_num), 'png:-'], capture_output=True)
|
||||
|
||||
@ -75,4 +79,7 @@ def _do_get_pages(input_file: str, num_layers: int) -> Generator[Image]:
|
||||
png_data = io.BytesIO(proc.stdout)
|
||||
image = Image.open(png_data)
|
||||
|
||||
if image.width != width or image.height != height:
|
||||
raise Exception('Different resolution reported between metadata and layer data')
|
||||
|
||||
yield image
|
||||
|
||||
@ -42,7 +42,7 @@ def djvu_write_pages(
|
||||
# Combine foreground and background
|
||||
_, page_djvu_file = tempfile.mkstemp(suffix='.djvu', dir=tempdir)
|
||||
|
||||
args = ['djvumake', page_djvu_file, 'INFO={},{},{}'.format(input_pages.width, input_pages.height, round(input_pages.dpi))]
|
||||
args = ['djvumake', page_djvu_file, 'INFO={},{},{}'.format(compressed_page.width, compressed_page.height, round(input_pages.dpi))]
|
||||
if compressed_page.fg:
|
||||
args.append('Sjbz={}'.format(compressed_page.fg.filename))
|
||||
if compressed_page.bg:
|
||||
|
||||
@ -30,21 +30,21 @@ def pdf_write_pages(
|
||||
compressed_pages: Generator[CompressedPage],
|
||||
output_file: str
|
||||
) -> None:
|
||||
# Get size of image in PostScript points
|
||||
width_pt = input_pages.width / input_pages.dpi * 72
|
||||
height_pt = input_pages.height / input_pages.dpi * 72
|
||||
|
||||
# Build PDF
|
||||
pdf = Pdf.new()
|
||||
|
||||
# Write each page
|
||||
for compressed_page in compressed_pages:
|
||||
# Get size of image in PostScript points
|
||||
width_pt = compressed_page.width / input_pages.dpi * 72
|
||||
height_pt = compressed_page.height / input_pages.dpi * 72
|
||||
|
||||
page = pdf.add_blank_page(page_size=(width_pt, height_pt))
|
||||
|
||||
# Write each layer to the page
|
||||
content_instructions = []
|
||||
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.bg, is_foreground=False, content_instructions=content_instructions)
|
||||
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.fg, is_foreground=True, content_instructions=content_instructions)
|
||||
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, compressed_page=compressed_page, layer=compressed_page.bg, is_foreground=False, content_instructions=content_instructions)
|
||||
pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, compressed_page=compressed_page, layer=compressed_page.fg, is_foreground=True, content_instructions=content_instructions)
|
||||
|
||||
# Clean up
|
||||
if compressed_page.bg:
|
||||
@ -69,6 +69,7 @@ def pdf_write_layer(
|
||||
input_pages: InputPages,
|
||||
pdf: Pdf,
|
||||
page: Page,
|
||||
compressed_page: CompressedPage,
|
||||
layer: CompressedLayer,
|
||||
is_foreground: bool,
|
||||
content_instructions,
|
||||
@ -83,6 +84,7 @@ def pdf_write_layer(
|
||||
input_pages=input_pages,
|
||||
pdf=pdf,
|
||||
page=page,
|
||||
compressed_page=compressed_page,
|
||||
value=layer.data,
|
||||
content_instructions=content_instructions,
|
||||
ColorSpace=Name.DeviceGray,
|
||||
@ -95,6 +97,7 @@ def pdf_write_layer(
|
||||
input_pages=input_pages,
|
||||
pdf=pdf,
|
||||
page=page,
|
||||
compressed_page=compressed_page,
|
||||
value=layer.data,
|
||||
content_instructions=content_instructions,
|
||||
ColorSpace=Name.DeviceRGB,
|
||||
@ -106,6 +109,7 @@ def pdf_write_layer(
|
||||
input_pages=input_pages,
|
||||
pdf=pdf,
|
||||
page=page,
|
||||
compressed_page=compressed_page,
|
||||
value=layer.data,
|
||||
content_instructions=content_instructions,
|
||||
ColorSpace=Name.DeviceRGB,
|
||||
@ -120,6 +124,7 @@ def pdf_write_layer(
|
||||
input_pages=input_pages,
|
||||
pdf=pdf,
|
||||
page=page,
|
||||
compressed_page=compressed_page,
|
||||
value=layer.get_flate_data(),
|
||||
content_instructions=content_instructions,
|
||||
ColorSpace=Name.DeviceGray,
|
||||
@ -129,7 +134,7 @@ def pdf_write_layer(
|
||||
DecodeParms=Dictionary(
|
||||
Predictor=15, # PNG prediction (on encoding, PNG optimum) - this is the only allowed value in a PNG file
|
||||
BitsPerComponent=1, # Default is 8 so must set this here
|
||||
Columns=input_pages.width
|
||||
Columns=compressed_page.width
|
||||
)
|
||||
)
|
||||
else:
|
||||
@ -137,6 +142,7 @@ def pdf_write_layer(
|
||||
input_pages=input_pages,
|
||||
pdf=pdf,
|
||||
page=page,
|
||||
compressed_page=compressed_page,
|
||||
value=layer.get_flate_data(),
|
||||
content_instructions=content_instructions,
|
||||
ColorSpace=Name.DeviceRGB,
|
||||
@ -145,7 +151,7 @@ def pdf_write_layer(
|
||||
DecodeParms=Dictionary(
|
||||
Predictor=15,
|
||||
Colors=3, # Default is 1 so must set this here
|
||||
Columns=input_pages.width
|
||||
Columns=compressed_page.width
|
||||
)
|
||||
)
|
||||
else:
|
||||
@ -155,6 +161,7 @@ def pdf_write_image(
|
||||
input_pages: InputPages,
|
||||
pdf: Pdf,
|
||||
page: Page,
|
||||
compressed_page: CompressedPage,
|
||||
value: bytes,
|
||||
content_instructions,
|
||||
**kwargs
|
||||
@ -168,8 +175,8 @@ def pdf_write_image(
|
||||
value,
|
||||
Type=Name.XObject,
|
||||
Subtype=Name.Image,
|
||||
Width=input_pages.width,
|
||||
Height=input_pages.height,
|
||||
Width=compressed_page.width,
|
||||
Height=compressed_page.height,
|
||||
**kwargs
|
||||
)
|
||||
xobj_name = page.add_resource(xobj, '/XObject')
|
||||
|
||||
@ -24,6 +24,8 @@ from typing import Generator, List, Optional
|
||||
|
||||
@dataclass
|
||||
class SegmentedPage:
|
||||
width: int
|
||||
height: int
|
||||
fg: Optional[Image]
|
||||
bg: Optional[Image]
|
||||
|
||||
@ -83,6 +85,6 @@ def segment_page(input_page: Image) -> SegmentedPage:
|
||||
|
||||
# Handle case where empty background or foreground
|
||||
if numpy.any(fg_pixels):
|
||||
return SegmentedPage(fg=image_fg, bg=image_bg)
|
||||
return SegmentedPage(width=input_page.width, height=input_page.height, fg=image_fg, bg=image_bg)
|
||||
else:
|
||||
return SegmentedPage(fg=None, bg=image_bg)
|
||||
return SegmentedPage(width=input_page.width, height=input_page.height, fg=None, bg=image_bg)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user