From 7ae4b0f55811d63d12b6eb9ac8a534f99e836e00 Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Thu, 8 May 2025 21:24:21 +1000 Subject: [PATCH] Improve performance of reading XCF metadata --- pdf_segmented/input/xcf.py | 50 ++++++++++++++------------------------ 1 file changed, 18 insertions(+), 32 deletions(-) diff --git a/pdf_segmented/input/xcf.py b/pdf_segmented/input/xcf.py index d7c2fb5..4652586 100644 --- a/pdf_segmented/input/xcf.py +++ b/pdf_segmented/input/xcf.py @@ -35,45 +35,31 @@ def xcf_get_pages(input_file: str) -> InputPages: height = None # Read metadata - proc = subprocess.run(['magick', 'identify', '-verbose', input_file], capture_output=True, encoding='utf-8', check=True) - for line in proc.stdout.splitlines(): - if line == 'Image:': - num_layers += 1 - elif line.startswith(' Geometry: '): - layer_width = float(line[len(' Geometry: '):line.index('x')]) - layer_height = float(line[line.index('x')+1:line.index('+')]) - if (width is not None and layer_width != width) or (height is not None and layer_height != height): - print('Error: Image with variable-dimension layers is not supported ({}x{} vs {}x{})'.format(layer_width, layer_height, width, height)) - sys.exit(1) - width = layer_width - height = layer_height - elif line.startswith(' Resolution: '): - resolution_x = float(line[len(' Resolution: '):line.index('x')]) - resolution_y = float(line[line.index('x')+1:]) - if resolution_x != resolution_y: - raise Exception('Unexpected non-square DPI ({}x{})'.format(resolution_x, resolution_y)) - if dpi is not None and resolution_x != dpi: - raise Exception('Unexpected variable DPI image ({} vs {})'.format(resolution_x, dpi)) - dpi = resolution_x - elif line.startswith(' Units: '): - if line != ' Units: PixelsPerInch': - raise Exception('Unexpected Units (expected PixelsPerInch, got {})'.format(line[len(' Units: '):])) + proc = subprocess.run(['magick', 'identify', '-format', '%n;%W;%H;%x;%y;%U|', input_file], capture_output=True, encoding='utf-8', check=True) + magick_layers_data = proc.stdout.strip('|').split('|') - if num_layers == 0: - raise Exception('Unexpected 0 layers') - if dpi is None: - raise Exception('Unexpected no DPI information') - if width is None: - raise Exception('Unexpected no width information') - if height is None: - raise Exception('Unexpected no height information') + if any(d != magick_layers_data[0] for d in magick_layers_data[1:]): + raise Exception('Different metadata between XCF layers') + + magick_layer_data = magick_layers_data[0].split(';') + num_layers = int(magick_layer_data[0]) + width = int(magick_layer_data[1]) + height = int(magick_layer_data[2]) + resolution_x = float(magick_layer_data[3]) + resolution_y = float(magick_layer_data[4]) + resolution_units = magick_layer_data[5] + + if resolution_units != 'PixelsPerInch': + raise Exception('Unexpected resolution units (expected PixelsPerInch, got {})'.format(resolution_units)) + if resolution_x != resolution_y: + raise Exception('Unexpected non-square DPI ({}x{})'.format(resolution_x, resolution_y)) return InputPages( file_name=input_file, num_pages=num_layers, width=width, height=height, - dpi=dpi, + dpi=resolution_x, pages=_do_get_pages(input_file, num_layers) )