# pdf-segmented/pdf_segmented/input/xcf.py

#   pdf-segmented: Generate PDFs using separate compression for foreground and background
#   Copyright (C) 2025  Lee Yingtong Li
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU Affero General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU Affero General Public License for more details.
#
#   You should have received a copy of the GNU Affero General Public License
#   along with this program.  If not, see <https://www.gnu.org/licenses/>.

from . import InputPages
from ..util import assert_has_imagemagick

from PIL import Image

import io
import subprocess
import sys
from typing import Generator, List

def xcf_get_pages(input_file: str) -> InputPages:
	"""Read the metadata of an XCF file and expose its layers as pages.

	Metadata is queried with ``magick identify``, which reports one record
	per layer. Every layer must report identical metadata, because a single
	width, height and DPI is returned for the whole file. The page images
	are not extracted here: ``pages`` is a generator that extracts each
	layer only when it is iterated.

	Args:
		input_file: Path to the XCF file.

	Returns:
		An ``InputPages`` holding the file name, the number of layers, the
		width and height, the DPI and the generator of layer images.

	Raises:
		subprocess.CalledProcessError: If ``magick identify`` exits with a
			non-zero status.
		ValueError: If the ``identify`` output is empty, truncated or
			contains non-numeric values.
		Exception: If the layers report differing metadata, the resolution
			units are not ``PixelsPerInch``, or the horizontal and vertical
			resolutions differ.
	"""
	# Check ImageMagick support
	assert_has_imagemagick('XCF support requires ImageMagick')

	# Read metadata: one 'n;W;H;x;y;units' record per layer, each terminated by '|'
	proc = subprocess.run(['magick', 'identify', '-format', '%n;%W;%H;%x;%y;%U|', input_file], capture_output=True, encoding='utf-8', check=True)
	magick_layers_data = proc.stdout.strip('|').split('|')

	if any(d != magick_layers_data[0] for d in magick_layers_data[1:]):
		raise Exception('Different metadata between XCF layers')

	magick_layer_data = magick_layers_data[0].split(';')
	if len(magick_layer_data) < 6:
		# Fail with a descriptive message instead of an opaque int('') / IndexError below
		raise ValueError('Unexpected output from ImageMagick identify: {!r}'.format(proc.stdout))

	num_layers = int(magick_layer_data[0])
	width = int(magick_layer_data[1])
	height = int(magick_layer_data[2])
	resolution_x = float(magick_layer_data[3])
	resolution_y = float(magick_layer_data[4])
	resolution_units = magick_layer_data[5]

	if resolution_units != 'PixelsPerInch':
		raise Exception('Unexpected resolution units (expected PixelsPerInch, got {})'.format(resolution_units))
	if resolution_x != resolution_y:
		raise Exception('Unexpected non-square DPI ({}x{})'.format(resolution_x, resolution_y))

	return InputPages(
		file_name=input_file,
		num_pages=num_layers,
		width=width,
		height=height,
		dpi=resolution_x,  # Horizontal and vertical resolution are equal at this point
		pages=_do_get_pages(input_file, num_layers)
	)

def _do_get_pages(input_file: str, num_layers: int) -> Generator[Image]:
	for layer_num in range(num_layers):
		# Extract layer as PNG (to proc.stdout)
		proc = subprocess.run(['magick', '{}[{}]'.format(input_file, layer_num), 'png:-'], capture_output=True)

		if proc.returncode != 0:
			raise Exception('ImageMagick error')

		# Read into PIL Image
		png_data = io.BytesIO(proc.stdout)
		image = Image.open(png_data)

		yield image