From 6fbd9de2c9089ebfb258c8d51855660cf695a30b Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Tue, 2 Sep 2025 21:34:29 +1000 Subject: [PATCH] Do not include foreground layer if no foreground pixels --- pdf_segmented/compression/__init__.py | 7 +++++-- pdf_segmented/output/djvu.py | 13 ++++++++----- pdf_segmented/output/pdf.py | 10 +++++++--- pdf_segmented/segmentation.py | 14 ++++++++------ 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/pdf_segmented/compression/__init__.py b/pdf_segmented/compression/__init__.py index de5137b..e605997 100644 --- a/pdf_segmented/compression/__init__.py +++ b/pdf_segmented/compression/__init__.py @@ -43,8 +43,8 @@ class CompressionOptions: @dataclass class CompressedPage: - fg: CompressedLayer - bg: CompressedLayer + fg: Optional[CompressedLayer] + bg: Optional[CompressedLayer] def compress_pages( input_pages: InputPages, @@ -104,6 +104,9 @@ def compress_layer( tempdir: str ) -> CompressedLayer: + if layer is None: + return None + # Compress the given layer if compression == 'iw44': return iw44_compress_layer(layer=layer, dpi=input_pages.dpi, tempdir=tempdir) diff --git a/pdf_segmented/output/djvu.py b/pdf_segmented/output/djvu.py index 11a1018..d20ec5d 100644 --- a/pdf_segmented/output/djvu.py +++ b/pdf_segmented/output/djvu.py @@ -42,17 +42,20 @@ def djvu_write_pages( # Combine foreground and background _, page_djvu_file = tempfile.mkstemp(suffix='.djvu', dir=tempdir) - # TODO: Handle case where empty background or foreground args = ['djvumake', page_djvu_file, 'INFO={},{},{}'.format(input_pages.width, input_pages.height, round(input_pages.dpi))] - args.append('Sjbz={}'.format(compressed_page.fg.filename)) - args.append('BG44={}'.format(compressed_page.bg.filename)) + if compressed_page.fg: + args.append('Sjbz={}'.format(compressed_page.fg.filename)) + if compressed_page.bg: + args.append('BG44={}'.format(compressed_page.bg.filename)) subprocess.run(args, check=True, capture_output=True) djvu_page_files.append(page_djvu_file) finally: # Clean up - compressed_page.bg.cleanup() - compressed_page.fg.cleanup() + if compressed_page.bg: + compressed_page.bg.cleanup() + if compressed_page.fg: + compressed_page.fg.cleanup() # Combine pages subprocess.run(['djvm', '-c', output_file] + djvu_page_files, check=True) diff --git a/pdf_segmented/output/pdf.py b/pdf_segmented/output/pdf.py index 622f269..79debe7 100644 --- a/pdf_segmented/output/pdf.py +++ b/pdf_segmented/output/pdf.py @@ -42,14 +42,15 @@ def pdf_write_pages( page = pdf.add_blank_page(page_size=(width_pt, height_pt)) # Write each layer to the page - # TODO: Handle case where empty background or foreground content_instructions = [] pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.bg, is_foreground=False, content_instructions=content_instructions) pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.fg, is_foreground=True, content_instructions=content_instructions) # Clean up - compressed_page.bg.cleanup() - compressed_page.fg.cleanup() + if compressed_page.bg: + compressed_page.bg.cleanup() + if compressed_page.fg: + compressed_page.fg.cleanup() # Generate content stream wrapped_instructions = [ @@ -73,6 +74,9 @@ def pdf_write_layer( content_instructions, ) -> None: + if layer is None: + return + # Write the layer to PDF if isinstance(layer, JBIG2Layer): pdf_write_image( diff --git a/pdf_segmented/segmentation.py b/pdf_segmented/segmentation.py index bed7825..c5d20d2 100644 --- a/pdf_segmented/segmentation.py +++ b/pdf_segmented/segmentation.py @@ -20,12 +20,12 @@ import numpy from PIL import Image from dataclasses import dataclass -from typing import Generator, List +from typing import Generator, List, Optional @dataclass class SegmentedPage: - fg: Image - bg: Image + fg: Optional[Image] + bg: Optional[Image] def segment_pages(input_pages: InputPages) -> Generator[SegmentedPage]: for page_num, input_page in enumerate(input_pages.pages): @@ -81,6 +81,8 @@ def segment_page(input_page: Image) -> SegmentedPage: numpy_bg[fg_pixels,:] = [255, 255, 255] image_bg = Image.fromarray(numpy_bg, image_rgb.mode) - # TODO: Handle case where empty background or foreground - - return SegmentedPage(fg=image_fg, bg=image_bg) + # Handle case where empty background or foreground + if numpy.any(fg_pixels): + return SegmentedPage(fg=image_fg, bg=image_bg) + else: + return SegmentedPage(fg=None, bg=image_bg)