From 60c6ee92e59ac3c57dfcf1fc2775e9b3bfb485ee Mon Sep 17 00:00:00 2001
From: RunasSudo <runassudo@yingtongli.me>
Date: Thu, 8 May 2025 23:15:05 +1000
Subject: [PATCH] Implement PNG compression

---
 pdf_segmented/__main__.py             |  4 +--
 pdf_segmented/compression/__init__.py |  6 ++++
 pdf_segmented/output/pdf.py           | 44 +++++++++++++++++++++++++--
 3 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/pdf_segmented/__main__.py b/pdf_segmented/__main__.py
index f6bda5b..0efabcd 100644
--- a/pdf_segmented/__main__.py
+++ b/pdf_segmented/__main__.py
@@ -28,8 +28,8 @@ parser.add_argument('input_file')
 parser.add_argument('output_file')
 parser.add_argument('--input-format', choices=['xcf'])
 parser.add_argument('--output-format', choices=['pdf'])
-parser.add_argument('--fg-compression', default='jbig2', choices=['jbig2'])
-parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg', 'jp2'])
+parser.add_argument('--fg-compression', default='jbig2', choices=['jbig2', 'png'])
+parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg', 'jp2', 'png'])
 parser.add_argument('--jp2-lossless', action='store_true')
 parser.add_argument('--jp2-rate', type=float)
 parser.add_argument('--jpeg-quality', type=float)
diff --git a/pdf_segmented/compression/__init__.py b/pdf_segmented/compression/__init__.py
index d3028a0..d28496c 100644
--- a/pdf_segmented/compression/__init__.py
+++ b/pdf_segmented/compression/__init__.py
@@ -24,6 +24,7 @@ class CompressedLayer:
 from .jbig2 import jbig2_compress_layer
 from .jp2 import jp2_compress_layer
 from .jpeg import jpeg_compress_layer
+from .png import png_compress_layer
 from ..segmentation import SegmentedPage
 
 from PIL import Image
@@ -73,12 +74,14 @@ def compress_page(
 		fg=compress_layer(
 			layer=segmented_page.fg,
 			compression=fg_compression,
+			is_foreground=True,
 			options=options,
 			tempdir=tempdir
 		),
 		bg=compress_layer(
 			layer=segmented_page.bg,
 			compression=bg_compression,
+			is_foreground=False,
 			options=options,
 			tempdir=tempdir
 		)
@@ -87,6 +90,7 @@ def compress_page(
 def compress_layer(
 	layer: Image,
 	compression: str,
+	is_foreground: bool,
 	options: CompressionOptions,
 	tempdir: str
 ) -> CompressedLayer:
@@ -98,5 +102,7 @@ def compress_layer(
 		return jp2_compress_layer(layer=layer, jp2_lossless=options.jp2_lossless, jp2_rate=options.jp2_rate)
 	elif compression == 'jpeg':
 		return jpeg_compress_layer(layer=layer, jpeg_quality=options.jpeg_quality)
+	elif compression == 'png':
+		return png_compress_layer(layer=layer, is_foreground=is_foreground)
 	else:
 		raise NotImplementedError()
diff --git a/pdf_segmented/output/pdf.py b/pdf_segmented/output/pdf.py
index 9df79c9..f098b39 100644
--- a/pdf_segmented/output/pdf.py
+++ b/pdf_segmented/output/pdf.py
@@ -18,9 +18,10 @@ from ..compression import CompressedLayer, CompressedPage
 from ..compression.jbig2 import JBIG2Layer
 from ..compression.jp2 import JP2Layer
 from ..compression.jpeg import JPEGLayer
+from ..compression.png import PNGLayer
 from ..input import InputPages
 
-from pikepdf import ContentStreamInstruction, Name, Operator, Page, Pdf, Stream, unparse_content_stream
+from pikepdf import ContentStreamInstruction, Dictionary, Name, Operator, Page, Pdf, Stream, unparse_content_stream
 
 from typing import Generator
 
@@ -42,8 +43,8 @@ def pdf_write_pages(
 		
 		# Write each layer to the page
 		content_instructions = []
-		pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.bg, content_instructions=content_instructions)
-		pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.fg, content_instructions=content_instructions)
+		pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.bg, is_foreground=False, content_instructions=content_instructions)
+		pdf_write_layer(input_pages=input_pages, pdf=pdf, page=page, layer=compressed_page.fg, is_foreground=True, content_instructions=content_instructions)
 		
 		# Generate content stream
 		wrapped_instructions = [
@@ -63,6 +64,7 @@ def pdf_write_layer(
 	pdf: Pdf,
 	page: Page,
 	layer: CompressedLayer,
+	is_foreground: bool,
 	content_instructions,
 ) -> None:
 	
@@ -101,6 +103,42 @@ def pdf_write_layer(
 			Filter=Name.DCTDecode,
 			BitsPerComponent=8
 		)
+	elif isinstance(layer, PNGLayer):
+		if is_foreground:
+			# See PDF 1.7 section 7.4.4.3
+			# See also the implementation in img2pdf
+			pdf_write_image(
+				input_pages=input_pages,
+				pdf=pdf,
+				page=page,
+				value=layer.get_flate_data(),
+				content_instructions=content_instructions,
+				ColorSpace=Name.DeviceGray,
+				Filter=Name.FlateDecode,
+				BitsPerComponent=1,
+				Mask=[1, 1],  # Layer mask
+				DecodeParms=Dictionary(
+					Predictor=15,  # PNG prediction (on encoding, PNG optimum) - PNG data tags each row with its own filter type, and decoders treat any Predictor >= 10 as "PNG, per-row"
+					BitsPerComponent=1,  # Default is 8 so must set this here
+					Columns=input_pages.width
+				)
+			)
+		else:
+			pdf_write_image(
+				input_pages=input_pages,
+				pdf=pdf,
+				page=page,
+				value=layer.get_flate_data(),
+				content_instructions=content_instructions,
+				ColorSpace=Name.DeviceRGB,
+				Filter=Name.FlateDecode,
+				BitsPerComponent=8,
+				DecodeParms=Dictionary(
+					Predictor=15,
+					Colors=3,  # Default is 1 so must set this here
+					Columns=input_pages.width
+				)
+			)
 	else:
 		raise NotImplementedError()