From cf4afa44a3605f06bcfd20046b9a08b64613406c Mon Sep 17 00:00:00 2001
From: RunasSudo <runassudo@yingtongli.me>
Date: Thu, 8 May 2025 22:05:46 +1000
Subject: [PATCH] Implement JPEG2000 compression

---
 pdf_segmented/__init__.py             |  6 +++---
 pdf_segmented/__main__.py             | 11 +++++++++--
 pdf_segmented/compression/__init__.py | 23 ++++++++++++++++-------
 pdf_segmented/output/pdf.py           | 12 ++++++++++++
 4 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/pdf_segmented/__init__.py b/pdf_segmented/__init__.py
index 5b45ea2..0a84634 100644
--- a/pdf_segmented/__init__.py
+++ b/pdf_segmented/__init__.py
@@ -14,7 +14,7 @@
 #   You should have received a copy of the GNU Affero General Public License
 #   along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
-from .compression import compress_pages
+from .compression import CompressionOptions, compress_pages
 from .input.xcf import xcf_get_pages
 from .output.pdf import pdf_write_pages
 from .segmentation import segment_pages
@@ -31,7 +31,7 @@ def convert_file(
 	output_format: Optional[str] = None,
 	fg_compression: str = 'jbig2',
 	bg_compression: str = 'jpeg',
-	jpeg_quality: Optional[float] = None
+	options: CompressionOptions = CompressionOptions()
 ) -> None:
 	# Create temporary directory
 	tempdir = tempfile.mkdtemp('pdf-segmented')
@@ -63,7 +63,7 @@ def convert_file(
 			segmented_pages=segmented_pages,
 			fg_compression=fg_compression,
 			bg_compression=bg_compression,
-			jpeg_quality=jpeg_quality,
+			options=options,
 			tempdir=tempdir
 		)
 		
diff --git a/pdf_segmented/__main__.py b/pdf_segmented/__main__.py
index d8c4f0b..f6bda5b 100644
--- a/pdf_segmented/__main__.py
+++ b/pdf_segmented/__main__.py
@@ -15,6 +15,7 @@
 #   along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
 from . import convert_file
+from .compression import CompressionOptions
 
 import argparse
 
@@ -28,7 +29,9 @@ parser.add_argument('output_file')
 parser.add_argument('--input-format', choices=['xcf'])
 parser.add_argument('--output-format', choices=['pdf'])
 parser.add_argument('--fg-compression', default='jbig2', choices=['jbig2'])
-parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg'])
+parser.add_argument('--bg-compression', default='jpeg', choices=['jpeg', 'jp2'])
+parser.add_argument('--jp2-lossless', action='store_true')
+parser.add_argument('--jp2-rate', type=float)
 parser.add_argument('--jpeg-quality', type=float)
 
 args = parser.parse_args()
@@ -41,5 +44,9 @@ convert_file(
 	output_format=args.output_format,
 	fg_compression=args.fg_compression,
 	bg_compression=args.bg_compression,
-	jpeg_quality=args.jpeg_quality
+	options=CompressionOptions(
+		jp2_lossless=args.jp2_lossless,
+		jp2_rate=args.jp2_rate,
+		jpeg_quality=args.jpeg_quality
+	)
 )
diff --git a/pdf_segmented/compression/__init__.py b/pdf_segmented/compression/__init__.py
index 900973f..d3028a0 100644
--- a/pdf_segmented/compression/__init__.py
+++ b/pdf_segmented/compression/__init__.py
@@ -22,6 +22,7 @@ class CompressedLayer:
 		pass
 
 from .jbig2 import jbig2_compress_layer
+from .jp2 import jp2_compress_layer
 from .jpeg import jpeg_compress_layer
 from ..segmentation import SegmentedPage
 
@@ -30,6 +31,12 @@ from PIL import Image
 from dataclasses import dataclass
 from typing import Generator, Iterable, Optional
 
+@dataclass
+class CompressionOptions:  # Codec settings passed as one object through compress_pages/compress_page/compress_layer
+	jp2_lossless: bool = False  # forwarded to jp2_compress_layer when compression == 'jp2'; set by --jp2-lossless
+	jp2_rate: Optional[float] = None  # forwarded to jp2_compress_layer when compression == 'jp2'; set by --jp2-rate
+	jpeg_quality: Optional[float] = None  # forwarded to jpeg_compress_layer when compression == 'jpeg'; set by --jpeg-quality
+
 @dataclass
 class CompressedPage:
 	fg: CompressedLayer
@@ -39,7 +46,7 @@ def compress_pages(
 	segmented_pages: Iterable[SegmentedPage],
 	fg_compression: str,
 	bg_compression: str,
-	jpeg_quality: Optional[float],
+	options: CompressionOptions,
 	tempdir: str
 ) -> Generator[CompressedPage]:
 	
@@ -49,7 +56,7 @@ def compress_pages(
 			segmented_page=segmented_page,
 			fg_compression=fg_compression,
 			bg_compression=bg_compression,
-			jpeg_quality=jpeg_quality,
+			options=options,
 			tempdir=tempdir
 		)
 
@@ -57,7 +64,7 @@ def compress_page(
 	segmented_page: SegmentedPage,
 	fg_compression: str,
 	bg_compression: str,
-	jpeg_quality: Optional[float],
+	options: CompressionOptions,
 	tempdir: str
 ) -> CompressedPage:
 	
@@ -66,13 +73,13 @@ def compress_page(
 		fg=compress_layer(
 			layer=segmented_page.fg,
 			compression=fg_compression,
-			jpeg_quality=jpeg_quality,
+			options=options,
 			tempdir=tempdir
 		),
 		bg=compress_layer(
 			layer=segmented_page.bg,
 			compression=bg_compression,
-			jpeg_quality=jpeg_quality,
+			options=options,
 			tempdir=tempdir
 		)
 	)
@@ -80,14 +87,16 @@ def compress_page(
 def compress_layer(
 	layer: Image,
 	compression: str,
-	jpeg_quality: Optional[float],
+	options: CompressionOptions,  # per-codec settings; each branch below reads only its own codec's fields
 	tempdir: str
 ) -> CompressedLayer:
 	
 	# Compress the given layer
 	if compression == 'jbig2':
 		return jbig2_compress_layer(layer=layer, tempdir=tempdir)
+	elif compression == 'jp2':  # JPEG 2000 backend; unlike the jbig2 branch, tempdir is not passed
+		return jp2_compress_layer(layer=layer, jp2_lossless=options.jp2_lossless, jp2_rate=options.jp2_rate)
 	elif compression == 'jpeg':
-		return jpeg_compress_layer(layer=layer, jpeg_quality=jpeg_quality)
+		return jpeg_compress_layer(layer=layer, jpeg_quality=options.jpeg_quality)  # quality now read from the shared options object
 	else:
 		raise NotImplementedError()
diff --git a/pdf_segmented/output/pdf.py b/pdf_segmented/output/pdf.py
index 56073a7..9df79c9 100644
--- a/pdf_segmented/output/pdf.py
+++ b/pdf_segmented/output/pdf.py
@@ -16,6 +16,7 @@
 
 from ..compression import CompressedLayer, CompressedPage
 from ..compression.jbig2 import JBIG2Layer
+from ..compression.jp2 import JP2Layer
 from ..compression.jpeg import JPEGLayer
 from ..input import InputPages
 
@@ -78,6 +79,17 @@ def pdf_write_layer(
 			BitsPerComponent=1,
 			Mask=[1, 1]  # Layer mask
 		)
+	elif isinstance(layer, JP2Layer):
+		pdf_write_image(
+			input_pages=input_pages,
+			pdf=pdf,
+			page=page,
+			value=layer.data,
+			content_instructions=content_instructions,
+			ColorSpace=Name.DeviceRGB,
+			Filter=Name.JPXDecode,
+			BitsPerComponent=8
+		)
 	elif isinstance(layer, JPEGLayer):
 		pdf_write_image(
 			input_pages=input_pages,