diff --git a/README.md b/README.md index 36c471d..89f96e6 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ python -m pdf_segmented input.xcf output.pdf Takes as input a [GIMP](https://www.gimp.org/) XCF file with one layer per page (bottom layer = first page). -All black pixels (#000000) will be considered to be foreground, and all remaining pixels will be considered to be background. This is most easily accomplished by selecting all colour graphics in GIMP, inverting the selection (Ctrl+I), then applying the [Threshold tool](https://docs.gimp.org/3.0/en/gimp-tool-threshold.html). +Pixels will be considered to be foreground if they are fully black (#000000) and all 8 of their adjacent pixels are also fully black or fully white. All remaining pixels will be considered to be background. This is most easily accomplished by selecting all colour graphics in GIMP, inverting the selection (Ctrl+I), then applying the [Threshold tool](https://docs.gimp.org/3.0/en/gimp-tool-threshold.html). The foreground will be compressed losslessly using [JBIG2](https://en.wikipedia.org/wiki/JBIG2). The background will be compressed lossily using [JPEG](https://en.wikipedia.org/wiki/JPEG). JPEG quality can be controlled using the `--jpeg-quality` option; the default is the Pillow default (75% at time of writing). @@ -18,10 +18,10 @@ Additional compression algorithms are supported (JPEG 2000, PNG); see `--help` f ## Dependencies -* [Python 3](https://www.python.org/) (tested using 3.13.3) - * [NumPy](https://numpy.org/) (tested using 2.2.5) - * [Pillow](https://pillow.readthedocs.io/en/stable/) (tested using 11.2.1) - * [pikepdf](https://pikepdf.readthedocs.io/en/latest/) (tested using 9.7.0) +* [Python 3](https://www.python.org/) (tested using 3.13.7) + * [NumPy](https://numpy.org/) (tested using 2.3.2) + * [Pillow](https://pillow.readthedocs.io/en/stable/) (tested using 11.3.0) + * [pikepdf](https://pikepdf.readthedocs.io/en/latest/) (tested using 9.10.2) * [DjVuLibre](https://djvu.sourceforge.net/) (tested using 3.5.28) – for DjVu output -* [ImageMagick](https://imagemagick.org/) (tested using 7.1.1.47) +* [ImageMagick](https://imagemagick.org/) (tested using 7.1.2.3) * [jbig2enc](https://github.com/agl/jbig2enc) (tested using 0.30) – for JBIG2 diff --git a/pdf_segmented/segmentation.py b/pdf_segmented/segmentation.py index 7254e23..bed7825 100644 --- a/pdf_segmented/segmentation.py +++ b/pdf_segmented/segmentation.py @@ -39,16 +39,46 @@ def segment_page(input_page: Image) -> SegmentedPage: # Convert image to foreground/background image_rgb = input_page.convert('RGB') numpy_rgb = numpy.asarray(image_rgb) - black_pixels = (numpy_rgb[:,:,0] == 0) & (numpy_rgb[:,:,1] == 0) & (numpy_rgb[:,:,2] == 0) - # Foreground is only black + # Precompute black and white pixels + black_pixels = (numpy_rgb[:,:,0] == 0) & (numpy_rgb[:,:,1] == 0) & (numpy_rgb[:,:,2] == 0) + white_pixels = (numpy_rgb[:,:,0] == 255) & (numpy_rgb[:,:,1] == 255) & (numpy_rgb[:,:,2] == 255) + black_or_white = black_pixels | white_pixels + + # Precompute pixels with all neighbours either black or white + bw1 = numpy.roll(black_or_white, (1, 1), (0, 1)) + bw1[0,:] = True + bw1[:,0] = True + bw2 = numpy.roll(black_or_white, (1, 0), (0, 1)) + bw2[0,:] = True + bw3 = numpy.roll(black_or_white, (1, -1), (0, 1)) + bw3[0,:] = True + bw3[:,-1] = True + bw4 = numpy.roll(black_or_white, (0, -1), (0, 1)) + bw4[:,-1] = True + bw5 = numpy.roll(black_or_white, (-1, -1), (0, 1)) + bw5[-1,:] = True + bw5[:,-1] = True + bw6 = numpy.roll(black_or_white, (-1, 0), (0, 1)) + bw6[-1,:] = True + bw7 = numpy.roll(black_or_white, (-1, 1), (0, 1)) + bw7[-1,:] = True + bw7[:,0] = True + bw8 = numpy.roll(black_or_white, (0, 1), (0, 1)) + bw8[:,0] = True + bw_neighbours = bw1 & bw2 & bw3 & bw4 & bw5 & bw6 & bw7 & bw8 + + # Foreground is only black pixels with all neighbours either black or white + fg_pixels = black_pixels & bw_neighbours + + # Foreground - white out all non-foreground pixels numpy_fg = numpy_rgb.copy() - numpy_fg[~black_pixels,:] = [255, 255, 255] + numpy_fg[~fg_pixels,:] = [255, 255, 255] image_fg = Image.fromarray(numpy_fg, image_rgb.mode) - # Background is only non-black + # Background - white out all foreground pixels numpy_bg = numpy_rgb.copy() - numpy_bg[black_pixels,:] = [255, 255, 255] + numpy_bg[fg_pixels,:] = [255, 255, 255] image_bg = Image.fromarray(numpy_bg, image_rgb.mode) # TODO: Handle case where empty background or foreground