# pdf-segmented: Generate PDFs using separate compression for foreground and background
# Copyright (C) 2025 Lee Yingtong Li
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from dataclasses import dataclass
from typing import Generator, List

import numpy
from PIL import Image

from .input import InputPages


@dataclass
class SegmentedPage:
    """A page split into a foreground layer and a background layer."""

    # Layer holding only the pure-black pixels; everything else is white
    fg: Image.Image
    # Layer holding only the non-black pixels; black pixels are white
    bg: Image.Image


def segment_pages(input_pages: InputPages) -> Generator[SegmentedPage, None, None]:
    """Lazily segment every page of *input_pages*.

    Prints a 'Page X of Y' progress line before each page is segmented,
    then yields the resulting SegmentedPage.
    """
    # All three Generator type arguments are spelled out: the one-argument
    # form raises TypeError at definition time on Python < 3.13.
    for page_num, input_page in enumerate(input_pages.pages):
        print('Page {} of {}'.format(page_num + 1, input_pages.num_pages))
        yield segment_page(input_page)


def segment_page(input_page: Image.Image) -> SegmentedPage:
    """Segment the input page into foreground and background.

    The page is converted to RGB. Pixels whose three channels are all
    exactly 0 form the foreground; every other pixel forms the background.
    In each layer, the pixels belonging to the other layer are painted
    white (255, 255, 255).
    """
    image_rgb = input_page.convert('RGB')
    numpy_rgb = numpy.asarray(image_rgb)

    # True where R, G and B are all zero, i.e. the pixel is pure black
    black_pixels = (numpy_rgb == 0).all(axis=2)

    # Foreground is only black
    numpy_fg = numpy_rgb.copy()  # copy so the source array is not modified
    numpy_fg[~black_pixels, :] = [255, 255, 255]
    image_fg = Image.fromarray(numpy_fg, image_rgb.mode)

    # Background is only non-black
    numpy_bg = numpy_rgb.copy()
    numpy_bg[black_pixels, :] = [255, 255, 255]
    image_bg = Image.fromarray(numpy_bg, image_rgb.mode)

    # TODO: Handle case where empty background or foreground

    return SegmentedPage(fg=image_fg, bg=image_bg)