#!/usr/bin/env python3
# GIMP plug-in for JBIG2-encoded PDF files
# Copyright (C) 2024 Lee Yingtong Li (RunasSudo)
#
# Adapted from jbig2enc by Adam Langley - Copyright (C) 2006 Google Inc., licensed under Apache v2
# Loosely adapted from file-openraster.py - Copyright (C) 2009 by Jon Nordby, licensed under the GPLv3
# In turn based on MyPaint source code by Martin Renold
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""Wrap JBIG2 page files in a minimal PDF, one image per page."""

import glob
import struct
import sys
from pathlib import Path

dpi = 72  # Default DPI value, used when a page file reports a resolution of 0


class Ref:
    """An indirect reference to object number ``x`` (generation 0)."""

    def __init__(self, x: int):
        self.x = x

    def __str__(self) -> str:
        return f"{self.x} 0 R"


class Dict:
    """A PDF dictionary whose keys and values are already PDF-syntax strings."""

    def __init__(self, values: dict = None):
        # Copy so that later edits to ``self.d`` never leak into the caller's dict.
        self.d = {} if values is None else values.copy()

    def __str__(self) -> str:
        entries = [f"/{key} {value}" for key, value in self.d.items()]
        return f"<< {' '.join(entries)} >>\n"


class Obj:
    """A numbered PDF object: a dictionary plus an optional stream.

    Each new instance takes its ``id`` from the class-wide ``next_id``
    counter, which is then incremented.
    """

    # Object number handed to the next instance; shared by all instances.
    next_id = 1

    def __init__(self, d: dict = None, stream: str = None):
        # ``Dict`` copies ``d``, so adding "Length" below does not modify the
        # dictionary owned by the caller.
        self.d = Dict(d)
        if stream is not None:
            self.d.d["Length"] = str(len(stream))
        self.stream = stream
        self.id = Obj.next_id
        Obj.next_id += 1

    def __str__(self) -> str:
        """Return the object body (dictionary, optional stream, ``endobj``).

        The leading ``N 0 obj`` line is not included; ``Doc.__str__`` emits it.
        """
        result = [str(self.d)]
        if self.stream is not None:
            result.append(f"stream\n{self.stream}\nendstream\n")
        result.append("endobj\n")
        return "".join(result)


class Doc:
    """An ordered collection of ``Obj`` instances that serialises to a PDF."""

    def __init__(self):
        self.objs = []
        self.pages = []

    def add_object(self, obj: Obj) -> Obj:
        """Adds an object to the document."""
        self.objs.append(obj)
        return obj

    def add_page(self, page: Obj) -> Obj:
        """Adds a page to the document and the list of objects."""
        self.pages.append(page)
        return self.add_object(page)

    def __str__(self) -> str:
        """Serialise the header, all objects, the xref table and the trailer.

        The xref table has one entry per object in insertion order and the
        trailer names object 1 as the root, so the output is only consistent
        when the objects carry ids 1..n in the order they were added and
        object 1 is the catalog. Offsets are counted in characters, which
        equal bytes once the result is encoded as latin1.
        """
        output = []
        offsets = []
        current_offset = 0

        def add_line(line: str):
            nonlocal current_offset
            output.append(line)
            current_offset += len(line) + 1  # +1 for the newline added by join()

        # PDF header
        add_line("%PDF-1.4")

        # Add each object and track its offset
        for obj in self.objs:
            offsets.append(current_offset)
            add_line(f"{obj.id} 0 obj")
            add_line(str(obj))

        # Cross-reference table
        xref_start = current_offset
        add_line("xref")
        add_line(f"0 {len(offsets) + 1}")
        add_line("0000000000 65535 f ")
        for offset in offsets:
            add_line(f"{offset:010} 00000 n ")

        # Trailer and EOF
        add_line("trailer")
        add_line(f"<< /Size {len(offsets) + 1}\n/Root 1 0 R >>")
        add_line("startxref")
        add_line(str(xref_start))
        add_line("%%EOF")

        return "\n".join(output)


def ref(x: int) -> str:
    """Creates a PDF reference string."""
    return f"{x} 0 R"


def _page_objects(contents: bytes, parent_id: int) -> tuple:
    """Build the image, content-stream, resources and page objects for one page.

    Width, height, x-resolution and y-resolution are read from bytes 11..27
    of ``contents`` as four big-endian unsigned 32-bit integers (presumably
    the JBIG2 page-information segment; confirm against the encoder). A
    resolution of 0 falls back to the module-level ``dpi``.

    Raises ``struct.error`` when ``contents`` is too short to hold those fields.
    """
    width, height, xres, yres = struct.unpack(">IIII", contents[11:27])

    # Set default resolution if missing
    xres = xres or dpi
    yres = yres or dpi

    # Page size in PDF units (72 per inch), computed once and reused below.
    width_pt = float(width * 72) / xres
    height_pt = float(height * 72) / yres

    # XObject (image) holding the raw page data
    xobj = Obj(
        {
            "Type": "/XObject",
            "Subtype": "/Image",
            "Width": str(width),
            "Height": str(height),
            "ColorSpace": "/DeviceGray",
            "BitsPerComponent": "1",
            "Filter": "/JBIG2Decode",
            # "DecodeParms": f"<< /JBIG2Globals {symd.id} 0 R >>",
            # RUNASSUDO: Remove this as unnecessary with lossless encoding
        },
        contents.decode("latin1"),
    )

    # Content stream that scales the image to fill the page
    contents_obj = Obj({}, f"q {width_pt} 0 0 {height_pt} 0 0 cm /Im1 Do Q")

    # Resource dictionary exposing the image as /Im1
    resources_obj = Obj(
        {"ProcSet": "[/PDF /ImageB]", "XObject": f"<< /Im1 {xobj.id} 0 R >>"}
    )

    # The page object itself
    page_obj = Obj(
        {
            "Type": "/Page",
            "Parent": ref(parent_id),
            "MediaBox": f"[ 0 0 {width_pt} {height_pt} ]",
            "Contents": ref(contents_obj.id),
            "Resources": ref(resources_obj.id),
        }
    )
    return xobj, contents_obj, resources_obj, page_obj


def create_pdf(symboltable: str = "symboltable", pagefiles: list = None, outf=None):
    """Creates a PDF document from a symbol table and a list of page files.

    Args:
        symboltable: Ignored; kept for call compatibility. The symbol table
            is not read (see the RUNASSUDO note in the body).
        pagefiles: Paths of the page files. Defaults to ``glob("page-*")``.
            Pages are emitted in sorted path order; the list passed in is
            left unmodified.
        outf: Binary file object that receives the PDF. Defaults to
            ``sys.stdout.buffer``, looked up when the function is called.

    Page files that cannot be read or unpacked are reported on stderr and
    skipped.
    """
    if pagefiles is None:
        pagefiles = glob.glob("page-*")
    if outf is None:
        # Resolved at call time so importing this module never depends on
        # what sys.stdout happens to be.
        outf = sys.stdout.buffer

    # Doc.__str__ expects objects numbered 1..n with the catalog as object 1,
    # so restart the shared counter for every document. Without this a second
    # call in the same process would emit dangling references.
    Obj.next_id = 1

    doc = Doc()

    # Add catalog, outlines and pages objects. The catalog is created first
    # (object 1) and its references are filled in once the targets exist.
    catalog_obj = Obj({"Type": "/Catalog"})
    outlines_obj = Obj({"Type": "/Outlines", "Count": "0"})
    pages_obj = Obj({"Type": "/Pages"})
    catalog_obj.d.d["Outlines"] = ref(outlines_obj.id)
    catalog_obj.d.d["Pages"] = ref(pages_obj.id)
    doc.add_object(catalog_obj)
    doc.add_object(outlines_obj)
    doc.add_object(pages_obj)

    # Read the symbol table
    # RUNASSUDO: Remove this as unnecessary with lossless encoding
    # try:
    #     with open(symboltable, "rb") as sym_file:
    #         symd = doc.add_object(Obj({}, sym_file.read().decode("latin1")))
    # except IOError:
    #     sys.stderr.write(f"Error reading symbol table: {symboltable}\n")
    #     return

    for p in sorted(pagefiles):
        try:
            with open(p, mode="rb") as page_file:
                contents = page_file.read()
        except IOError:
            sys.stderr.write(f"Error reading page file: {p}\n")
            continue

        try:
            xobj, contents_obj, resources_obj, page_obj = _page_objects(
                contents, pages_obj.id
            )
        except struct.error:
            sys.stderr.write(f"Error unpacking page file: {p}\n")
            continue

        # Add objects to the document in id order
        for obj in (xobj, contents_obj, resources_obj):
            doc.add_object(obj)
        doc.add_page(page_obj)

    # Update pages object
    pages_obj.d.d["Count"] = str(len(doc.pages))
    pages_obj.d.d["Kids"] = "[" + " ".join(ref(x.id) for x in doc.pages) + "]"

    # Output the final PDF document
    outf.write(str(doc).encode("latin1"))


def usage(script, msg):
    """Display usage information and an optional error message, then exit(1)."""
    if msg:
        sys.stderr.write(f"{script}: {msg}\n")
    sys.stderr.write(f"Usage: {script} [file_basename] > out.pdf\n")
    sys.exit(1)


if __name__ == "__main__":
    # RUNASSUDO: Overhauled to use lossless encoding
    pages = sys.argv[1:]

    # Validate that pages were found
    if not pages:
        usage(sys.argv[0], "no pages found!")

    create_pdf(None, pages)