#!/usr/bin/env python3

# GIMP plug-in for JBIG2-encoded PDF files
# Copyright (C) 2024 Lee Yingtong Li (RunasSudo)
#
# Adapted from jbig2enc by Adam Langley <agl@imperialviolet.org> - Copyright (C) 2006 Google Inc., licensed under Apache v2
# Loosely adapted from file-openraster.py - Copyright (C) 2009 by Jon Nordby <jononor@gmail.com>, licensed under the GPLv3
# In turn based on MyPaint source code by Martin Renold
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
import glob
|
|
import struct
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
dpi = 72  # Fallback resolution substituted by create_pdf when a page header reports 0 for xres or yres
|
|
|
|
class Ref:
    """An indirect reference to the PDF object numbered ``x`` (generation 0)."""

    def __init__(self, x: int):
        """Remember the object number this reference points at."""
        self.x = x

    def __str__(self) -> str:
        """Render the reference in PDF syntax, e.g. ``5 0 R``."""
        return "{} 0 R".format(self.x)
|
|
|
|
class Dict:
    """A PDF dictionary whose keys are names and whose values are pre-rendered text."""

    def __init__(self, values: dict = None):
        """Store a shallow copy of *values* (an empty mapping when omitted) in ``self.d``."""
        self.d = {} if values is None else values.copy()

    def __str__(self) -> str:
        """Render as ``<< /Key value ... >>`` followed by a newline."""
        body = " ".join("/{} {}".format(name, text) for name, text in self.d.items())
        return "<< " + body + " >>\n"
|
|
|
|
class Obj:
    """A numbered PDF object: a dictionary plus an optional stream.

    Object numbers are handed out sequentially from the class-level
    ``next_id`` counter, which is shared by every ``Obj`` created in the
    process.
    """

    next_id = 1

    def __init__(self, d: dict = None, stream: str = None):
        """Create the object and claim the next object number.

        Args:
            d: Dictionary entries (name -> already-rendered value text).
                The mapping is copied; the caller's dictionary is never
                modified.
            stream: Optional stream payload. When given, a ``Length``
                entry holding ``len(stream)`` is added to the dictionary.
        """
        # Copy before adding /Length so the entry never leaks into the
        # dictionary the caller passed in.
        entries = {} if d is None else dict(d)
        if stream is not None:
            entries["Length"] = str(len(stream))
        self.d = Dict(entries)
        self.stream = stream
        self.id = Obj.next_id
        Obj.next_id += 1

    def __str__(self) -> str:
        """Render the dictionary, the stream (if any) and the ``endobj`` line.

        The leading ``N 0 obj`` line is not included here.
        """
        result = [str(self.d)]
        if self.stream is not None:
            result.append(f"stream\n{self.stream}\nendstream\n")
        result.append("endobj\n")
        return "".join(result)
|
|
|
|
class Doc:
    """An in-memory PDF document: an ordered list of objects to serialise."""

    def __init__(self):
        """Start with no objects and no pages."""
        self.objs = []
        self.pages = []

    def add_object(self, obj: "Obj") -> "Obj":
        """Append *obj* to the document and return it."""
        self.objs.append(obj)
        return obj

    def add_page(self, page: "Obj") -> "Obj":
        """Record *page* in ``self.pages``, append it to the objects and return it."""
        self.pages.append(page)
        return self.add_object(page)

    def __str__(self) -> str:
        """Serialise header, objects, cross-reference table and trailer.

        Offsets are counted in characters, so they equal byte offsets only
        when the result is encoded with a single-byte codec.

        Cross-reference entries are keyed by each object's ``id`` rather
        than by insertion order, so objects may be added in any order and
        unused object numbers are written as free entries. The trailer
        always names object 1 as the document root.
        """
        lines = []
        position = 0
        offset_by_id = {}

        def add_line(line: str) -> None:
            nonlocal position
            lines.append(line)
            # +1 for the "\n" that the final join places after this line.
            position += len(line) + 1

        # PDF header
        add_line("%PDF-1.4")

        # Emit every object, remembering where its "N 0 obj" line starts.
        for obj in self.objs:
            offset_by_id[obj.id] = position
            add_line(f"{obj.id} 0 obj")
            add_line(str(obj))

        # Cross-reference table: one 20-byte entry per object number from 0
        # up to the highest number in use. Entry 0 and any unused number are
        # free entries that link back to object 0 with generation 65535.
        size = max(offset_by_id, default=0) + 1
        xref_start = position
        add_line("xref")
        add_line(f"0 {size}")
        add_line("0000000000 65535 f ")
        for number in range(1, size):
            if number in offset_by_id:
                add_line(f"{offset_by_id[number]:010} 00000 n ")
            else:
                add_line("0000000000 65535 f ")

        # Trailer and EOF
        add_line("trailer")
        add_line(f"<< /Size {size}\n/Root 1 0 R >>")
        add_line("startxref")
        add_line(str(xref_start))
        add_line("%%EOF")

        return "\n".join(lines)
|
|
|
|
def ref(x: int) -> str:
    """Return the PDF indirect-reference text for object number *x* (generation 0)."""
    return "{} 0 R".format(x)
|
|
|
|
def create_pdf(symboltable: str = "symboltable", pagefiles: list = None, outf = sys.stdout.buffer):
    """Assemble a PDF from JBIG2 page files and write it to *outf*.

    Every page file that can be read becomes one PDF page holding a single
    1-bit ``/JBIG2Decode`` image, scaled according to the width, height and
    resolution read from the page file's header.

    Args:
        symboltable: Unused. Kept so existing callers that pass it keep
            working; the upstream symbol-table handling was removed as
            unnecessary with lossless encoding.
        pagefiles: Paths of the page files. Defaults to everything matching
            ``page-*`` in the current directory. Pages are emitted in sorted
            path order; the caller's list is not modified.
        outf: Binary writable object that receives the finished PDF.

    Page files that cannot be read, or whose header is too short to unpack,
    are reported on stderr and skipped.
    """
    if pagefiles is None:
        pagefiles = glob.glob("page-*")

    # Object numbers come from a process-wide counter, but Doc's trailer
    # hard-codes "/Root 1 0 R", so the catalog must be object 1. Restart the
    # numbering so repeated calls in one process each produce a valid file.
    Obj.next_id = 1

    doc = Doc()

    # Catalog, outlines and page-tree objects. The catalog is created first
    # (to get number 1) and linked to the other two once their numbers exist.
    catalog_obj = doc.add_object(Obj({"Type": "/Catalog"}))
    outlines_obj = doc.add_object(Obj({"Type": "/Outlines", "Count": "0"}))
    pages_obj = doc.add_object(Obj({"Type": "/Pages"}))
    catalog_obj.d.d["Outlines"] = ref(outlines_obj.id)
    catalog_obj.d.d["Pages"] = ref(pages_obj.id)

    # RUNASSUDO: upstream read a shared symbol table here and referenced it
    # from each image via /DecodeParms /JBIG2Globals; removed as unnecessary
    # with lossless encoding.

    page_objs = []

    for p in sorted(pagefiles):
        try:
            with open(p, mode="rb") as page_file:
                contents = page_file.read()
        except OSError:
            sys.stderr.write(f"Error reading page file: {p}\n")
            continue

        try:
            # Four big-endian 32-bit fields starting at byte 11 of the file.
            width, height, xres, yres = struct.unpack_from(">IIII", contents, 11)
        except struct.error:
            sys.stderr.write(f"Error unpacking page file: {p}\n")
            continue

        # Set default resolution if missing
        xres = xres or dpi
        yres = yres or dpi

        # Page size in PDF user-space units (72 per inch).
        width_pt = float(width * 72) / xres
        height_pt = float(height * 72) / yres

        # Image XObject carrying the page file's bytes unchanged. latin1
        # maps every byte to one character, so len() equals the byte count
        # and the final latin1 encode restores the original bytes.
        xobj = Obj(
            {
                "Type": "/XObject",
                "Subtype": "/Image",
                "Width": str(width),
                "Height": str(height),
                "ColorSpace": "/DeviceGray",
                "BitsPerComponent": "1",
                "Filter": "/JBIG2Decode",
            },
            contents.decode("latin1"),
        )

        # Content stream: scale the unit image to the page size and paint it.
        contents_obj = Obj(
            {},
            f"q {width_pt} 0 0 {height_pt} 0 0 cm /Im1 Do Q",
        )

        # Resource dictionary exposing the image under the name /Im1.
        resources_obj = Obj(
            {"ProcSet": "[/PDF /ImageB]", "XObject": f"<< /Im1 {xobj.id} 0 R >>"}
        )

        # The page object itself.
        page_obj = Obj(
            {
                "Type": "/Page",
                "Parent": ref(pages_obj.id),
                "MediaBox": f"[ 0 0 {width_pt} {height_pt} ]",
                "Contents": ref(contents_obj.id),
                "Resources": ref(resources_obj.id),
            }
        )

        for obj in (xobj, contents_obj, resources_obj, page_obj):
            doc.add_object(obj)

        page_objs.append(page_obj)

    # Fill in the page tree now that every page's object number is known.
    pages_obj.d.d["Count"] = str(len(page_objs))
    pages_obj.d.d["Kids"] = "[" + " ".join(ref(x.id) for x in page_objs) + "]"

    # Write the finished PDF document to the requested output.
    outf.write(str(doc).encode("latin1"))
|
|
|
|
def usage(script, msg):
    """Print an optional error message and the usage line to stderr, then exit with status 1.

    Args:
        script: Program name shown in the messages.
        msg: Error text printed before the usage line; skipped when falsy.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    if msg:
        sys.stderr.write("%s: %s\n" % (script, msg))
    # The command-line entry point treats every argument as a page file
    # path, so the usage line advertises page files rather than a basename.
    sys.stderr.write("Usage: %s page_file [page_file ...] > out.pdf\n" % script)
    sys.exit(1)
|
|
|
|
if __name__ == "__main__":
    # RUNASSUDO: Overhauled to use lossless encoding

    # Every command-line argument is taken to be a page file path.
    page_paths = sys.argv[1:]

    if page_paths:
        create_pdf(None, page_paths)
    else:
        # Nothing to convert: report and exit via usage().
        usage(sys.argv[0], "no pages found!")
|