
| Current Path : /var/www/wsgi/www/api/venv/lib64/python3.12/site-packages/pyhanko/pdf_utils/ |
Linux ift1.ift-informatik.de 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64 |
| Current File : /var/www/wsgi/www/api/venv/lib64/python3.12/site-packages/pyhanko/pdf_utils/images.py |
"""Utilities for embedding bitmap image data into PDF files.
The image data handling is done by
`Pillow <https://github.com/python-pillow/Pillow>`_.
.. note::
Note that also here we only support a subset of what the PDF standard
provides for. Most RGB and grayscale images (with or without transparency)
that can be read by PIL/Pillow can be used without issue.
PNG images with an indexed palette backed by one of these colour spaces
can also be used.
Currently there is no support for CMYK images or (direct) support for
embedding JPEG-encoded image data as such, but these features may be added
later.
"""
import uuid
from typing import Optional, Union
from . import generic
from .content import PdfContent, PdfResources, ResourceType
from .generic import IndirectObject, pdf_name
from .layout import BoxConstraints
from .writer import BasePdfFileWriter
try:
from PIL import Image
from PIL.ImagePalette import ImagePalette
except ImportError as e: # pragma: nocover
raise ImportError(
"pyhanko.pdf_utils.images requires pyHanko to be installed with "
"the [image-support] option. You can install missing "
"dependencies by running "
"\"pip install 'pyHanko[image-support]'\".",
e,
)
__all__ = ['pil_image', 'PdfImage']
def pil_image(img: Image.Image, writer: BasePdfFileWriter):
"""
This function writes a PIL/Pillow :class:`.Image` object to a PDF file
writer, as an image XObject.
:param img:
A Pillow :class:`.Image` object
:param writer:
A PDF file writer
:return:
A reference to the image XObject written.
"""
if img.mode not in ('RGB', 'RGBA', 'P', 'PA', 'L', 'LA'): # pragma: nocover
raise NotImplementedError
dict_data = {
pdf_name('/Type'): pdf_name('/XObject'),
pdf_name('/Subtype'): pdf_name('/Image'),
pdf_name('/Width'): generic.NumberObject(img.width),
pdf_name('/Height'): generic.NumberObject(img.height),
}
bpc = generic.NumberObject(8)
smask_image = None
if img.mode.endswith('A'):
# extract the alpha channel, and save it as a separate image object
smask_pil_image = img.split()[-1]
assert smask_pil_image.mode == 'L'
smask_image = pil_image(smask_pil_image, writer)
# finally, convert to RBG or L as appropriate
img = img.convert(img.mode[:-1])
clr_space: Union[generic.NameObject, generic.ArrayObject]
clr_space = (
pdf_name('/DeviceGray') if img.mode == 'L' else pdf_name('/DeviceRGB')
)
if img.mode == 'P':
palette: Optional[ImagePalette] = img.palette
assert palette is not None
palette_arr = palette.palette
if palette.mode != 'RGB': # pragma: nocover
raise NotImplementedError
palette_size = len(palette_arr) // 3
# declare an indexed colour space based on /DeviceRGB
# with 'palette_size' colours, with mapping defined as
# a byte string
clr_space = generic.ArrayObject(
[
pdf_name('/Indexed'),
pdf_name('/DeviceRGB'),
generic.NumberObject(palette_size - 1),
generic.ByteStringObject(palette_arr),
]
)
if smask_image is not None:
dict_data[pdf_name('/SMask')] = smask_image
dict_data[pdf_name('/ColorSpace')] = clr_space
dict_data[pdf_name('/BitsPerComponent')] = bpc
# TODO nice to have: I'd like to pack everything into minimal space here
# (but the flate compression should deal with it pretty nicely)
# NOTE the BPC values allowed by the standard are limited to 1, 2, 4, 8
# (and 12, 16, but those aren't allowed by PIL anyway in this context)
image_bytes = img.tobytes()
stream = generic.StreamObject(dict_data, stream_data=image_bytes)
stream.compress()
return writer.add_object(stream)
class PdfImage(PdfContent):
"""Wrapper class that implements the :class:`.PdfContent` interface for
image objects.
.. note::
Instances of this class are reusable, in the sense that the
implementation is aware of changes to the associated :attr:`writer`
object. This allows the same image to be embedded into multiple files
without instantiating a new :class:`.PdfImage` every time.
"""
def __init__(
self,
image: Union[Image.Image, str],
writer: Optional[BasePdfFileWriter] = None,
resources: Optional[PdfResources] = None,
name: Optional[str] = None,
opacity=None,
box: Optional[BoxConstraints] = None,
):
if isinstance(image, str):
image = Image.open(image)
self.image: Image.Image = image
self.name = name or str(uuid.uuid4())
self.opacity = opacity
if box is None:
# assume square pixels
box = BoxConstraints(self.image.width, self.image.height)
super().__init__(resources, writer=writer, box=box)
self._image_ref: Optional[IndirectObject] = None
@property
def image_ref(self) -> generic.IndirectObject:
"""Return a reference to the image XObject associated with this
:class:`.PdfImage` instance.
If no such reference is available, it will be created using
:func:`.pil_image`, and the result will be cached until the
:attr:`writer` attribute changes
(see :meth:`~pyhanko.pdf_utils.content.PdfContent.set_writer`).
:return: An indirect reference to an image XObject.
"""
assert self.writer is not None
# cache is invalidated if the writer changed
image_ref = self._image_ref
if image_ref is None or image_ref.get_pdf_handler() is not self.writer:
self._image_ref = image_ref = pil_image(self.image, self.writer)
assert image_ref is not None
return image_ref
def render(self) -> bytes:
img_ref_name = '/Img' + self.name
self.set_resource(
category=ResourceType.XOBJECT,
name=pdf_name(img_ref_name),
value=self.image_ref,
)
opacity = b''
if self.opacity is not None:
gs_name = '/GS' + str(uuid.uuid4())
self.set_resource(
category=ResourceType.EXT_G_STATE,
name=pdf_name(gs_name),
value=generic.DictionaryObject(
{pdf_name('/ca'): generic.FloatObject(self.opacity)}
),
)
opacity = gs_name.encode('ascii') + b' gs'
# Internally, the image is mapped to the unit square in
# user coordinates, irrespective of width/height.
# In particular, we might have to scale the x and y axes differently.
if not self.box.height_defined:
self.box.height = self.image.height
if not self.box.width_defined:
self.box.width = self.image.width
draw = b'%g 0 0 %g 0 0 cm %s Do' % (
self.box.width,
self.box.height,
img_ref_name.encode('ascii'),
)
return b'q %s %s Q' % (opacity, draw)