
| Current Path : /var/www/wsgi/www/api/venv/lib/python3.12/site-packages/pyhanko/pdf_utils/metadata/ |
Linux ift1.ift-informatik.de 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64 |
| Current File : /var/www/wsgi/www/api/venv/lib/python3.12/site-packages/pyhanko/pdf_utils/metadata/info.py |
import logging
from datetime import datetime
from typing import Any, Dict, Optional, Union
import tzlocal
from pyhanko.pdf_utils import generic, misc
from . import model
__all__ = ['update_info_dict', 'view_from_info_dict']
logger = logging.getLogger(__name__)
def _write_meta_string(
dictionary: generic.DictionaryObject,
key: str,
meta_str: model.MetaString,
existing_only: bool,
) -> bool:
if isinstance(meta_str, misc.StringWithLanguage):
# don't bother with embedding language codes in strings,
# too few people implement escapes anyway, so meh
string = meta_str.value
elif isinstance(meta_str, str):
string = meta_str
else:
return False
pdf_str = generic.TextStringObject(string)
try:
old_value = dictionary[key]
mod = old_value != pdf_str
except KeyError:
mod = not existing_only
if mod:
dictionary[key] = pdf_str
return mod
def _write_meta_date(
dictionary: generic.DictionaryObject,
key: str,
meta_date: Union[datetime, str, None],
existing_only: bool,
) -> bool:
if isinstance(meta_date, datetime):
value = meta_date
elif meta_date == 'now':
value = datetime.now(tz=tzlocal.get_localzone())
else:
return False
if not existing_only or key in dictionary:
dictionary[key] = generic.pdf_date(value)
return True
else:
return False
def update_info_dict(
meta: model.DocumentMetadata,
info: generic.DictionaryObject,
only_update_existing: bool = False,
) -> bool:
mod = _write_meta_date(
info, "/ModDate", meta.last_modified, existing_only=only_update_existing
)
producer = model.VENDOR
try:
producer_string = info['/Producer']
if producer not in producer_string:
producer_string = generic.TextStringObject(
f"{producer_string}; {producer}"
)
mod = True
except (KeyError, TypeError):
producer_string = generic.TextStringObject(producer)
mod = True
# always override this
info['/Producer'] = producer_string
if meta.xmp_unmanaged:
return mod
mod |= _write_meta_string(
info, "/Title", meta.title, existing_only=only_update_existing
)
mod |= _write_meta_string(
info, "/Author", meta.author, existing_only=only_update_existing
)
mod |= _write_meta_string(
info, "/Subject", meta.subject, existing_only=only_update_existing
)
mod |= _write_meta_string(
info, "/Creator", meta.creator, existing_only=only_update_existing
)
mod |= _write_meta_date(
info, "/CreationDate", meta.created, existing_only=only_update_existing
)
if meta.keywords:
info['/Keywords'] = generic.TextStringObject(','.join(meta.keywords))
mod = True
return mod
def _read_date_from_dict(
info_dict: generic.DictionaryObject, key: str, strict: bool
) -> Optional[datetime]:
try:
date_str = info_dict[key]
except KeyError:
return None
try:
if isinstance(date_str, generic.TextStringObject):
return generic.parse_pdf_date(date_str, strict=strict)
except misc.PdfReadError:
pass
logger.warning(
"Key %s in info dict has value %s, which is not a valid date string",
key,
repr(date_str),
)
return None
def view_from_info_dict(
info_dict: generic.DictionaryObject, strict: bool = True
) -> model.DocumentMetadata:
kwargs: Dict[str, Any] = {}
for s_entry in ('title', 'author', 'subject', 'creator'):
try:
kwargs[s_entry] = str(info_dict[f"/{s_entry.title()}"])
except KeyError:
pass
creation_date = _read_date_from_dict(
info_dict, '/CreationDate', strict=strict
)
if creation_date is not None:
kwargs['created'] = creation_date
mod_date = _read_date_from_dict(info_dict, '/ModDate', strict=strict)
if mod_date is not None:
kwargs['last_modified'] = mod_date
if '/Keywords' in info_dict:
kwargs['keywords'] = str(info_dict['/Keywords']).split(',')
return model.DocumentMetadata(**kwargs)