Source code for cioprocessor.lib.epubize

"""ePub Generation."""

from os import walk
from os.path import exists, join, basename, relpath, dirname
from re import compile as re_compile, match, findall
from zipfile import ZIP_DEFLATED, ZIP_STORED, ZipFile
from subprocess import PIPE, Popen

from lxml import etree

from chrysalio.lib.utils import mimetype_get
from chrysalio.lib.utils import load_guessing_encoding, make_digest
from chrysalio.lib.xml import load_xml
from .i18n import _


CONTAINER_NS = 'urn:oasis:names:tc:opendocument:xmlns:container'
OPF_NS = 'http://www.idpf.org/2007/opf'
REMOVE_PATTERN = r'(~|\.tmp)(\.\w{1,4})?$'


# =============================================================================
class EPubize():
    """Class to transform a directory into an ePub."""

    # -------------------------------------------------------------------------
    def convert(self, step, epub_dir, epub_file):
        """Convert a directory into an ePub according to ``step``.

        :param dict step:
            Dictionary representing step of type `epubize`. The keys
            ``'complete-manifest'`` and ``'fixed'`` are read; each one is
            active when its value is the string ``'true'``.
        :param str epub_dir:
            Absolute path to the directory representing the ePub.
        :param str epub_file:
            Absolute path to the ePub to create.
        :rtype: :class:`pyramid.i18n.TranslationString` or ``None``
        :return:
            An error message or ``None`` if the archive has been written.
        """
        # Check the structure
        container_file = join(epub_dir, 'META-INF', 'container.xml')
        if not exists(join(epub_dir, 'mimetype')) \
                or not exists(container_file):
            return _('Incorrect OCF structure')
        tree = load_xml(container_file)
        # Same guard as in the other methods of this class: anything which is
        # not a tree is handed back to the caller as the error.
        # pylint: disable = protected-access
        if not isinstance(tree, etree._ElementTree):
            return tree
        # pylint: enable = protected-access
        opf_file = tree.xpath(
            '/*/*/ns:rootfile/@full-path', namespaces={'ns': CONTAINER_NS})
        if not opf_file:
            return _('Unable to find OPF file')
        opf_file = join(epub_dir, opf_file[0])

        # Update manifest
        if step.get('complete-manifest') == 'true':
            error = self._update_manifest(epub_dir, opf_file)
            if error is not None:
                return error

        # Update size
        if step.get('fixed') == 'true':
            error = self._update_image_size(epub_dir, opf_file)
            if error is not None:
                return error

        # Create ZIP file
        exclude = re_compile(REMOVE_PATTERN)
        with ZipFile(epub_file, 'w', ZIP_DEFLATED) as zip_file:
            # 'mimetype' is written first and uncompressed
            zip_file.write(join(epub_dir, 'mimetype'), 'mimetype', ZIP_STORED)
            for path, dirs, files in walk(epub_dir):
                # In-place pruning prevents walk() from entering these
                # directories
                dirs[:] = [k for k in dirs if not exclude.search(k)]
                for name in files:
                    if exclude.search(name) or name == 'mimetype':
                        continue
                    fullname = join(path, name)
                    try:
                        zip_file.write(fullname, relpath(fullname, epub_dir))
                    except OSError:
                        # A file which cannot be read is silently left out
                        # of the archive.
                        pass

        return None

    # -------------------------------------------------------------------------
    @classmethod
    def _update_manifest(cls, epub_dir, opf_file):
        """Update file list in manifest tag.

        Every file found under ``epub_dir`` (``META-INF`` excepted) which is
        not yet declared in the manifest is appended to it as a new ``item``.
        The OPF file is rewritten only if at least one item has been added.

        :param str epub_dir:
            Absolute path to the directory representing the ePub.
        :param str opf_file:
            Absolute path to the OPF file.
        :rtype: :class:`pyramid.i18n.TranslationString` or ``None``
        :return:
            An error message or ``None``.
        """
        # pylint: disable = too-many-locals
        # Find the manifest element
        tree = load_xml(
            opf_file, parser=etree.XMLParser(remove_blank_text=True))
        # pylint: disable = protected-access
        if not isinstance(tree, etree._ElementTree):
            return tree
        # pylint: enable = protected-access
        manifest_elt = tree.xpath(
            '/*/opf:manifest', namespaces={'opf': OPF_NS})
        if not manifest_elt:
            return _('Manifest is missing.')
        manifest_elt = manifest_elt[0]

        # Browse declared files (paths are relative to the OPF directory)
        root = dirname(opf_file)
        done = {
            basename(opf_file), relpath(join(epub_dir, 'mimetype'), root)}
        for elt in manifest_elt.iterchildren(tag=etree.Element):
            done.add(elt.get('href'))

        # Browse real files
        modified = False
        exclude = re_compile(REMOVE_PATTERN)
        item_tag = '{{{0}}}item'.format(OPF_NS)
        for path, dirs, files in walk(epub_dir):
            # Skip META-INF and the directories convert() leaves out of the
            # archive, so that the manifest never lists a file which is not
            # packaged.
            dirs[:] = [
                k for k in dirs
                if k != 'META-INF' and not exclude.search(k)]
            for name in files:
                relname = relpath(join(path, name), root)
                if relname in done or exclude.search(name):
                    continue
                if not modified:
                    # Marker inserted once, before the first added item
                    manifest_elt.append(etree.Comment('OTHER'))
                # The item is created in the OPF namespace like the manifest
                # it belongs to.
                elt = etree.SubElement(manifest_elt, item_tag)
                elt.set('id', 'x_{0}'.format(make_digest(relname)))
                elt.set('href', relname)
                elt.set('media-type', mimetype_get(join(path, name))[0])
                modified = True

        # Save modified file
        if modified:
            tree.write(
                opf_file, encoding='utf-8', xml_declaration=True,
                pretty_print=True)

        return None

    # -------------------------------------------------------------------------
    def _update_image_size(self, epub_dir, opf_file):
        """Detect the size of images in a fixed-layout ePub and replace height
        and width in the files.

        Each ``${name}`` found in the processed files is replaced by the
        matching entry of the size dictionary (``width`` or ``height``) or by
        an empty string if ``name`` is unknown.

        :param str epub_dir:
            Absolute path to the directory representing the ePub.
        :param str opf_file:
            Absolute path to the OPF file.
        :rtype: :class:`pyramid.i18n.TranslationString` or ``None``
        :return:
            An error message or ``None``.
        """
        # Compute height and width of ePub
        size, error = self._find_image_size(opf_file)
        if error is not None:
            return error
        if size['width'] == '0' or size['height'] == '0':
            return _('no image to figure out the ePub size')

        # Replace variables
        for root, ignored_, files in walk(epub_dir):
            for name in files:
                # NOTE(review): match() anchors the pattern at the start only,
                # so a name such as ``style.css.bak`` is processed too.
                if not match(r'.+\.(x?html|opf|css)', name):
                    continue
                fullname = join(root, name)
                content = load_guessing_encoding(fullname)

                # Replace ${...}
                variables = findall(r'\$\{([^}]+)\}', content)
                if not variables:
                    continue
                for item in variables:
                    content = content.replace(
                        '${{{0}}}'.format(item), size.get(item, ''))
                with open(fullname, 'wb') as hdl:
                    hdl.write(content.encode('utf8'))

        return None

    # -------------------------------------------------------------------------
    @classmethod
    def _find_image_size(cls, opf_file):
        """Find size of the biggest image.

        The images declared in the manifest, cover image excepted, are
        measured with the external ``identify`` command. The largest width
        and the largest height are kept independently of each other.

        :param str opf_file:
            Absolute path to the OPF file.
        :rtype: tuple
        :return:
            A tuple such as ``(size, error)`` where ``size`` is a dictionary
            with keys ``'height'`` and ``'width'`` (values are strings) and
            ``error`` is ``None`` or an error message.
        """
        # Find the manifest element
        tree = load_xml(opf_file)
        # pylint: disable = protected-access
        if not isinstance(tree, etree._ElementTree):
            return {'width': '0', 'height': '0'}, tree
        # pylint: enable = protected-access
        manifest_elt = tree.xpath(
            '/*/opf:manifest', namespaces={'opf': OPF_NS})
        if not manifest_elt:
            return {'width': '0', 'height': '0'}, _('Manifest is missing.')
        manifest_elt = manifest_elt[0]

        # Browse images
        width = 0
        height = 0
        root = dirname(opf_file)
        for elt in manifest_elt.iterchildren(tag=etree.Element):
            href = elt.get('href')
            if not href or 'image' not in elt.get('media-type', '') \
                    or 'cover-image' in elt.get('properties', ''):
                continue
            try:
                with Popen(
                        ['nice', 'identify', '-format', '%w %h',
                         join(root, href)],
                        stdout=PIPE, stderr=PIPE) as proc:
                    result = proc.communicate()[0]
            except OSError:
                # The command cannot be launched: ignore this image
                continue
            # An empty or unparsable output (fewer than two integers) makes
            # the unpacking fail: the image is then ignored.
            try:
                img_width, img_height = (
                    abs(int(k)) for k in result.split()[:2])
            except ValueError:
                continue
            width = max(width, img_width)
            height = max(height, img_height)

        return {'width': str(width), 'height': str(height)}, None