"""ePub Generation."""
from os import walk
from os.path import exists, join, basename, relpath, dirname
from re import compile as re_compile, match, findall
from zipfile import ZIP_DEFLATED, ZIP_STORED, ZipFile
from subprocess import PIPE, Popen
from lxml import etree
from chrysalio.lib.utils import mimetype_get
from chrysalio.lib.utils import load_guessing_encoding, make_digest
from chrysalio.lib.xml import load_xml
from .i18n import _
# XML namespace used to query ``META-INF/container.xml`` for the root file.
CONTAINER_NS = 'urn:oasis:names:tc:opendocument:xmlns:container'
# XML namespace used to query the OPF file for its manifest element.
OPF_NS = 'http://www.idpf.org/2007/opf'
# Names ending with ``~`` or ``.tmp``, optionally followed by an extension of
# 1 to 4 word characters, are skipped when files are packaged or declared.
REMOVE_PATTERN = r'(~|\.tmp)(\.\w{1,4})?$'
# =============================================================================
class EPubize:
    """Class to transform a directory into an ePub."""

    # -------------------------------------------------------------------------
    def convert(self, step, epub_dir, epub_file):
        """Convert a directory into an ePub according to ``step``.

        :param dict step:
            Dictionary representing step of type `epubize`.
        :param str epub_dir:
            Absolute path to the directory representing the ePub.
        :param str epub_file:
            Absolute path to the ePub to create.
        :rtype: :class:`pyramid.i18n.TranslationString` or ``None``
        :return:
            An error message or ``None`` if the ePub has been created.
        """
        # Check the structure
        container_file = join(epub_dir, 'META-INF', 'container.xml')
        if not exists(join(epub_dir, 'mimetype')) \
                or not exists(container_file):
            return _('Incorrect OCF structure')
        tree = load_xml(container_file)
        # Like the other methods of this class, treat anything which is not
        # an element tree as an error to hand back to the caller.
        # pylint: disable = protected-access
        if not isinstance(tree, etree._ElementTree):
            return tree
        # pylint: enable = protected-access
        opf_file = tree.xpath(
            '/*/*/ns:rootfile/@full-path', namespaces={'ns': CONTAINER_NS})
        if not opf_file:
            return _('Unable to find OPF file')
        opf_file = join(epub_dir, opf_file[0])

        # Update manifest
        if step.get('complete-manifest') == 'true':
            error = self._update_manifest(epub_dir, opf_file)
            if error is not None:
                return error

        # Update size
        if step.get('fixed') == 'true':
            error = self._update_image_size(epub_dir, opf_file)
            if error is not None:
                return error

        # Create ZIP file
        exclude = re_compile(REMOVE_PATTERN)
        with ZipFile(epub_file, 'w', ZIP_DEFLATED) as zip_file:
            # The mimetype entry is written first and without compression
            zip_file.write(join(epub_dir, 'mimetype'), 'mimetype', ZIP_STORED)
            for path, dirs, files in walk(epub_dir):
                # Prune in place so that walk() skips excluded directories
                dirs[:] = [k for k in dirs if not exclude.search(k)]
                for name in files:
                    if exclude.search(name) or name == 'mimetype':
                        continue
                    fullname = join(path, name)
                    try:
                        zip_file.write(fullname, relpath(fullname, epub_dir))
                    except OSError:
                        # Best effort: an unreadable file is left out
                        pass
        return None

    # -------------------------------------------------------------------------
    @classmethod
    def _update_manifest(cls, epub_dir, opf_file):
        """Update file list in manifest tag.

        Every file found under ``epub_dir`` (``META-INF`` and excluded names
        apart) which is not yet declared is appended to the manifest.

        :param str epub_dir:
            Absolute path to the directory representing the ePub.
        :param str opf_file:
            Absolute path to the OPF file.
        :rtype: :class:`pyramid.i18n.TranslationString` or ``None``
        :return:
            An error message or ``None``.
        """
        # Find the manifest element
        tree = load_xml(
            opf_file, parser=etree.XMLParser(remove_blank_text=True))
        # pylint: disable = protected-access
        if not isinstance(tree, etree._ElementTree):
            return tree
        # pylint: enable = protected-access
        manifest_elt = tree.xpath(
            '/*/opf:manifest', namespaces={'opf': OPF_NS})
        if not manifest_elt:
            return _('Manifest is missing.')
        manifest_elt = manifest_elt[0]

        # Browse declared files (paths are relative to the OPF directory)
        root = dirname(opf_file)
        done = {
            basename(opf_file), relpath(join(epub_dir, 'mimetype'), root)}
        for elt in manifest_elt.iterchildren(tag=etree.Element):
            done.add(elt.get('href'))

        # Browse real files
        modified = False
        exclude = re_compile(REMOVE_PATTERN)
        # New items must live in the same namespace as the manifest itself
        item_tag = '{{{0}}}item'.format(OPF_NS)
        for path, dirs, files in walk(epub_dir):
            # Skip META-INF and the directories which are not packaged, so
            # that the manifest never declares a file absent from the ePub
            dirs[:] = [
                k for k in dirs
                if k != 'META-INF' and not exclude.search(k)]
            for name in files:
                relname = relpath(join(path, name), root)
                if relname in done or exclude.search(name):
                    continue
                if not modified:
                    manifest_elt.append(etree.Comment('OTHER'))
                elt = etree.SubElement(manifest_elt, item_tag)
                elt.set('id', 'x_{0}'.format(make_digest(relname)))
                elt.set('href', relname)
                elt.set('media-type', mimetype_get(join(path, name))[0])
                modified = True

        # Save modified file
        if modified:
            tree.write(
                opf_file, encoding='utf-8', xml_declaration=True,
                pretty_print=True)
        return None

    # -------------------------------------------------------------------------
    def _update_image_size(self, epub_dir, opf_file):
        """Detect the size of images in a fixed-layout ePub and replace
        height and width in the files.

        Each ``${name}`` placeholder found in an HTML, XHTML, OPF or CSS file
        is replaced by the matching value of the computed size, or by an
        empty string when ``name`` is neither ``width`` nor ``height``.

        :param str epub_dir:
            Absolute path to the directory representing the ePub.
        :param str opf_file:
            Absolute path to the OPF file.
        :rtype: :class:`pyramid.i18n.TranslationString` or ``None``
        :return:
            An error message or ``None``.
        """
        # Compute height and width of ePub
        size, error = self._find_image_size(opf_file)
        if error is not None:
            return error
        if size['width'] == '0' or size['height'] == '0':
            return _('no image to figure out the ePub size')

        # Replace variables and SVG calls
        for root, ignored_, files in walk(epub_dir):
            for name in files:
                if not match('.+\\.(x?html|opf|css)', name):
                    continue
                fullname = join(root, name)
                content = load_guessing_encoding(fullname)

                # Replace ${...}
                modified = False
                for item in findall(r'\$\{([^}]+)\}', content):
                    content = content.replace(
                        '${{{0}}}'.format(item), size.get(item, ''))
                    modified = True

                # Only rewrite the files which really changed
                if modified:
                    with open(fullname, 'wb') as hdl:
                        hdl.write(content.encode('utf8'))
        return None

    # -------------------------------------------------------------------------
    @classmethod
    def _find_image_size(cls, opf_file):
        """Find size of the biggest image.

        The width and the height are the largest values reported by the
        ``identify`` command over the images declared in the manifest, the
        cover image apart. They may come from two different images.

        :param str opf_file:
            Absolute path to the OPF file.
        :rtype: tuple
        :return:
            A tuple such as ``(size, error)`` where ``size`` is a dictionary
            with keys ``'height'`` and ``'width'`` (values are strings) and
            ``error`` is ``None`` on success.
        """
        # Find the manifest element
        tree = load_xml(opf_file)
        # pylint: disable = protected-access
        if not isinstance(tree, etree._ElementTree):
            return {'width': '0', 'height': '0'}, tree
        # pylint: enable = protected-access
        manifest_elt = tree.xpath(
            '/*/opf:manifest', namespaces={'opf': OPF_NS})
        if not manifest_elt:
            return {'width': '0', 'height': '0'}, _('Manifest is missing.')

        # Browse images
        width = 0
        height = 0
        root = dirname(opf_file)
        for elt in manifest_elt[0].iterchildren(tag=etree.Element):
            href = elt.get('href')
            if not href \
                    or 'image' not in elt.get('media-type', '') \
                    or 'cover-image' in elt.get('properties', ''):
                continue
            try:
                with Popen(
                        ['nice', 'identify', '-format', '%w %h',
                         join(root, href)],
                        stdout=PIPE, stderr=PIPE) as proc:
                    result = proc.communicate()[0]
            except OSError:
                continue
            # An empty or unparsable answer means the image is ignored
            try:
                img_width, img_height = (
                    abs(int(k)) for k in result.split()[:2])
            except ValueError:
                continue
            width = max(width, img_width)
            height = max(height, img_height)

        return {'width': str(width), 'height': str(height)}, None