Logo Search packages:      
Sourcecode: calibre version File versions  Download package

input.py

#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement

__license__   = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, textwrap
from copy import deepcopy

from lxml import etree

from calibre.customize.conversion import InputFormatPlugin
from calibre import guess_type

class Canvas(etree.XSLTExtension):
    '''
    XSLT extension element that converts LRF ``Canvas`` objects to HTML.

    A canvas that places exactly one object, an ``ImageBlock``, is rendered
    as an image page (see :meth:`image_page`). Any other canvas is rendered,
    together with the ``Canvas`` siblings that directly follow it, as a
    single ``<table>`` with one ``<tr>`` per canvas and one ``<td>`` per
    placed ``TextBlock``.
    '''

    def __init__(self, doc, styles, text_block, log):
        '''
        :param doc: Parsed XML tree in which canvases, image streams and
                    placed objects are looked up by ``objid``.
        :param styles: Object providing ``to_num()`` for parsing numeric
                       attributes.
        :param text_block: Object providing ``render_block(node, root)``,
                           used to fill table cells.
        :param log: Logger providing ``warn()``.
        '''
        # Initialise the base class the same way the other extension
        # classes in this module do.
        etree.XSLTExtension.__init__(self)
        self.doc = doc
        self.styles = styles
        self.text_block = text_block
        self.log = log
        # objids of the canvases that have already been rendered
        self.processed = set()

    def execute(self, context, self_node, input_node, output_parent):
        '''
        Render the canvas identified by the ``objid`` of `input_node` and
        append the result to `output_parent`.

        Nothing is emitted when `input_node` has no ``objid``, when the canvas
        was already rendered (possibly as part of an earlier table), or when
        no matching ``Canvas`` exists in ``self.doc``.
        '''
        cid = input_node.get('objid', None)
        if cid is None or cid in self.processed:
            return
        self.processed.add(cid)
        # Work on the node from self.doc rather than on the node handed in
        # by the XSLT processor.
        matches = self.doc.xpath('//Canvas[@objid="%s"]'%cid)
        if not matches:
            # Previously this raised IndexError and aborted the conversion
            self.log.warn('Canvas with objid', cid, 'not found, skipping')
            return
        input_node = matches[0]

        objects = list(self.get_objects(input_node))
        if len(objects) == 1 and objects[0][0].tag == 'ImageBlock':
            self.image_page(input_node, objects[0][0], output_parent)
        else:
            # Collect the run of Canvas siblings that follows this one. The
            # run ends at the first sibling that is not a Canvas; canvases
            # without an objid are skipped but do not end the run.
            canvases = [input_node]
            for sibling in input_node.itersiblings():
                if sibling.tag != 'Canvas':
                    break
                oid = sibling.get('objid', None)
                if oid is not None:
                    canvases.append(sibling)
                    # Mark as done so a later execute() call skips it
                    self.processed.add(oid)

            table = etree.Element('table')
            table.text = '\n\t'
            for canvas in canvases:
                oid = canvas.get('objid')
                tr = table.makeelement('tr')
                tr.set('id', oid)
                tr.tail = '\n\t'
                table.append(tr)
                for obj, x, y in self.get_objects(canvas):
                    if obj.tag != 'TextBlock':
                        self.log.warn(obj.tag, 'elements in Canvas not supported')
                        continue
                    td = table.makeelement('td')
                    self.text_block.render_block(obj, td)
                    tr.append(td)
            output_parent.append(table)

    def image_page(self, input_node, block, output_parent):
        '''
        Append ``<div class="image_page"><img/></div>`` to `output_parent`.

        :param input_node: Node whose ``objid`` becomes the id of the
                           ``<div>`` (``'scuzzy'`` is used when it has none).
        :param block: Node supplying ``xsize``, ``ysize`` and ``refstream``.
        :param output_parent: Element the ``<div>`` is appended to.

        ``width``/``height`` are only set when the size attributes parse as
        numbers. ``src`` is only set when ``refstream`` names an
        ``ImageStream`` in ``self.doc`` that has a non-empty ``file``
        attribute.
        '''
        div = etree.Element('div')
        div.set('id', input_node.get('objid', 'scuzzy'))
        div.set('class', 'image_page')
        width = self.styles.to_num(block.get("xsize", None))
        height = self.styles.to_num(block.get("ysize", None))
        img = div.makeelement('img')
        if width is not None:
            img.set('width', str(int(width)))
        if height is not None:
            img.set('height', str(int(height)))
        ref = block.get('refstream', None)
        if ref is not None:
            imstr = self.doc.xpath('//ImageStream[@objid="%s"]'%ref)
            if imstr:
                src = imstr[0].get('file', None)
                if src:
                    img.set('src', src)
        div.append(img)
        output_parent.append(div)

    def get_objects(self, node):
        '''
        Yield ``(object, x, y)`` for every ``PutObj`` descendant of `node`
        that has ``refobj``, ``x1`` and ``y1`` attributes.

        ``object`` is the first element in the document whose ``objid``
        equals ``refobj``; ``x`` and ``y`` are the coordinates as integers.
        Entries whose object cannot be found or whose coordinates are not
        numeric are skipped.
        '''
        for put in node.xpath('descendant::PutObj[@refobj and @x1 and @y1]'):
            # The leading // makes this a document wide search
            objs = node.xpath('//*[@objid="%s"]'%put.get('refobj'))
            x = self.styles.to_num(put.get('x1'))
            y = self.styles.to_num(put.get('y1'))
            if objs and x is not None and y is not None:
                yield objs[0], int(x), int(y)


class MediaType(etree.XSLTExtension):
    '''
    XSLT extension element that writes the MIME type of the file named by
    the input node's ``file`` attribute into the text of the output element.
    '''

    def execute(self, context, self_node, input_node, output_parent):
        '''
        Set ``output_parent.text`` to the guessed MIME type, falling back to
        ``application/octet-stream`` when the ``file`` attribute is missing
        or empty, or when no type can be guessed.
        '''
        name = input_node.get('file', None)
        # Do not hand a missing name to guess_type(); there is nothing to
        # guess from and it presumably expects a string (confirm against
        # calibre.guess_type).
        typ = guess_type(name)[0] if name else None
        if not typ:
            typ = 'application/octet-stream'
        output_parent.text = typ

class ImageBlock(etree.XSLTExtension):
    '''
    XSLT extension element for image blocks. Rendering is delegated to the
    ``image_page()`` method of the canvas object passed to the constructor.
    '''

    def __init__(self, canvas):
        super(ImageBlock, self).__init__()
        self.canvas = canvas

    def execute(self, context, self_node, input_node, output_parent):
        # The same node is passed both as the source of the id and as the
        # block supplying the image attributes.
        render = self.canvas.image_page
        render(input_node, input_node, output_parent)


class RuledLine(etree.XSLTExtension):
    '''XSLT extension element that appends an ``<hr>`` to the output.'''

    def execute(self, context, self_node, input_node, output_parent):
        output_parent.append(etree.Element('hr'))


class TextBlock(etree.XSLTExtension):
    '''
    XSLT extension element that converts an LRF ``TextBlock`` into HTML.

    The rendering state (``root``, ``parent`` and ``add_text_to``) is kept on
    the instance while a block is being rendered:

    * ``root`` is the element the block is rendered into.
    * ``parent`` is the element new child elements are appended to.
    * ``add_text_to`` is an ``(element, attribute)`` pair, with attribute
      being ``'text'`` or ``'tail'``, naming where the next run of text goes.
    '''

    # Inline LRF tags that map directly onto an HTML tag of the same meaning
    SIMPLE_CONTAINERS = {'Sup': 'sup', 'Sub': 'sub', 'Italic': 'i'}

    def __init__(self, styles, char_button_map, plot_map, log):
        '''
        :param styles: Object providing ``block_style_map``,
                       ``text_style_map``, ``get_text_styles()`` and
                       ``to_num()``.
        :param char_button_map: Maps the ``refobj`` of a ``CharButton`` to
                                the link target used as ``href``.
        :param plot_map: Maps the ``refobj`` of a ``Plot`` to the image used
                         as ``src``.
        :param log: Logger providing ``warn()``.
        '''
        etree.XSLTExtension.__init__(self)
        self.styles = styles
        self.log = log
        self.char_button_map = char_button_map
        self.plot_map = plot_map

    def execute(self, context, self_node, input_node, output_parent):
        '''Render a copy of `input_node` into a new ``<div>`` appended to
        `output_parent`.'''
        input_node = deepcopy(input_node)
        div = etree.Element('div')
        self.render_block(input_node, div)
        output_parent.append(div)

    def render_block(self, node, root):
        '''
        Render the text block `node` into the element `root`.

        `root` gets a ``class`` built from the block and text style indices
        registered for the ``blockstyle``/``textstyle`` attributes (when
        known) and an ``id`` equal to the ``objid`` of `node` (when present).
        '''
        ts = node.get('textstyle', None)
        classes = []
        bs = node.get('blockstyle')
        if bs in self.styles.block_style_map:
            classes.append('bs%d'%self.styles.block_style_map[bs])
        if ts in self.styles.text_style_map:
            classes.append('ts%d'%self.styles.text_style_map[ts])
        if classes:
            root.set('class', ' '.join(classes))
        objid = node.get('objid', None)
        if objid:
            root.set('id', objid)
        root.text = node.text
        self.root = root
        self.parent = root
        self.add_text_to = (self.parent, 'text')
        for child in node:
            self.process_child(child)

    def add_text(self, text):
        '''Append `text` to the current text target; empty or missing text
        is ignored.'''
        if not text:
            return
        elem, attr = self.add_text_to
        # The target attribute is None until some text has been stored in it
        setattr(elem, attr, (getattr(elem, attr) or '') + text)

    def process_container(self, child, tgt):
        '''
        Render the inline container `child` into the new element `tgt`.

        `tgt` is appended to the current parent, receives the text and
        children of `child`, and afterwards the tail of `child` is stored as
        the tail of `tgt`.
        '''
        idx = self.styles.get_text_styles(child)
        if idx is not None:
            tgt.set('class', 'ts%d'%idx)
        self.parent.append(tgt)
        orig_parent = self.parent
        self.parent = tgt
        self.add_text_to = (self.parent, 'text')
        self.add_text(child.text)
        for gchild in child:
            self.process_child(gchild)
        # Back to the enclosing element; following text belongs after tgt
        self.parent = orig_parent
        self.add_text_to = (tgt, 'tail')
        self.add_text(child.tail)

    def process_child(self, child):
        '''
        Convert a single child element of a text block or inline container.

        * ``CR`` starts a new ``<p>`` at the top level of the block and
          becomes a ``<br>`` inside any other element.
        * ``P``, ``Span``, ``EmpLine`` and ``NoBR`` become ``<span>``.
        * ``Sup``, ``Sub`` and ``Italic`` become ``<sup>``, ``<sub>`` and
          ``<i>``.
        * ``CharButton`` becomes ``<a>`` and ``Plot`` becomes ``<img>``.
        * Anything else is reported with a warning; only the text that
          follows it is kept.
        '''
        tag = child.tag
        if tag == 'CR':
            if self.parent == self.root:
                self.parent = self.root.makeelement('p')
                self.root.append(self.parent)
                self.add_text_to = (self.parent, 'text')
            else:
                br = self.parent.makeelement('br')
                self.parent.append(br)
                self.add_text_to = (br, 'tail')
            self.add_text(child.tail)
        elif tag in ('P', 'Span', 'EmpLine', 'NoBR'):
            span = self.root.makeelement('span')
            if tag == 'EmpLine':
                td = 'underline' if child.get('emplineposition', 'before') == 'before' else 'overline'
                span.set('style', 'text-decoration: '+td)
            self.process_container(child, span)
        elif tag in self.SIMPLE_CONTAINERS:
            elem = self.root.makeelement(self.SIMPLE_CONTAINERS[tag])
            self.process_container(child, elem)
        elif tag == 'CharButton':
            a = self.root.makeelement('a')
            oid = child.get('refobj', None)
            if oid in self.char_button_map:
                a.set('href', self.char_button_map[oid])
            self.process_container(child, a)
        elif tag == 'Plot':
            # Sizes are scaled by 166/720 before being used as the image
            # dimensions (presumably a unit conversion to pixels -- confirm).
            xsize = self.styles.to_num(child.get('xsize', None), 166./720)
            ysize = self.styles.to_num(child.get('ysize', None), 166./720)
            img = self.root.makeelement('img')
            if xsize is not None:
                img.set('width', str(int(xsize)))
            if ysize is not None:
                img.set('height', str(int(ysize)))
            ro = child.get('refobj', None)
            if ro in self.plot_map:
                img.set('src', self.plot_map[ro])
            self.parent.append(img)
            self.add_text_to = (img, 'tail')
            self.add_text(child.tail)
        else:
            self.log.warn('Unhandled Text element:', child.tag)
            # The tail is ordinary text of the enclosing element, so keep it
            # even though the element itself cannot be converted.
            self.add_text(child.tail)


class Styles(etree.XSLTExtension):
    '''
    XSLT extension element that collects the text and block styles of an LRF
    document and writes them out as a CSS stylesheet.

    Every distinct style is stored once, as a dict of CSS properties, in
    ``text_styles``/``block_styles``. Its position in that list is the number
    used in the ``ts<N>``/``bs<N>`` class names. ``text_style_map`` and
    ``block_style_map`` map the ``objid`` of a style element to that number.
    '''

    def __init__(self):
        etree.XSLTExtension.__init__(self)
        self.text_styles, self.block_styles = [], []
        self.text_style_map, self.block_style_map = {}, {}
        self.CSS = textwrap.dedent('''
        .image_page { text-align:center }
        ''')

    def write(self, name='styles.css'):
        '''
        Write the fixed CSS followed by one rule per non-empty collected
        style to the file `name`.
        '''

        def join(style):
            # One "property : value;" per line, without the trailing
            # semicolon on the last declaration.
            ans = ['%s : %s;'%(k, v) for k, v in style.items()]
            if ans:
                ans[-1] = ans[-1][:-1]
            return '\n\t'.join(ans)

        with open(name, 'wb') as f:
            f.write(self.CSS)
            for (w, sel) in [(self.text_styles, 'ts'), (self.block_styles,
                'bs')]:
                for i, s in enumerate(w):
                    if not s:
                        # Empty styles keep their index but produce no rule
                        continue
                    rsel = '.%s%d'%(sel, i)
                    s = join(s)
                    f.write(rsel + ' {\n\t' + s + '\n}\n\n')

    def execute(self, context, self_node, input_node, output_parent):
        '''
        Register the style described by `input_node` under its ``objid``.

        ``TextStyle`` nodes are registered as text styles, and only when they
        define at least one property. Every other node is registered as a
        block style.
        '''
        if input_node.tag == 'TextStyle':
            idx = self.get_text_styles(input_node)
            if idx is not None:
                self.text_style_map[input_node.get('objid')] = idx
        else:
            idx = self.get_block_styles(input_node)
            self.block_style_map[input_node.get('objid')] = idx

    def px_to_pt(self, px):
        '''Return `px` multiplied by 72/166 as a float, or None when `px` is
        missing or not numeric.'''
        try:
            px = float(px)
        except (TypeError, ValueError):
            return None
        return px * 72./166.

    def color(self, val):
        '''
        Convert a hexadecimal colour string into a CSS colour.

        The four bytes of the value are read, from lowest to highest, as red,
        green, blue and alpha, where alpha 0 is opaque and alpha 255 is
        fully transparent. Returns None for a fully transparent colour and
        for values that are missing or not valid hexadecimal.

        NOTE(review): confirm that the lowest byte really is red in the
        colour strings this is fed with; SOURCE does not show how they are
        produced.
        '''
        try:
            val = int(val, 16)
        except (TypeError, ValueError):
            return None
        r, g, b, a = val & 0xFF, (val>>8)&0xFF, (val>>16)&0xFF, (val>>24)&0xFF
        if a == 255:
            return None
        if a == 0:
            return 'rgb(%d,%d,%d)'%(r,g,b)
        return 'rgba(%d,%d,%d,%f)'%(r,g,b,1.-a/255.)

    def get_block_styles(self, node):
        '''
        Build the CSS properties for the block style attributes of `node`,
        register them if they are new and return their index in
        ``block_styles``. An empty style is registered like any other.
        '''
        ans = {}
        sm = self.px_to_pt(node.get('sidemargin', None))
        if sm is not None:
            ans['margin-left'] = ans['margin-right'] = '%fpt'%sm
        ts = self.px_to_pt(node.get('topskip', None))
        if ts is not None:
            ans['margin-top'] = '%fpt'%ts
        fs = self.px_to_pt(node.get('footskip', None))
        if fs is not None:
            ans['margin-bottom'] = '%fpt'%fs
        fw = self.px_to_pt(node.get('framewidth', None))
        if fw is not None:
            ans['border-width'] = '%fpt'%fw
            ans['border-style'] = 'solid'
        fc = self.color(node.get('framecolor', None))
        if fc is not None:
            ans['border-color'] = fc
        bc = self.color(node.get('bgcolor', None))
        if bc is not None:
            ans['background-color'] = bc
        if ans not in self.block_styles:
            self.block_styles.append(ans)
        return self.block_styles.index(ans)

    def to_num(self, val, factor=1.):
        '''Return ``float(val) * factor``, or None when `val` is missing or
        not numeric.'''
        try:
            return float(val)*factor
        except (TypeError, ValueError):
            return None

    def get_text_styles(self, node):
        '''
        Build the CSS properties for the text style attributes of `node`.

        Returns the index of the style in ``text_styles`` (registering it if
        it is new), or None when `node` defines none of the supported
        attributes.
        '''
        ans = {}
        fs = self.to_num(node.get('fontsize', None), 0.1)
        if fs is not None:
            ans['font-size'] = '%fpt'%fs
        fw = self.to_num(node.get('fontweight', None))
        if fw is not None:
            ans['font-weight'] = ('bold' if fw >= 700 else 'normal')
        #fn = getattr(obj, 'fontfacename', None)
        #if fn is not None:
        #    fn = cls.FONT_MAP[fn]
        #    item('font-family: %s;'%fn)
        fg = self.color(node.get('textcolor', None))
        if fg is not None:
            ans['color'] = fg
        bg = self.color(node.get('textbgcolor', None))
        if bg is not None:
            ans['background-color'] = bg
        al = node.get('align', None)
        if al is not None:
            # Unknown alignment values fall back to left
            alignments = dict(head='left', center='center', foot='right')
            ans['text-align'] = alignments.get(al, 'left')
        #lh = self.to_num(node.get('linespace', None), 0.1)
        #if lh is not None:
        #    ans['line-height'] = '%fpt'%lh
        pi = self.to_num(node.get('parindent', None), 0.1)
        if pi is not None:
            ans['text-indent'] = '%fpt'%pi
        if not ans:
            return None
        if ans not in self.text_styles:
            self.text_styles.append(ans)
        return self.text_styles.index(ans)



class LRFInput(InputFormatPlugin):
    '''
    Input plugin for LRF files. The file is dumped to XML and that XML is
    run through the ``templates/lrf.xsl`` stylesheet with the extension
    classes of this module.
    '''

    name        = 'LRF Input'
    author      = 'Kovid Goyal'
    description = 'Convert LRF files to HTML'
    file_types  = set(['lrf'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the LRF data in `stream`.

        The result of the XSLT transform is written to ``content.opf`` and
        the collected styles to ``styles.css``, both relative to the current
        working directory. Returns the absolute path of ``content.opf``.
        '''
        self.log = log
        self.log('Generating XML')
        from calibre.ebooks.lrf.lrfparser import LRFDocument
        lrf = LRFDocument(stream)
        lrf.parse()
        raw = lrf.to_xml(write_files=True)
        lenient = etree.XMLParser(recover=True, no_network=True)
        doc = etree.fromstring(raw, parser=lenient)

        # refobj of every CharButton -> link to the JumpTo target found
        # inside the object the button refers to
        char_button_map = {}
        for button in doc.xpath('//CharButton[@refobj]'):
            ref = button.get('refobj')
            referenced = doc.xpath('//*[@objid="%s"]'%ref)
            if not referenced:
                continue
            jumps = referenced[0].xpath('descendant::JumpTo[@refpage and @refobj]')
            if not jumps:
                continue
            target = jumps[0]
            char_button_map[ref] = '%s.xhtml#%s'%(target.get('refpage'),
                    target.get('refobj'))

        # refobj of every Plot -> file of the ImageStream behind the Image
        plot_map = {}
        for plot in doc.xpath('//Plot[@refobj]'):
            ref = plot.get('refobj')
            images = doc.xpath('//Image[@objid="%s" and @refstream]'%ref)
            if not images:
                continue
            streams = doc.xpath('//ImageStream[@objid="%s" and @file]'%
                images[0].get('refstream'))
            if streams:
                plot_map[ref] = streams[0].get('file')

        self.log('Converting XML to HTML...')
        # P is not defined in this file; presumably a resource loader that
        # calibre makes available globally.
        styledoc = etree.fromstring(P('templates/lrf.xsl', data=True))
        media_type = MediaType()
        styles = Styles()
        text_block = TextBlock(styles, char_button_map, plot_map, log)
        canvas = Canvas(doc, styles, text_block, log)
        image_block = ImageBlock(canvas)
        ruled_line = RuledLine()
        # Extension elements live in the 'calibre' namespace
        extensions = dict((('calibre', tag), ext) for tag, ext in (
            ('media-type', media_type),
            ('text-block', text_block),
            ('ruled-line', ruled_line),
            ('styles', styles),
            ('canvas', canvas),
            ('image-block', image_block),
        ))
        transform = etree.XSLT(styledoc, extensions=extensions)
        result = transform(doc)
        with open('content.opf', 'wb') as opf:
            opf.write(result)
        styles.write()
        return os.path.abspath('content.opf')



Generated by  Doxygen 1.6.0   Back to index