# Source code listing: calibre package
#
# inline.py

import sys, os,  tempfile
from calibre.ebooks.rtf2xml import copy
"""
States.
1. default
    1. An open bracket ends this state.
    2. Text: write out any groups_in_waiting, then write out the text.
    3. Closed bracket: close groups.
2. after an open bracket
    1. The lack of a control word ends this state.
    2. Paragraph end -- close out all tags.
    3. Footnote beginning -- close out all tags.
"""
class Inline:
    """
    Write inline (character formatting) tags into a tokenized file.

    Logic:
        The file is read one token line at a time by a two-state machine
        ('default' and 'after_open_bracket'). Every open bracket starts a
        group, represented by a dictionary. Character-info control words
        read while in the 'after_open_bracket' state are stored in the
        dictionary of the newest group. When text is found, an
        'mi<tg<open-att__<inline' tag is written for each group that is
        still waiting; the matching 'mi<tg<close_____<inline' tag is
        written when the bracket closes or when the paragraph ends.
        Separate group lists are kept for list text and for body text.
    """

    # Special-character tokens (compared against the whole line minus its
    # last character) that are treated like ordinary text.
    __text_tokens = frozenset([
        'tx<mc<__________<rdblquote',
        'tx<mc<__________<ldblquote',
        'tx<mc<__________<lquote',
        'tx<mc<__________<rquote',
        'tx<mc<__________<emdash',
        'tx<mc<__________<endash',
        'tx<mc<__________<bullet',
    ])

    def __init__(self,
            in_file,
            bug_handler,
            copy=None,
            run_level=1,):
        """
        Required:
            'in_file'--path of the file to parse. form_tags() passes it to
            copy.Copy.rename as the destination for the result.
            'bug_handler'--exception class raised (with a message) when an
            internal inconsistency is found and run_level is above 3
        Optional:
            'copy'-- whether to make a copy of result for debugging
            'run_level'-- only values above 3 change behavior here: they
            turn the inconsistency above into a raised bug_handler
        Returns:
            nothing
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        self.__run_level = run_level
        # mkstemp creates the file atomically. mktemp only returned a name,
        # which another process could claim before the file was opened.
        handle, self.__write_to = tempfile.mkstemp()
        os.close(handle)

    def __initiate_values(self):
        """
        Initiate all values.
        """
        self.__state_dict = {
            'default':              self.__default_func,
            'after_open_bracket':   self.__after_open_bracket_func,
        }
        self.__default_dict = {
            'ob<nu<open-brack'  :       self.__found_open_bracket_func,
            'tx<nu<__________'  :       self.__found_text_func,
            'tx<hx<__________'  :       self.__found_text_func,
            'tx<ut<__________'  :       self.__found_text_func,
            'mi<mk<inline-fld'  :       self.__found_text_func,
            'text'              :       self.__found_text_func,
            'cb<nu<clos-brack'  :       self.__close_bracket_func,
            'mi<mk<par-end___'  :       self.__end_para_func,
            'mi<mk<footnt-ope'  :       self.__end_para_func,
            'mi<mk<footnt-ind'  :       self.__end_para_func,
        }
        # Same dispatch table as the default state, plus the field entry.
        # Lines starting with 'cw' are handled before this table is
        # consulted (see __after_open_bracket_func), so the field entry is
        # never actually looked up.
        self.__after_open_bracket_dict = dict(self.__default_dict)
        self.__after_open_bracket_dict['cw<fd<field_____'] = \
            self.__found_field_func
        self.__state = 'default'
        self.__brac_count = 0 # only incremented, never read in this class
        self.__list_inline_list = []
        self.__body_inline_list = []
        # One-element lists so that the counter can be shared by reference.
        self.__groups_in_waiting_list = [0]
        self.__groups_in_waiting_body = [0]
        self.__groups_in_waiting = self.__groups_in_waiting_body
        # Only ever compared against 'in_list'.
        self.__place = 'not_in_list'
        self.__inline_list = self.__body_inline_list
        self.__in_para = 0 #  not in paragraph
        self.__char_dict = {
        # character info => ci
        'annotation'    :       'annotation',
        'blue______'    :     'blue',
        'bold______'    :     'bold',
        'caps______'    :       'caps',
        'char-style'    :       'character-style',
        'dbl-strike'   :      'double-strike-through',
        'emboss____'    :     'emboss',
        'engrave___'    :     'engrave',
        'font-color'    :     'font-color',
        'font-down_'    :     'subscript',
        'font-size_'    :     'font-size',
        'font-style'    :     'font-style',
        'font-up___'    :     'superscript',
        'footnot-mk'    :       'footnote-marker',
        'green_____'    :     'green',
        'hidden____'    :     'hidden',
        'italics___'    :     'italics',
        'outline___'   :      'outline',
        'red_______'    :     'red',
        'shadow____'   :      'shadow',
        'small-caps'   :      'small-caps',
        'strike-thr'   :      'strike-through',
        'subscript_'    :     'subscript',
        'superscrip'    :     'superscript',
        'underlined'    :       'underlined',
        }
        self.__caps_list = ['false']

    def __set_list_func(self, line):
        """
        Requires:
            line--line of text (unused; the decision is made from
            self.__token_info)
        Returns:
            nothing
        Logic:
            Switch between the list and the body bookkeeping when a
            list-text begin or end marker is the current token.
        """
        if self.__place == 'in_list':
            if self.__token_info == 'mi<mk<lst-tx-end':
                self.__place = 'not_in_list'
                self.__inline_list = self.__body_inline_list
                self.__groups_in_waiting = self.__groups_in_waiting_body
        elif self.__token_info == 'mi<mk<lst-tx-beg':
            self.__place = 'in_list'
            self.__inline_list = self.__list_inline_list
            self.__groups_in_waiting = self.__groups_in_waiting_list

    def __default_func(self, line):
        """
        Requires:
            line-- line of text
        Returns:
            nothing
        Logic:
            Run the handler registered for the token, if any, then always
            write out the line. Handlers therefore write their tags
            before the line itself.
        """
        action = self.__default_dict.get(self.__token_info)
        if action:
            action(line)
        self.__write_obj.write(line)

    def __found_open_bracket_func(self, line):
        """
        Requires:
            line -- current line of text
        Returns:
            nothing
        Logic:
            Change the state to 'after_open_bracket', count the new group
            as waiting, and push an empty dictionary for it.
        """
        self.__state = 'after_open_bracket'
        self.__brac_count += 1
        self.__groups_in_waiting[0] += 1
        self.__inline_list.append({'contains_inline': 0})

    def __after_open_bracket_func(self, line):
        """
        Requires:
            line --line of text
        Returns:
            nothing
        Logic:
            If the line is a control word (starts with 'cw'), use another
            method to add it to the dictionary of the current group; the
            state does not change.
            Otherwise use the dictionary to get the appropriate function;
            if there is one, go back to the default state and call it.
            Always print out the line.
        """
        if line[0:2] == 'cw':
            self.__handle_control_word(line)
        else:
            action = self.__after_open_bracket_dict.get(self.__token_info)
            if action:
                self.__state = 'default' #  a non control word?
                action(line)
        self.__write_obj.write(line)

    def __handle_control_word(self, line):
        """
        Required:
            line --line of text
        Returns:
            nothing
        Logic:
            Handle the control word for inline groups.
            If the control word is known character info, add its name and
            value to the dictionary of the newest group and mark that
            group as containing inline info. Other control words are
            ignored.
        """
        # cw<ci<shadow_____<nu<true
        char_info = line[6:16]
        char_value = line[20:-1]
        name = self.__char_dict.get(char_info)
        if name:
            self.__inline_list[-1]['contains_inline'] = 1
            self.__inline_list[-1][name] = char_value

    def __close_bracket_func(self, line):
        """
        Requires:
            line --line of text
        Returns:
            Nothing
        Logic:
            If there are no inline groups, do nothing.
            Take the last dictionary in the inline groups. If it contains
            inline info and no groups are waiting, write a close tag,
            followed by the end markers for font and caps if present.
            Inside a list this is always done; in the body only when in
            a paragraph.
            Then drop the group, and decrease the number of waiting
            groups if it is not already zero.
        """
        if not self.__inline_list:
            # nothing to add
            return
        the_dict = self.__inline_list[-1]
        can_close = self.__place == 'in_list' or self.__in_para
        if the_dict.get('contains_inline') == 1 and can_close \
                and self.__groups_in_waiting[0] == 0:
            self.__write_obj.write('mi<tg<close_____<inline\n')
            if 'font-style' in the_dict:
                self.__write_obj.write('mi<mk<font-end__\n')
            if 'caps' in the_dict:
                self.__write_obj.write('mi<mk<caps-end__\n')
        self.__inline_list.pop()
        if self.__groups_in_waiting[0] != 0:
            self.__groups_in_waiting[0] -= 1

    def __found_text_func(self, line):
        """
        Required:
            line--line of text
        Return:
            nothing
        Logic:
            Two cases:
            1. in a list. Simply write inline
            2. Not in a list
                Text can mark the start of a paragraph.
                If already in a paragraph, check to see if any groups are
                waiting to be added. If so, use another method to write
                these groups.
        """
        if self.__place == 'in_list':
            self.__write_inline()
        elif not self.__in_para:
            self.__in_para = 1
            self.__start_para_func(line)
        elif self.__groups_in_waiting[0] != 0:
            self.__write_inline()

    def __write_open_tag(self, the_dict):
        """
        Required:
            the_dict -- dictionary of one inline group
        Returns:
            nothing
        Logic:
            Do nothing if the group holds no inline info. Otherwise write
            the font and caps marker tags if those keys are present, then
            one open tag that carries every key and value except
            'contains_inline'.
        """
        if not the_dict.get('contains_inline'):
            return
        if 'font-style' in the_dict:
            self.__write_obj.write(
                'mi<mk<font______<%s\n' % the_dict['font-style'])
        if 'caps' in the_dict:
            self.__write_obj.write(
                'mi<mk<caps______<%s\n' % the_dict['caps'])
        attributes = ''.join(
            '<%s>%s' % (the_key, the_value)
            for the_key, the_value in the_dict.items()
            if the_key != 'contains_inline')
        self.__write_obj.write('mi<tg<open-att__<inline%s\n' % attributes)

    def __write_inline(self):
        """
        Required:
            nothing
        Returns
            Nothing
        Raises:
            self.__bug_handler -- if groups are counted as waiting but
            none exist, and the run level is above 3
        Logic:
            Method for writing inline when text is found.
            Only write those groups that are "waiting", or that have no
            tags yet.
            First, slice the list self.__inline_list to get just the groups
            in waiting, then write the open tag for each of them.
            If the slice is empty, the bookkeeping is inconsistent:
            either raise, or write an 'error' line and carry on.
            In every non-raising case no groups are waiting afterwards.
        """
        waiting = self.__groups_in_waiting[0]
        if waiting != 0:
            inline_list = self.__inline_list[-waiting:]
            if not inline_list:
                if self.__run_level > 3:
                    msg = 'self.__inline_list is %s\n' % self.__inline_list
                    # call form is valid in both Python 2 and Python 3
                    raise self.__bug_handler(msg)
                self.__write_obj.write('error\n')
                self.__groups_in_waiting[0] = 0
                return
            for the_dict in inline_list:
                self.__write_open_tag(the_dict)
        self.__groups_in_waiting[0] = 0

    def __end_para_func(self, line):
        """
        Requires:
            line -- line of text
        Returns:
            nothing
        Logic:
            Do nothing if not in a paragraph.
            Leave out the groups in waiting (they have no open tag yet).
            Iterate through the remaining groups. If the dictionary
            contains info, write the end markers and a closing tag.
            Mark the paragraph as ended.
        """
        if not self.__in_para:
            return
        waiting = self.__groups_in_waiting[0]
        if waiting == 0:
            inline_list = self.__inline_list
        else:
            inline_list = self.__inline_list[0:-waiting]
        for the_dict in inline_list:
            if the_dict.get('contains_inline'):
                if 'font-style' in the_dict:
                    self.__write_obj.write('mi<mk<font-end__\n')
                if 'caps' in the_dict:
                    self.__write_obj.write('mi<mk<caps-end__\n')
                self.__write_obj.write('mi<tg<close_____<inline\n')
        self.__in_para = 0

    def __start_para_func(self, line):
        """
        Requires:
            line -- line of text
        Returns:
            nothing
        Logic:
            Iterate through the self.__inline_list to get each dict and
            write its open tag (groups without inline info are skipped).
            Afterwards no groups are waiting.
        """
        for the_dict in self.__inline_list:
            self.__write_open_tag(the_dict)
        self.__groups_in_waiting[0] = 0

    def __found_field_func(self, line):
        """
        Just a default function to make sure I don't prematurely exit
        default state
        """
        pass

    def __process_line(self, line):
        """
        Requires:
            line -- line of text, including its line ending
        Returns:
            nothing
        Logic:
            Work out the token info: 'text' for the special-character
            tokens, otherwise the first 16 characters of the line.
            Update the list / body bookkeeping, then hand the line to the
            function for the current state.
        """
        if line[0:-1] in self.__text_tokens:
            self.__token_info = 'text'
        else:
            self.__token_info = line[:16]
        self.__set_list_func(line)
        action = self.__state_dict.get(self.__state)
        if action is None:
            sys.stderr.write('No matching state in module inline.py\n')
            sys.stderr.write(self.__state + '\n')
            # No handler to call: pass the line through unchanged rather
            # than trying to call None.
            self.__write_obj.write(line)
            return
        action(line)

    def form_tags(self):
        """
        Requires:
            nothing
        Returns:
            nothing
        Logic:
            Read one line in at a time. Determine what action to take based
            on the state. The result is written to a temporary file, which
            is optionally copied to "inline.data", then handed to
            copy.Copy.rename together with the input path, and finally
            removed.
        """
        self.__initiate_values()
        # 'with' closes both files even if a handler raises
        with open(self.__file, 'r') as read_obj:
            with open(self.__write_to, 'w') as write_obj:
                self.__write_obj = write_obj
                for line in read_obj:
                    self.__process_line(line)
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "inline.data")
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)

# Listing generated by Doxygen 1.6.0