# Logo Search packages:
# Sourcecode: calibre version File versions  Download package
#
# preamble_div.py

#########################################################################
#                                                                       #
#                                                                       #
#   copyright 2002 Paul Henry Tremblay                                  #
#                                                                       #
#   This program is distributed in the hope that it will be useful,     #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of      #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU    #
#   General Public License for more details.                            #
#                                                                       #
#   You should have received a copy of the GNU General Public License   #
#   along with this program; if not, write to the Free Software         #
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA            #
#   02111-1307 USA                                                      #
#                                                                       #
#                                                                       #
#########################################################################
import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy, override_table, list_table
class PreambleDiv:
    """
    Break the preamble into divisions.

    The input file is read line by line. The first 16 characters of each
    line identify the token (for example 'cw<pa<margin-lef<nu<1728'), and
    bracket tokens carry a four character count at the end of the line
    (for example 'ob<nu<open-brack<0002').

    While in the preamble, the font table, color table, style sheet, list
    table, override table, revision table, document info and page settings
    are collected into strings. As soon as the body starts, all of them are
    written out in a fixed order (see __write_preamble), followed by the
    body lines unchanged.
    """

    def __init__(self, in_file,
            bug_handler,
            copy = None,
            no_namespace = None,
            run_level = 1,
            ):
        """
        Required:
            'in_file' -- path of the file to process. The file is replaced
            by the result of make_preamble_divisions.
            'bug_handler' -- passed on to the helper objects this class
            creates.
        Optional:
            'copy' -- whether to make a copy of result for debugging
            'no_namespace' -- if true, the opening doc tag is written
            without the xmlns attribute
            'run_level' -- passed on to the list and override table parsers
        Returns:
            nothing
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        self.__no_namespace = no_namespace
        self.__run_level = run_level
        # The scratch file is created (securely, with mkstemp) only when
        # make_preamble_divisions actually runs, so constructing the object
        # has no side effects on disk.
        self.__write_to = None

    def __initiate_values(self):
        """
        Set values, including those for the dictionary.
        """
        self.__all_lists = {}
        self.__page = {
        'margin-top'    : 72,
        'margin-bottom' : 72,
        'margin-left'   : 90,
        'margin-right'  : 90,
        'gutter'        : 0,
        }
        self.__token_info = ''
        self.__cb_count = ''
        self.__ob_count = ''
        self.__state = 'preamble'
        # Both are read by __ignore_func; give them a defined value so the
        # 'ignore' state can never hit a missing attribute.
        self.__previous_state = 'preamble'
        self.__ignore_num = ''
        self.__rtf_final = ''
        self.__close_group_count = ''
        self.__found_font_table = 0
        self.__font_table_final = ''
        self.__color_table_final = ''
        self.__style_sheet_final = ''
        self.__list_table_final = ''
        self.__override_table_final = ''
        self.__revision_table_final = ''
        self.__doc_info_table_final = ''
        # Maps both state names and token infos to their handlers.
        self.__state_dict = {
        'default'           :   self.__default_func,
        'rtf_header'        :   self.__rtf_head_func,
        'preamble'          :   self.__preamble_func,
        'font_table'        :   self.__font_table_func,
        'color_table'       :   self.__color_table_func,
        'style_sheet'       :   self.__style_sheet_func,
        'list_table'        :   self.__list_table_func,
        'override_table'    :   self.__override_table_func,
        'revision_table'    :   self.__revision_table_func,
        'doc_info'          :   self.__doc_info_func,
        'body'              :   self.__body_func,
        'ignore'            :   self.__ignore_func,
        'cw<ri<rtf_______'  :   self.__found_rtf_head_func,
        'cw<pf<par-def___'  :   self.__para_def_func,
        'tx<nu<__________'  :   self.__text_func,
        'cw<tb<row-def___'  :   self.__row_def_func,
        'cw<sc<section___'  :   self.__new_section_func,
        'cw<sc<sect-defin'  :   self.__new_section_func,
        'cw<it<font-table'  :   self.__found_font_table_func,
        'cw<it<colr-table'  :   self.__found_color_table_func,
        'cw<ss<style-shet'  :   self.__found_style_sheet_func,
        'cw<it<listtable_'  :   self.__found_list_table_func,
        'cw<it<lovr-table'  :   self.__found_override_table_func,
        'cw<it<revi-table'  :   self.__found_revision_table_func,
        'cw<di<doc-info__'  :   self.__found_doc_info_func,
        'cw<pa<margin-lef'  :   self.__margin_func,
        'cw<pa<margin-rig'  :   self.__margin_func,
        'cw<pa<margin-top'  :   self.__margin_func,
        'cw<pa<margin-bot'  :   self.__margin_func,
        'cw<pa<gutter____'  :   self.__margin_func,
        'cw<pa<paper-widt'  :   self.__margin_func,
        'cw<pa<paper-hght'  :   self.__margin_func,
        # NOTE: 'cw<tb<columns___' is deliberately not routed to
        # self.__section_func; that handler is currently unused.
        }
        self.__margin_dict = {
        'margin-lef'        :   'margin-left',
        'margin-rig'        :   'margin-right',
        'margin-top'        :   'margin-top',
        'margin-bot'        :   'margin-bottom',
        'gutter____'        :   'gutter',
        'paper-widt'        :   'paper-width',
        'paper-hght'        :   'paper-height',
        }
        self.__translate_sec = {
        'columns___'        :   'column',
        }
        self.__section = {}
        self.__individual_font = 0
        self.__old_font = 0
        self.__ob_group = 0 # depth of group
        self.__list_table_obj = list_table.ListTable(
                run_level = self.__run_level,
                bug_handler = self.__bug_handler,
                )

    def __wrap_table(self, contents, tag, marker):
        """
        Return contents surrounded by the open/close tag lines for 'tag'
        and the begin/end marker lines for 'marker'.
        """
        return (
            'mi<tg<open______<%s\n' % tag +
            'mi<mk<%s-beg\n' % marker +
            contents +
            'mi<mk<%s-end\n' % marker +
            'mi<tg<close_____<%s\n' % tag
        )

    def __begin_group(self):
        """
        Remember the count of the bracket that opened the current group and
        reset the closing bracket count, so the end of the group can be
        detected.
        """
        self.__close_group_count = self.__ob_count
        self.__cb_count = 0

    def __finish_rtf_header(self):
        """
        Surround the collected rtf header lines with their markers.
        """
        self.__rtf_final = (
            'mi<mk<rtfhed-beg\n' +
            self.__rtf_final +
            'mi<mk<rtfhed-end\n'
        )

    def __enter_body(self):
        """
        Switch to the body state and write out everything collected from
        the preamble.
        """
        self.__state = 'body'
        self.__write_preamble()

    def __ignore_func(self, line):
        """
        Ignore all  lines, until the bracket is found that marks the end of
        the group.
        """
        if self.__ignore_num == self.__cb_count:
            self.__state = self.__previous_state

    def __found_rtf_head_func(self, line):
        """
        The rtf control word was found; start collecting the header.
        """
        self.__state = 'rtf_header'

    def __rtf_head_func(self, line):
        """
        Collect header lines until the second open bracket is found. If text
        or a paragraph definition shows up first, there is no preamble at
        all: write a default one and go straight to the body.
        """
        if self.__ob_count == '0002':
            self.__finish_rtf_header()
            self.__state = 'preamble'
        elif self.__token_info in ('tx<nu<__________', 'cw<pf<par-def___'):
            self.__state = 'body'
            self.__finish_rtf_header()
            self.__make_default_font_table()
            self.__write_preamble()
            self.__write_obj.write(line)
        else:
            self.__rtf_final += line

    def __make_default_font_table(self):
        """
        If no font table is found, need to write one out.
        """
        self.__font_table_final = self.__wrap_table(
            'mi<mk<fontit-beg\n'
            'cw<ci<font-style<nu<0\n'
            'tx<nu<__________<Times;\n'
            'mi<mk<fontit-end\n',
            'font-table', 'fonttb')

    def __make_default_color_table(self):
        """
        If no color table is found, write a string for a default one
        """
        self.__color_table_final = self.__wrap_table(
            'cw<ci<red_______<nu<00\n'
            'cw<ci<green_____<nu<00\n'
            'cw<ci<blue______<en<00\n',
            'color-table', 'clrtbl')

    def __make_default_style_table(self):
        """
        If no style sheet is found, make a string for a default one
        """
        self.__style_sheet_final = self.__wrap_table(
            'mi<mk<stylei-beg\n'
            'cw<ci<font-style<nu<0\n'
            'tx<nu<__________<Normal;\n'
            'mi<mk<stylei-end\n'
            'mi<mk<stylei-beg\n'
            'cw<ss<char-style<nu<0\n'
            'tx<nu<__________<Default Paragraph Font;\n'
            'mi<mk<stylei-end\n',
            'style-table', 'styles')

    def __found_font_table_func(self, line):
        """
        Start collecting the font table. Only the first font table is kept;
        the whole group of any later one is ignored.
        """
        if self.__found_font_table:
            # __ignore_func needs to know which closing bracket ends the
            # group and which state to return to afterwards.
            self.__previous_state = 'preamble'
            self.__ignore_num = self.__ob_count
            self.__state = 'ignore'
        else:
            self.__state = 'font_table'
            self.__font_table_final = ''
        self.__begin_group()
        self.__found_font_table = 1

    def __font_table_func(self, line):
        """
        Keep adding to the self.__font_table_final string until end of group
        found. If a bracket is found, check that it is only one bracket deep.
        If it is, then set the marker for an individual font. If it is not,
        then ignore all data in this group.

        A 'cw<ci<font-style' token outside of any bracket marks the older
        form of RTF, in which fonts are separated by a semicolon rather than
        by brackets.
        """
        if self.__cb_count == self.__close_group_count:
            self.__state = 'preamble'
            self.__font_table_final = self.__wrap_table(
                self.__font_table_final, 'font-table', 'fonttb')
        elif self.__token_info == 'ob<nu<open-brack':
            if int(self.__ob_count) == int(self.__close_group_count) + 1:
                self.__font_table_final += 'mi<mk<fontit-beg\n'
                self.__individual_font = 1
            else:
                # nested deeper than one font: skip the whole group
                self.__previous_state = 'font_table'
                self.__state = 'ignore'
                self.__ignore_num = self.__ob_count
        elif self.__token_info == 'cb<nu<clos-brack':
            if int(self.__cb_count) == int(self.__close_group_count) + 1:
                self.__individual_font = 0
                self.__font_table_final += 'mi<mk<fontit-end\n'
        elif self.__individual_font:
            if self.__old_font and self.__token_info == 'tx<nu<__________':
                # old form: the text holding the semicolon ends the font
                if ';' in line:
                    self.__font_table_final += line
                    self.__font_table_final += 'mi<mk<fontit-end\n'
                    self.__individual_font = 0
            else:
                self.__font_table_final += line
        elif self.__token_info == 'cw<ci<font-style':
            self.__old_font = 1
            self.__individual_font = 1
            self.__font_table_final += 'mi<mk<fontit-beg\n'
            self.__font_table_final += line

    def __old_font_func(self, line):
        r"""
        Required:
            line --line to parse
        Returns:
            nothing
        Logic:
            used for older forms of RTF:
            \f3\fswiss\fcharset77 Helvetica-Oblique;\f4\fnil\fcharset77 Geneva;}
            Note how each font is not divided by a bracket

            This method does nothing; the old form is handled in
            __font_table_func.
        """

    def __found_color_table_func(self, line):
        """
        all functions that start with __found operate the same. They set the
        state, initiate a string, determine the self.__close_group_count, and
        set self.__cb_count to zero.
        """
        self.__state = 'color_table'
        self.__color_table_final = ''
        self.__begin_group()

    def __color_table_func(self, line):
        """
        Collect lines until the color table group is closed, then wrap the
        result in its tags.
        """
        if int(self.__cb_count) == int(self.__close_group_count):
            self.__state = 'preamble'
            self.__color_table_final = self.__wrap_table(
                self.__color_table_final, 'color-table', 'clrtbl')
        else:
            self.__color_table_final += line

    def __found_style_sheet_func(self, line):
        """
        Start collecting the style sheet.
        """
        self.__state = 'style_sheet'
        self.__style_sheet_final = ''
        self.__begin_group()

    def __style_sheet_func(self, line):
        """
        Same logic as the  font_table_func, except that groups nested deeper
        than one bracket are not ignored.
        """
        if self.__cb_count == self.__close_group_count:
            self.__state = 'preamble'
            self.__style_sheet_final = self.__wrap_table(
                self.__style_sheet_final, 'style-table', 'styles')
        elif self.__token_info == 'ob<nu<open-brack':
            if int(self.__ob_count) == int(self.__close_group_count) + 1:
                self.__style_sheet_final += 'mi<mk<stylei-beg\n'
        elif self.__token_info == 'cb<nu<clos-brack':
            if int(self.__cb_count) == int(self.__close_group_count) + 1:
                self.__style_sheet_final += 'mi<mk<stylei-end\n'
        else:
            self.__style_sheet_final += line

    def __found_list_table_func(self, line):
        """
        Start collecting the list table.
        """
        self.__state = 'list_table'
        self.__list_table_final = ''
        self.__begin_group()

    def __list_table_func(self, line):
        """
        Collect lines until the list table group is closed, then hand the
        collected string to the list table parser.
        """
        if self.__cb_count == self.__close_group_count:
            self.__state = 'preamble'
            self.__list_table_final, self.__all_lists = \
                self.__list_table_obj.parse_list_table(
                    self.__list_table_final)
        elif self.__token_info != '':
            # an empty token info is the end of the file; nothing to add
            self.__list_table_final += line

    def __found_override_table_func(self, line):
        """
        Start collecting the override table (token 'cw<it<lovr-table').
        """
        self.__override_table_obj = override_table.OverrideTable(
            run_level = self.__run_level,
            list_of_lists = self.__all_lists,
            )
        self.__state = 'override_table'
        self.__override_table_final = ''
        self.__begin_group()

    def __override_table_func(self, line):
        """
        Collect lines until the override table group is closed, then hand
        the collected string to the override table parser.
        """
        if self.__cb_count == self.__close_group_count:
            self.__state = 'preamble'
            self.__override_table_final, self.__all_lists = \
                self.__override_table_obj.parse_override_table(
                    self.__override_table_final)
        elif self.__token_info != '':
            # an empty token info is the end of the file; nothing to add
            self.__override_table_final += line

    def __found_revision_table_func(self, line):
        """
        Start collecting the revision table.
        """
        self.__state = 'revision_table'
        self.__revision_table_final = ''
        self.__begin_group()

    def __revision_table_func(self, line):
        """
        Collect lines until the revision table group is closed, then wrap
        the result in its tags.
        """
        if int(self.__cb_count) == int(self.__close_group_count):
            self.__state = 'preamble'
            self.__revision_table_final = self.__wrap_table(
                self.__revision_table_final, 'revision-table', 'revtbl')
        else:
            self.__revision_table_final += line

    def __found_doc_info_func(self, line):
        """
        Start collecting the document information.
        """
        self.__state = 'doc_info'
        self.__doc_info_table_final = ''
        self.__begin_group()

    def __doc_info_func(self, line):
        """
        Same logic as the style_sheet_func: mark each group that is one
        bracket deep, and wrap the result in its tags at the end.
        """
        if self.__cb_count == self.__close_group_count:
            self.__state = 'preamble'
            self.__doc_info_table_final = self.__wrap_table(
                self.__doc_info_table_final, 'doc-information', 'doc-in')
        elif self.__token_info == 'ob<nu<open-brack':
            if int(self.__ob_count) == int(self.__close_group_count) + 1:
                self.__doc_info_table_final += 'mi<mk<docinf-beg\n'
        elif self.__token_info == 'cb<nu<clos-brack':
            if int(self.__cb_count) == int(self.__close_group_count) + 1:
                self.__doc_info_table_final += 'mi<mk<docinf-end\n'
        else:
            self.__doc_info_table_final += line

    def __margin_func(self, line):
        """
        Handles lines that describe page info. Look up the name of the
        setting in self.__margin_dict and store its value in the
        self.__page dictionary.

        Example line: cw<pa<margin-lef<nu<1728
        """
        changed = self.__margin_dict.get(line[6:16])
        if changed is None:
            sys.stderr.write('woops!\n')
        else:
            self.__page[changed] = line[20:-1]

    def __print_page_info(self):
        """
        Write the page settings as a single empty tag with one attribute per
        setting. Attributes are written in sorted order so that the output
        is the same on every run.
        """
        self.__write_obj.write('mi<tg<empty-att_<page-definition')
        for key, value in sorted(self.__page.items()):
            self.__write_obj.write('<%s>%s' % (key, value))
        self.__write_obj.write('\n')

    def __print_sec_info(self):
        """
        Check if there is any section info. If so, print it out.
        If not, print out an empty tag to satisfy the dtd.
        """
        if not self.__section:
            self.__write_obj.write(
                'mi<tg<open______<section-definition\n')
            return
        self.__write_obj.write('mi<tg<open-att__<section-definition')
        for key, value in sorted(self.__section.items()):
            self.__write_obj.write('<%s>%s' % (key, value))
        self.__write_obj.write('\n')

    def __section_func(self, line):
        """
        Add info pertaining to section to the self.__section dictionary, to be
        printed out later.
        """
        info = self.__translate_sec.get(line[6:16])
        if info is None:
            sys.stderr.write('woops!\n')
        else:
            self.__section[info] = 'true'

    def __body_func(self, line):
        """
        In the body, lines are passed through unchanged.
        """
        self.__write_obj.write(line)

    def __default_func(self, line):
        """
        Do nothing (either in preamble or in body).
        """
        pass

    def __para_def_func(self, line):
        """
        A paragraph definition at bracket count two marks the start of the
        body.
        """
        if self.__cb_count == '0002':
            self.__enter_body()
        self.__write_obj.write(line)

    def __text_func(self, line):
        """
        Text at bracket count two marks the start of the body. If no closing
        bracket has been seen at all yet, the text is treated the same way.
        For older RTF
        Newer RTF should never have to use this function
        """
        if self.__cb_count in ('', '0002'):
            self.__enter_body()
        self.__write_obj.write(line)

    def __row_def_func(self, line):
        """
        A row definition at bracket count two marks the start of the body.
        """
        if self.__cb_count == '0002':
            self.__enter_body()
        self.__write_obj.write(line)

    def __new_section_func(self, line):
        """
        This is new. The start of a section marks the end of the preamble
        """
        if self.__cb_count == '0002':
            self.__enter_body()
        else:
            sys.stderr.write('module is preamble_div\n')
            sys.stderr.write('method is __new_section_func\n')
            sys.stderr.write('bracket count should be 2?\n')
        self.__write_obj.write(line)

    def __write_preamble(self):
        """
        Write all the strings, which represent all the data in the preamble.
        Write a body and section beginning.
        """
        write = self.__write_obj.write
        if self.__no_namespace:
            write('mi<tg<open______<doc\n')
        else:
            write(
                'mi<tg<open-att__<doc<xmlns>http://rtf2xml.sourceforge.net/\n')
        write('mi<tg<open______<preamble\n')
        write(self.__rtf_final)
        # Tables that were not found in the file get a default.
        if not self.__color_table_final:
            self.__make_default_color_table()
        if not self.__font_table_final:
            self.__make_default_font_table()
        write(self.__font_table_final)
        write(self.__color_table_final)
        if not self.__style_sheet_final:
            self.__make_default_style_table()
        write(self.__style_sheet_final)
        write(self.__list_table_final)
        write(self.__override_table_final)
        write(self.__revision_table_final)
        write(self.__doc_info_table_final)
        self.__print_page_info()
        write('ob<nu<open-brack<0001\n')
        write('ob<nu<open-brack<0002\n')
        write('cb<nu<clos-brack<0002\n')
        write('mi<tg<close_____<preamble\n')
        write('mi<tg<open______<body\n')
        write('mi<mk<body-open_\n')

    def __preamble_func(self, line):
        """
        Check if the token info belongs to the dictionary. If so, take the
        appropriate action.
        """
        action = self.__state_dict.get(self.__token_info)
        if action:
            action(line)

    def __process_line(self, line):
        """
        Update the token info and the bracket counts from line, then pass
        the line to the handler of the current state.
        """
        self.__token_info = line[:16]
        if self.__token_info == 'ob<nu<open-brack':
            self.__ob_count = line[-5:-1]
            self.__ob_group += 1
        if self.__token_info == 'cb<nu<clos-brack':
            self.__cb_count = line[-5:-1]
            self.__ob_group -= 1
        action = self.__state_dict.get(self.__state)
        if action is None:
            # Should not happen: every state set in this class has a
            # handler. Report it instead of calling None.
            sys.stderr.write('module is preamble_div\n')
            sys.stderr.write('no handler for state %s\n' % self.__state)
            return
        action(line)

    def make_preamble_divisions(self):
        """
        Process the input file and replace it with the result.

        Returns:
            the dictionary of lists built by the list table and override
            table parsers (empty if the file has neither table)
        """
        self.__initiate_values()
        # mkstemp creates the file atomically; mktemp only returns a name
        # that somebody else could create first.
        handle, self.__write_to = tempfile.mkstemp()
        try:
            self.__write_obj = os.fdopen(handle, 'w')
            try:
                read_obj = open(self.__file, 'r')
                try:
                    while True:
                        line = read_obj.readline()
                        # The empty string returned at the end of the file
                        # is passed to the handlers as well.
                        self.__process_line(line)
                        if not line:
                            break
                finally:
                    read_obj.close()
            finally:
                self.__write_obj.close()
            copy_obj = copy.Copy(bug_handler = self.__bug_handler)
            if self.__copy:
                copy_obj.copy_file(self.__write_to, "preamble_div.data")
            copy_obj.rename(self.__write_to, self.__file)
        finally:
            # Never leave the scratch file behind, even after an error.
            if os.path.exists(self.__write_to):
                os.remove(self.__write_to)
        return self.__all_lists

# Generated by  Doxygen 1.6.0   Back to index