Logo Search packages:      
Sourcecode: calibre version File versions  Download package

sections.py

#########################################################################
#                                                                       #
#                                                                       #
#   copyright 2002 Paul Henry Tremblay                                  #
#                                                                       #
#   This program is distributed in the hope that it will be useful,     #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of      #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU    #
#   General Public License for more details.                            #
#                                                                       #
#   You should have received a copy of the GNU General Public License   #
#   along with this program; if not, write to the Free Software         #
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA            #
#   02111-1307 USA                                                      #
#                                                                       #
#                                                                       #
#########################################################################
import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy
class Sections:
    """
    =================
    Purpose
    =================
    Write section tags for a tokenized file. (This module won't be of any use
    to you unless you use it as part of the other modules.)
    ---------------
    Logic
    ---------------
    The tags for the first section breaks have already been written.
    RTF stores section breaks with the \\sect tag. Each time this tag is
    encountered, add one to the counter.
    When the \\sectd tag is encountered, collect all the appropriate tokens
    that describe the section. When a \\pard is reached, stop collecting
    tokens and write the section tags.
    The exception to this method occurs when sections occur in field blocks,
    such as the index. Normally, two section breaks occur within the index and
    other field-blocks.
    CHANGE (2004-04-26): sections that occur in field-blocks are no longer
    written. All section information inside a field-block is ignored and the
    lines are passed through unchanged. The helper methods of the older
    scheme (storing section numbers and descriptions in lists and writing
    them at the end of the field-block) are kept but are no longer wired into
    the dispatch tables.
    """

    def __init__(self,
            in_file,
            bug_handler,
            copy = None,
            run_level = 1):
        """
        Required:
            in_file -- path of the tokenized file to process; it is
            overwritten with the result by make_sections
            bug_handler -- exception class raised when an internal
            inconsistency is detected (only when run_level > 3)
        Optional:
            copy -- if true, make_sections also copies the result to
            "sections.data" for debugging
            run_level -- strictness level; values above 3 turn internal
            inconsistencies into exceptions
        Returns:
            nothing
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        self.__run_level = run_level
        # The scratch file is created (securely, with mkstemp) at the start
        # of make_sections instead of reserving a guessable name here.
        self.__write_to = None

    def __initiate_values(self):
        """
        Reset the parser state and build the dispatch tables.
        Requires:
            nothing
        Returns:
            nothing
        """
        self.__mark_start = 'mi<mk<sect-start\n'
        self.__mark_end =   'mi<mk<sect-end__\n'
        self.__in_field = 0
        self.__section_values = {}
        self.__list_of_sec_values = []
        self.__field_num = []
        self.__section_num = 0
        self.__state = 'before_body'
        self.__found_first_sec = 0
        self.__text_string = ''
        self.__field_instruction_string = ''
        # Initialised here so that appending to it can never hit a missing
        # attribute, whatever order the handlers run in.
        self.__sec_in_field_string = ''
        # state name -> handler called with each line while in that state
        self.__state_dict = {
        'before_body'       : self.__before_body_func,
        'body'              : self.__body_func,
        'before_first_sec'  : self.__before_first_sec_func,
        'section'           : self.__section_func,
        'section_def'       : self.__section_def_func,
        'sec_in_field'      : self.__sec_in_field_func,
        }
        # token (first 16 characters of a line) -> handler, used in 'body'
        # example line: cw<sc<sect-defin<nu<true
        self.__body_dict = {
        'cw<sc<section___'      : self.__found_section_func,
        'mi<mk<sec-fd-beg'      : self.__found_sec_in_field_func,
        'cw<sc<sect-defin'      : self.__found_section_def_bef_sec_func,
        }
        # token -> (handler, readable attribute name), used in 'section_def'
        self.__section_def_dict = {
        'cw<pf<par-def___'      : (self.__end_sec_def_func, None),
        'mi<mk<body-open_'      : (self.__end_sec_def_func, None),
        'cw<tb<columns___'      : (self.__attribute_func, 'columns'),
        'cw<pa<margin-lef'      : (self.__attribute_func, 'margin-left'),
        'cw<pa<margin-rig'      : (self.__attribute_func, 'margin-right'),
        'mi<mk<header-ind'      : (self.__end_sec_def_func, None),
        # premature endings: text or character formatting before \pard
        'tx<nu<__________'      : (self.__end_sec_premature_func, None),
        'cw<ci<font-style'      : (self.__end_sec_premature_func, None),
        'cw<ci<font-size_'      : (self.__end_sec_premature_func, None),
        }
        # token -> handler, used in 'sec_in_field'. Since 2004-04-26 only the
        # end-of-field marker is handled; section tokens inside a field are
        # deliberately not dispatched.
        self.__sec_in_field_dict = {
        'mi<mk<sec-fd-end'      : self.__end_sec_in_field_func,
        }

    def __found_section_def_func(self, line):
        """
        Required:
            line -- the line to parse (not used)
        Returns:
            nothing
        Logic:
            A section definition has been found. Change the state to
            section_def (so subsequent lines are processed as part of the
            section definition) and clear the section_values dictionary.
        """
        self.__section_values.clear()
        self.__state = 'section_def'

    def __attribute_func(self, line, name):
        """
        Required:
            line -- the line to be parsed
            name -- the readable attribute name (as opposed to the
            abbreviated token)
        Returns:
            nothing
        Logic:
            Store the attribute in the section values dictionary so it can be
            retrieved later. The key is the name; the value is everything
            from character 20 up to, but not including, the last character of
            the line.
            ex: cw<tb<columns___<nu<2  -->  '2'
        """
        self.__section_values[name] = line[20:-1]

    def __found_section_func(self, line):
        """
        Requires:
            line -- the line to parse
        Returns:
            nothing
        Logic:
            The beginning of a section has been found. Change the state to
            section, write the line and add one to the section counter.
        """
        self.__state = 'section'
        self.__write_obj.write(line)
        self.__section_num += 1

    def __found_section_def_bef_sec_func(self, line):
        """
        Requires:
            line -- the line to parse
        Returns:
            nothing
        Logic:
            A section definition has been found in the body without a
            preceding section token. Add one to the section counter, start
            collecting the definition and write the line.
        """
        self.__section_num += 1
        self.__found_section_def_func(line)
        self.__write_obj.write(line)

    def __section_func(self, line):
        """
        Requires:
            line -- the line to parse
        Returns:
            nothing
        Logic:
            While in the section state, look for the section definition
            token and start collecting the definition when it is found.
            The line is always written.
        """
        if self.__token_info == 'cw<sc<sect-defin':
            self.__found_section_def_func(line)
        self.__write_obj.write(line)

    def __section_def_func(self, line):
        """
        Required:
            line -- line to parse
        Returns:
            nothing
        Logic:
            Inside a section definition. If the token ends the definition or
            carries an attribute, call the matching handler first. The line
            itself is then written to the output, unless a handler ran while
            in a field block, in which case it is appended to the field
            string instead.
        """
        action, name = self.__section_def_dict.get(
            self.__token_info, (None, None))
        if action:
            action(line, name)
        if action and self.__in_field:
            self.__sec_in_field_string += line
        else:
            self.__write_obj.write(line)

    def __end_sec_def_func(self, line, name):
        """
        Requires:
            line -- the line to parse
            name -- readable name (not used)
        Returns:
            nothing
        Logic:
            The end of the section definition has been found. Reset the
            state (body, or sec_in_field when inside a field block) and call
            on the write_section method.
        """
        if self.__in_field:
            self.__state = 'sec_in_field'
        else:
            self.__state = 'body'
        self.__write_section(line)

    def __end_sec_premature_func(self, line, name):
        """
        Requires:
            line -- the line to parse
            name -- readable name (not used)
        Returns:
            nothing
        Logic:
            Text or control words indicating text have been found before
            \\pard. This should indicate older RTF. Reset the state and write
            the section tags exactly as for a regular end of definition, then
            insert a paragraph definition followed by an empty pair of
            brackets to mark the end of the paragraph definition.
        """
        self.__end_sec_def_func(line, name)
        for token in (
                'cw<pf<par-def___<nu<true\n',
                'ob<nu<open-brack<0000\n',
                'cb<nu<clos-brack<0000\n',
                ):
            self.__write_obj.write(token)

    def __write_section(self, line):
        """
        Requires:
            line -- the current line (not used)
        Returns:
            nothing
        Raises:
            the bug_handler exception when the state is neither body nor
            sec_in_field and run_level is greater than 3
        Logic:
            Form the section tags: a start marker, a close tag for the
            previous section (except for the first section), the open tag
            with the section number and the collected attributes, and an end
            marker. In the body state write this string to the output file;
            in the sec_in_field state hand it to the handle_sec_def method.
        """
        pieces = [self.__mark_start]
        if self.__found_first_sec:
            pieces.append('mi<tg<close_____<section\n')
        else:
            self.__found_first_sec = 1
        pieces.append(
            'mi<tg<open-att__<section<num>%s'
            '<num-in-level>%s'
            '<type>rtf-native'
            '<level>0'
            % (self.__section_num, self.__section_num)
            )
        for key, value in self.__section_values.items():
            pieces.append('<%s>%s' % (key, value))
        pieces.append('\n')
        pieces.append(self.__mark_end)
        my_string = ''.join(pieces)
        if self.__state == 'body':
            self.__write_obj.write(my_string)
        elif self.__state == 'sec_in_field':
            self.__handle_sec_def(my_string)
        elif self.__run_level > 3:
            msg = 'missed a flag\n'
            # Call form of raise: valid in both Python 2 and Python 3.
            raise self.__bug_handler(msg)

    def __handle_sec_def(self, my_string):
        """
        Requires:
            my_string -- the string of attributes and values (not used)
        Returns:
            nothing
        Logic:
            Append the current attributes and values to a list so they can
            be used later, at the end of the field-block.
        """
        # Store a snapshot: the live dictionary is cleared whenever the next
        # section definition starts, which would otherwise empty this entry.
        self.__list_of_sec_values.append(dict(self.__section_values))

    def __body_func(self, line):
        """
        Requires:
            line -- the line to parse
        Returns:
            nothing
        Logic:
            Look for the beginning of a section, a section definition or a
            field block containing sections. Any other line is written to
            the output file.
        """
        action = self.__body_dict.get(self.__token_info)
        if action:
            action(line)
        else:
            self.__write_obj.write(line)

    def __before_body_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Logic:
            Look for the beginning of the body. Always write the line.
        """
        if self.__token_info == 'mi<mk<body-open_':
            self.__state = 'before_first_sec'
        self.__write_obj.write(line)

    def __before_first_sec_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Logic:
            Look for the beginning of the first section. This can be
            \\sectd, but in older RTF it could be any paragraph definition
            or text. In the latter two cases the open tag of the first
            section is written right away; for text a paragraph definition
            is written as well. The line is always written last.
        """
        token = self.__token_info
        if token == 'cw<sc<sect-defin':
            self.__state = 'section_def'
            self.__section_num += 1
            self.__section_values.clear()
        elif token in ('cw<pf<par-def___', 'tx<nu<__________'):
            self.__state = 'body'
            self.__section_num += 1
            self.__write_obj.write(
                    'mi<tg<open-att__<section<num>%s'
                    '<num-in-level>%s'
                    '<type>rtf-native'
                    '<level>0\n'
                    % (self.__section_num, self.__section_num)
                    )
            if token == 'tx<nu<__________':
                # NOTE(review): elsewhere in this class the paragraph
                # definition is written as 'cw<pf<par-def___<nu<true';
                # confirm whether the missing '<nu' is intended.
                self.__write_obj.write('cw<pf<par-def___<true\n')
            self.__found_first_sec = 1
        self.__write_obj.write(line)

    def __found_sec_in_field_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Logic:
            The beginning of a field block that has sections inside of it
            has been found. Change the state and remember the line as the
            start of the field string.
        """
        # NOTE(review): the begin marker is only stored, never written to
        # the output, whereas the end marker is written; confirm this
        # asymmetry is intended.
        self.__state = 'sec_in_field'
        self.__sec_in_field_string = line
        self.__in_field = 1

    def __sec_in_field_func(self, line):
        """
        Requires:
            line -- the line to parse
        Returns:
            nothing
        Logic:
            Check for the end of the field. Since 2004-04-26 every other
            line is simply written out; sections and section definitions
            inside the field are ignored.
        """
        action = self.__sec_in_field_dict.get(self.__token_info)
        if action:
            action(line)
        else:
            self.__write_obj.write(line)

    def __end_sec_in_field_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Logic:
            The end of the field block has been reached. Return to the body
            state and write the line.
        """
        # Before 2004-04-26 the collected field string was written here,
        # wrapped in section tags by print_field_sec_attributes. That is no
        # longer done.
        self.__state = 'body'
        self.__in_field = 0
        self.__write_obj.write(line)

    def __print_field_sec_attributes(self):
        """
        Requires:
            nothing
        Returns:
            nothing
        Logic:
            Take the first section number and the first dictionary of values
            from the stored lists. Write the close tag and the open tag,
            followed by the attributes and values of the dictionary. Remove
            the first item from each list.
            This method is not called by any other method of this class
            since the change of 2004-04-26.
        """
        num = self.__field_num[0]
        self.__field_num = self.__field_num[1:]
        self.__write_obj.write(
        'mi<tg<close_____<section\n'
        'mi<tg<open-att__<section<num>%s' % str(num)
        )
        if self.__list_of_sec_values:
            first_values = self.__list_of_sec_values[0]
            for key in first_values.keys():
                # NOTE(review): the newline after each attribute splits the
                # tag over several lines; looks unintended, left as is
                # because the method is unused.
                self.__write_obj.write(
                '<%s>%s\n' % (key, first_values[key]))
            self.__list_of_sec_values = self.__list_of_sec_values[1:]
        self.__write_obj.write('<level>0')
        self.__write_obj.write('<type>rtf-native')
        self.__write_obj.write('<num-in-level>%s' % str(self.__section_num))
        self.__write_obj.write('\n')

    def __found_section_in_field_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Logic:
            A section has been found in a field block. Add one to the
            section counter, append this number to a list and add the line
            to the field string. (No longer in the dispatch table.)
        """
        self.__section_num += 1
        self.__field_num.append(self.__section_num)
        self.__sec_in_field_string += line

    def __found_section_def_in_field_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Logic:
            A section definition has been found in a field block. Change
            the state and clear the values dictionary. (No longer in the
            dispatch table.)
        """
        self.__found_section_def_func(line)

    def make_sections(self):
        """
        Requires:
            nothing
        Returns:
            nothing (changes the original file)
        Logic:
            Read one line in at a time. The first 16 characters of the line
            are the token; the handler is chosen by the current state. The
            output goes to a temporary file, which afterwards replaces the
            original file through the Copy helper and is then removed. If
            the copy option was given, the result is also copied to
            "sections.data".
        """
        self.__initiate_values()
        read_obj = open(self.__file, 'r')
        try:
            # mkstemp creates the file atomically; mktemp only returned a
            # name that another process could claim first.
            handle, self.__write_to = tempfile.mkstemp()
            self.__write_obj = os.fdopen(handle, 'w')
            try:
                for line in read_obj:
                    self.__token_info = line[:16]
                    action = self.__state_dict.get(self.__state)
                    if action is None:
                        sys.stderr.write('no no matching state in module sections.py\n')
                        sys.stderr.write(self.__state + '\n')
                        # There is no handler to call: keep the line rather
                        # than failing on a call to None.
                        self.__write_obj.write(line)
                        continue
                    action(line)
            finally:
                self.__write_obj.close()
        finally:
            read_obj.close()
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "sections.data")
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)

Generated by  Doxygen 1.6.0   Back to index