# Source listing of calibre's group_borders.py (text recovered from a
# Doxygen-generated web page; page navigation residue removed).
#
# group_borders.py
#########################################################################
#                                                                       #
#                                                                       #
#   copyright 2002 Paul Henry Tremblay                                  #
#                                                                       #
#   This program is distributed in the hope that it will be useful,     #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of      #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU    #
#   General Public License for more details.                            #
#                                                                       #
#   You should have received a copy of the GNU General Public License   #
#   along with this program; if not, write to the Free Software         #
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA            #
#   02111-1307 USA                                                      #
#                                                                       #
#                                                                       #
#########################################################################
import sys, os, tempfile,  re
from calibre.ebooks.rtf2xml import copy
class GroupBorders:
    """
    Group bordered paragraphs.

    Reads a tokenized file line by line. Each run of consecutive
    paragraph definitions that carry identical ``border-paragraph``
    attributes is wrapped in a single ``border-group`` element, and the
    border attributes are moved from the paragraph definitions onto
    that element. Paragraph definitions without border attributes are
    passed through unchanged.
    """
    def __init__(self,
            in_file,
            bug_handler,
            copy = None,
            run_level = 1,
            wrap = 0,
            ):
        """
        Required:
            'in_file' -- path of the file to process (rewritten in place
            by group_borders)
            'bug_handler' -- exception class raised on internal errors
        Optional:
            'copy' -- whether to make a copy of result for debugging
            'run_level' -- above 2, an unexpected token raises
            bug_handler instead of only being reported on stderr
            'wrap' -- stored but not used by this class
        Returns:
            nothing
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        self.__run_level = run_level
        # mkstemp creates the file atomically; mktemp only returns a name
        # that another process could claim before it is opened.
        fd, self.__write_to = tempfile.mkstemp()
        os.close(fd)
        self.__wrap = wrap

    def __initiate_values(self):
        """
        Required:
            Nothing
        Return:
            Nothing
        Logic:
            Reset the state machine and build the lookup tables.
            self.__end_list holds the tokens that force an open border
            group to end.
        """
        self.__state = "default"
        self.__left_indent = 0
        self.__border_num = 0
        self.__list_type = 'not-defined'
        self.__pard_def = ""
        self.__all_lists = []
        # lines seen after a paragraph definition closed, held back until
        # it is known whether the border group continues
        self.__list_chunk = ''
        self.__state_dict = {
            'default'           :   self.__default_func,
            'in_pard'           :   self.__in_pard_func,
            'after_pard'        :   self.__after_pard_func,
        }
        # Every entry is a 16-character token, because it is compared
        # against line[:16].
        self.__end_list = [
            # section end
            'mi<mk<sect-close',
            'mi<mk<sect-start',
            # table begin
            'mi<mk<tabl-start',
            # field block begin
            'mi<mk<fldbk-end_',
            'mi<mk<fldbkstart',
            # cell end
            'mi<mk<close_cell',
            # item end
            'mi<tg<item_end__',
            # footnote end
            'mi<mk<foot___clo',
            'mi<mk<footnt-ope',
            # heading end
            'mi<mk<header-beg',
            'mi<mk<header-end',
            'mi<mk<head___clo',
            # lists
            # (the comma after list_start was missing, which fused this
            # token with the following one into a string that could
            # never match)
            'mi<mk<list_start',
            # style-group
            'mi<mk<style-grp_',
            'mi<mk<style_grp_',
            'mi<mk<style_gend',
            'mi<mk<stylegend_',
            # don't use
            # 'mi<mk<body-close',
            # 'mi<mk<par-in-fld',
            # 'cw<tb<cell______',
            # 'cw<tb<row-def___',
            # 'cw<tb<row_______',
            # 'mi<mk<sec-fd-beg',
        ]
        # <name>Normal<
        self.__name_regex = re.compile(r'(<name>[^<]+)')
        self.__found_appt = 0
        self.__line_num = 0
        # The newline is excluded from the match so that a border
        # attribute that is last on the line does not drag the line
        # terminator into the border string.
        self.__border_regex = re.compile(
            r'(<border-paragraph[^<\n]+|<border-for-every-paragraph[^<\n]+)')
        self.__last_border_string = ''

    def __in_pard_func(self, line):
        """
        Required:
            line -- the line of current text.
        Return:
            Nothing
        Logic:
            You are inside a bordered paragraph definition. Copy lines
            through until the paragraph definition closes. The closing
            tag itself is not written here; it is emitted later, once it
            is known what follows.
        """
        if self.__token_info == 'mi<tg<close_____' \
            and line[17:-1] == 'paragraph-definition':
            self.__state = 'after_pard'
        else:
            self.__write_obj.write(line)

    def __after_pard_func(self, line):
        """
        Required:
            line -- the line of current text.
        Return:
            Nothing
        Logic:
            A bordered paragraph definition has just closed. Buffer lines
            until either another paragraph definition starts (which may
            continue the border group) or a token from self.__end_list
            forces the group to end.
        """
        if self.__token_info == 'mi<tg<open-att__' \
            and line[17:37] == 'paragraph-definition':
            # found paragraph definition
            self.__pard_after_par_def_func(line)
        elif self.__token_info == 'mi<tg<close_____' \
            and line[17:-1] == 'paragraph-definition':
            sys.stderr.write('Wrong flag in __after_pard_func\n')
            if self.__run_level > 2:
                msg = 'wrong flag'
                # call form of raise: valid in both Python 2 and 3
                raise self.__bug_handler(msg)
        elif self.__token_info in self.__end_list:
            self.__write_obj.write('mi<tg<close_____<paragraph-definition\n')
            self.__write_end_border_tag()
            self.__write_obj.write(self.__list_chunk)
            self.__list_chunk = ''
            self.__state = 'default'
            self.__write_obj.write(line)
        else:
            self.__list_chunk += line

    def __close_pard_(self, line):
        # NOTE(review): nothing in this class calls this method, and it
        # calls self.__write_end_wrap, which is not defined in this class.
        # Left unchanged; confirm whether it can be removed.
        self.__write_obj.write(self.__list_chunk)
        self.__write_obj.write('mi<tg<close_____<paragraph-definition\n')
        self.__write_end_wrap()
        self.__list_chunk = ''
        self.__state = 'default'

    def __pard_after_par_def_func(self, line):
        """
        Required:
            line -- the opening line of a paragraph definition that
            follows a bordered paragraph definition.
        Return:
            Nothing
        Logic:
            Always close the pending paragraph definition first. Then:
            no border on the new definition -- end the border group and
            go back to the default state; same border as before -- stay
            in the current group; different border -- end the current
            group and start a new one.
        """
        write = self.__write_obj.write
        write('mi<tg<close_____<paragraph-definition\n')
        if not self.__is_border_func(line):
            self.__write_end_border_tag()
            write(self.__list_chunk)
            write(line)
            self.__state = 'default'
        else:
            border_string, pard_string = self.__parse_pard_with_border(line)
            if self.__last_border_string == border_string:
                # just keep going
                write(self.__list_chunk)
            else:
                # different border for this paragraph definition
                self.__write_end_border_tag()
                write(self.__list_chunk)
                self.__write_start_border_tag(border_string)
                self.__last_border_string = border_string
            write(pard_string)
            self.__state = 'in_pard'
        self.__list_chunk = ''

    def __default_func(self, line):
        """
        Required:
            self, line
        Returns:
            Nothing
        Logic
            Look for the start of a paragraph definition. If it carries
            border attributes, open a border group, write the paragraph
            definition without those attributes and change the state to
            in_pard. Every other line is written unchanged.
        """
        starts_pard = self.__token_info == 'mi<tg<open-att__' \
            and line[17:37] == 'paragraph-definition'
        if not (starts_pard and self.__is_border_func(line)):
            self.__write_obj.write(line)
            return
        border_string, pard_string = self.__parse_pard_with_border(line)
        self.__write_start_border_tag(border_string)
        self.__write_obj.write(pard_string)
        self.__last_border_string = border_string
        self.__state = 'in_pard'

    def __write_start_border_tag(self, the_string):
        """
        Write the marker and opening tag of a border group. The tag
        carries the border attributes in the_string plus a running
        number of the form s0001, s0002, ...
        """
        self.__write_obj.write('mi<mk<start-brdg\n' )
        self.__border_num += 1
        num_string = 's%04d' % self.__border_num
        the_string += '<num>%s' % num_string
        self.__write_obj.write('mi<tg<open-att__<border-group%s\n' % the_string)

    def __write_end_border_tag(self):
        """
        Write the marker and closing tag of a border group.
        """
        self.__write_obj.write('mi<mk<end-brdg__\n' )
        self.__write_obj.write('mi<tg<close_____<border-group\n')

    def __is_border_func(self, line):
        """
        Return 1 if the line contains 'border-paragraph' anywhere outside
        its <name> attribute, otherwise 0. The name is stripped first so
        that a style whose name contains that text is not mistaken for a
        border attribute.
        """
        line = re.sub(self.__name_regex, '', line)
        if line.find('border-paragraph') > -1:
            return 1
        return 0

    def __parse_pard_with_border(self, line):
        """
        Split a paragraph-definition line into its border attributes and
        the rest.
        Returns:
            (border_string, pard_string) -- the concatenated
            '<border-paragraph...' attributes, and the line with those
            attributes removed. '<border-for-every-paragraph...'
            attributes stay in pard_string.
        """
        border_string = ''
        pard_string = ''
        tokens = re.split(self.__border_regex, line)
        for token in tokens:
            if token[0:17] == '<border-paragraph':
                border_string += token
            else:
                pard_string += token
        return border_string, pard_string

    def __write_pard_with_border(self, line):
        """
        Open a border group for the border attributes found in line and
        write the paragraph definition without them.
        """
        border_string, pard_string = self.__parse_pard_with_border(line)
        self.__write_start_border_tag(border_string)
        self.__write_obj.write(pard_string)

    def __get_style_name(self, line):
        """
        Remember the style name when the current token is a style-name
        marker.
        """
        if self.__token_info == 'mi<mk<style-name':
            self.__style_name = line[17:-1]

    def group_borders(self):
        """
        Required:
            nothing
        Returns:
            original file will be changed
        Logic:
            Run every line of the input file through the handler for the
            current state, writing the result to a temporary file. Then
            hand the temporary file to the copy helper, which optionally
            keeps a debugging copy and renames the result onto the input
            path. The temporary file is removed afterwards, also when
            processing fails.
        """
        self.__initiate_values()
        try:
            with open(self.__file, 'r') as read_obj:
                with open(self.__write_to, 'w') as write_obj:
                    self.__write_obj = write_obj
                    for line in read_obj:
                        # the first 16 characters identify the token
                        self.__token_info = line[:16]
                        self.__get_style_name(line)
                        action = self.__state_dict[self.__state]
                        action(line)
            copy_obj = copy.Copy(bug_handler = self.__bug_handler)
            if self.__copy:
                copy_obj.copy_file(self.__write_to, "group_borders.data")
            copy_obj.rename(self.__write_to, self.__file)
        finally:
            # guard: the file may already be gone after an earlier run
            if os.path.exists(self.__write_to):
                os.remove(self.__write_to)

# Listing generated by Doxygen 1.6.0