Logo Search packages:      
Sourcecode: calibre version File versions  Download package

table.py
#########################################################################
#                                                                       #
#                                                                       #
#   copyright 2002 Paul Henry Tremblay                                  #
#                                                                       #
#   This program is distributed in the hope that it will be useful,     #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of      #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU    #
#   General Public License for more details.                            #
#                                                                       #
#   You should have received a copy of the GNU General Public License   #
#   along with this program; if not, write to the Free Software         #
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA            #
#   02111-1307 USA                                                      #
#                                                                       #
#                                                                       #
#########################################################################
import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy, border_parse
"""
States.
1. 'not_in_table'
    1. 'cw<tb<row-def___' start a row definition
    2. 'mi<mk<in-table__' start table
2. 'in_table'
    1. 'mi<mk<pard-start', start of a row, cell
    2. 'mi<mk<not-in-tbl', end the table.
    3. 'cw<tb<row-def___' start a row definition
3. in_row_definition
    1.  'mi<mk<not-in-tbl'  :   end the row defintion. If in table, end the table.
    2.  'mi<mk<pard-start'  :   end the row defintion
        if already in the table, start a row and cell.
    3.  'cw<tb<row_______'  : end the row definition, end the row
    4.  'cw...' use another method to handle the control word
        control word might be added to dictionary.
    5.  'mi<mk<in-table__' If already in table, do nothing. Otherwise
        start the table.
4. 'in_row'
    1. 'mi<mk<pard-start', start  cell
    2. 'mi<mk<not-in-tbl'  end table,
    3. 'cw<tb<row_______'  close row,
5. 'in_cell'
    1. 'mi<mk<not-in-tbl', end table
    2. 'cw<tb<cell______', end cell
"""
00046 class Table:
    """
    Make tables.
    Logic:
    Read one line at a time. The default state (self.__state) is
    'not_in_table'. Look for either a 'cw<tb<in-table__', or a row definition.
    """
00053     def __init__(self,
            in_file,
            bug_handler,
            copy = None,
            run_level = 1,):
        """
        Required:
            'file'--file to parse
        Optional:
            'copy'-- whether to make a copy of result for debugging
            'temp_dir' --where to output temporary results (default is
            directory from which the script is run.)
        Returns:
            nothing
            """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        self.__run_level = run_level
        self.__write_to = tempfile.mktemp()
00073     def __initiate_values(self):
        """
        Initiate all values.
        """
        self.__state_dict = {
        'in_table':         self.__in_table_func,
        'in_row_def':       self.__in_row_def_func,
        'not_in_table':     self.__not_in_table_func,
        'in_cell':          self.__in_cell_func,
        'in_row':           self.__in_row_func,
        }
        self.__not_in_table_dict = {
        'cw<tb<row-def___':   self.__found_row_def_func,
        'cw<tb<in-table__': self.__start_table_func,
        'mi<mk<in-table__'  : self.__start_table_func,
        }
        # can't use this dictionary. When in row_definition, many tokens
        # require multiple definitions
        self.__in_row_definition_dict = {
        'mi<mk<not-in-tbl'  :   self.__end_row_table_func,
        'mi<mk<pard-start'  :   self.__end_row_def_func,
        }
        self.__in_row_dict = {
        'mi<mk<not-in-tbl'  :   self.__close_table,
        'mi<mk<pard-start'  :   self.__start_cell_func,
        'cw<tb<row_______'  :   self.__end_row_func,
        'cw<tb<cell______'  :   self.__empty_cell,
        }
        # set the default state
        self.__state = ['not_in_table']
        # set empty data for all tables
        self.__table_data = []
        # just in case there is no table data
        self.__row_dict = {}
        self.__cell_list = []
        self.__cell_widths = []
00109     def __in_table_func(self, line):
        """
        Requires:
            line -- line to parse
        Logic:
            Look for the end of the table. If found, close out the table.
            Look for  'mi<mk<pard-start', which marks the beginning of a row. Start
            a row and start a cell.
        """
        # 'cell'               :    ('tb', 'cell______', self.default_func),
        if self.__token_info == 'mi<mk<not-in-tbl' or\
            self.__token_info == 'mi<mk<sect-start' or\
            self.__token_info == 'mi<mk<sect-close' or\
            self.__token_info == 'mi<mk<body-close':
            self.__close_table(line)
        elif self.__token_info == 'mi<mk<pard-start':
            self.__start_row_func(line)
            self.__start_cell_func(line)
        elif self.__token_info == 'cw<tb<row-def___':
            self.__found_row_def_func(line)
        elif self.__token_info == 'cw<tb<cell______':
            self.__start_row_func(line)
            self.__empty_cell( line)
        self.__write_obj.write(line)
00133     def __not_in_table_func(self, line):
        """
        Requires:
            line -- the line of text read in from document
        Returns:
            nothing
        Logic:
            The state is not in a table, so look for the two tokens that
            mark the start of a table: 'cw<tb<row-def', or 'cw<tb<in-table__'.
            If these tokens are found, use another method to start a table
            and change states. Otherwise, just output the line.
        """
        action = self.__not_in_table_dict.get(self.__token_info)
        if action:
            action(line)
        self.__write_obj.write(line)
00149     def __close_table(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            ?
        Logic:
            Write the end marker for the table.
            Write the end tag for the table.
            Set the state to ['not_in_table']
        """
        self.__write_obj.write('mi<mk<table-end_\n')
        self.__state = ['not_in_table']
        self.__table_data[-1]['number-of-columns'] = self.__max_number_cells_in_row
        self.__table_data[-1]['number-of-rows'] = self.__rows_in_table
        average_cells_in_row = self.__mode(self.__list_of_cells_in_row)
        self.__table_data[-1]['average-cells-per-row'] = average_cells_in_row
        average_cell_width = self.__mode(self.__cell_widths)
        self.__table_data[-1]['average-cell-width'] = average_cell_width
00168     def __found_row_def_func(self, line):
        """
        Requires:
            line don't need this except for consistency with other methods.
        Returns:
            nothing
        Logic:
            A row definition has been found. Collect all the data from this
            to use later in writing attributes for the table.
        """
        self.__state.append('in_row_def')
        self.__last_cell_position = 0
        self.__row_dict = {}
        self.__cell_list = []
        self.__cell_list.append({})
        self.__cell_widths = []
00184     def __start_table_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            ?
        Logic:
            Add the 'in_table' to the state list.
            Write out the table marker.
            Initialize table values (not sure about these yet)
        """
      self.__rows_in_table = 0;
      self.__cells_in_table = 0;
      self.__cells_in_row = 0;
        self.__max_number_cells_in_row = 0
        self.__table_data.append({})
        self.__list_of_cells_in_row = []
        self.__write_obj.write('mi<mk<tabl-start\n')
        self.__state.append('in_table')
00203     def __end_row_table_func(self, line):
        """
        Requires:
            line --just for consistencey
        Returns:
            ?
        Logic:
            ?
        """
        self.__close_table(self, line)
00213     def __end_row_def_func(self, line):
        """
        Requires:
            line --just for consistency
        Returns:
            nothing
        Logic:
            change the state.
            get rid of the last {} in the cell list
            figure out the number of cells based on the self.__row_dict[widths]
            ('122, 122')
        """
        if len(self.__state) > 0:
            if self.__state[-1] == 'in_row_def':
                self.__state.pop()
        # added [{]] at the *end* of each /cell. Get rid of extra one
        self.__cell_list.pop()
        widths = self.__row_dict.get('widths')
        if widths:
            width_list = widths.split(',')
            num_cells = len (width_list)
            self.__row_dict['number-of-cells'] = num_cells
00235     def __in_row_def_func(self, line):
        """
        Requires:
            line --line to parse
        Returns:
            nothing
        Logic:
            In the text that defines a row. If a control word is found, handle the
            control word with another method.
            Check for states that will end this state.
            While in the row definition, certain tokens can end a row or end a table.
            If a paragrah definition (pard-start) is found, and the you are already in
            a table, start of a row.
        """
        if self.__token_info == 'cw<tb<row_______':
            # write tags
            self.__end_row_func(line)
            # change the state
            self.__end_row_def_func(line)
            self.__write_obj.write(line)
        elif line[0:2] == 'cw':
            self.__handle_row_token(line)
            self.__write_obj.write(line)
        elif self.__token_info == 'mi<mk<not-in-tbl' and 'in_table' in self.__state:
            self.__end_row_def_func(line)
            self.__close_table(line)
            self.__write_obj.write(line)
        elif self.__token_info == 'mi<mk<pard-start':
            self.__end_row_def_func(line)
            # if already in the table, start a row, then cell.
            if (self.__state) > 0 and self.__state[-1] == 'in_table':
                self.__start_row_func(line)
                self.__start_cell_func(line)
            self.__write_obj.write(line)
        elif self.__token_info == 'mi<mk<in-table__':
            self.__end_row_def_func(line)
            # if not in table, start a new table
            if len(self.__state) > 0 and self.__state[-1] != 'in_table':
                self.__start_table_func(line)
            self.__write_obj.write(line)
        else:
            self.__write_obj.write(line)
00277     def __handle_row_token(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            ?
        Logic:
            the tokens in the row definition contain the following information:
               1. row borders.
               2. cell borders for all cells in the row.
               3. cell postions for all cells in the row.
            Put all information about row borders into a row dictionary.
            Put all information about cell borders into into the dictionary in
            the last item in the cell list. ([{border:something, width:something},
                    {border:something, width:something}])
    cw<bd<bor-t-r-to<nu<bdr-hair__|bdr-li-wid:0.50
        """
        if line[3:5] == 'bd':
            border_obj = border_parse.BorderParse()
            the_dict = border_obj.parse_border(line)
            keys = the_dict.keys()
            # border-cell-top-hairline
            in_cell = 0
            for key in keys:
                if key[0:11] == 'border-cell':
                    in_cell = 1
            for key in keys:
                if in_cell:
                    self.__cell_list[-1][key] = the_dict[key]
                else:
                    self.__row_dict[key] = the_dict[key]
        # cw<tb<cell-posit<nu<216.00
        elif self.__token_info == 'cw<tb<cell-posit':
            self.__found_cell_position(line)
        # cw<tb<row-pos-le<nu<-5.40
        elif self.__token_info == 'cw<tb<row-pos-le':
            position = line[20:-1]
            self.__row_dict['left-row-position'] = position
        elif self.__token_info == 'cw<tb<row-header':
            self.__row_dict['header'] = 'true'
00317     def __start_cell_func(self, line):
        """
        Required:
            line -- the line of text
        Returns:
            nothing
        Logic:
            Append 'in_cell' for states
            If the self.__cell list containst dictionaries, get the last dictionary.
            Write value => attributes for key=> value
            pop the self.__cell_list.
            Otherwise, print out a cell tag.
        """
        self.__state.append('in_cell')
        # self.__cell_list = []
        if len(self.__cell_list) > 0:
            self.__write_obj.write('mi<tg<open-att__<cell')
            # cell_dict = self.__cell_list[-1]
            cell_dict = self.__cell_list[0]
            keys = cell_dict.keys()
            for key in keys:
                self.__write_obj.write('<%s>%s' % (key, cell_dict[key]))
            self.__write_obj.write('\n')
            # self.__cell_list.pop()
            self.__cell_list.pop(0)
            # self.__cell_list = self.__cell_list[1:]
        else:
            self.__write_obj.write('mi<tg<open______<cell\n')
        self.__cells_in_table += 1
        self.__cells_in_row += 1
00347     def __start_row_func(self, line):
        """
        Required:
            line -- the line of text
        Returns:
            nothing
        Logic:
            Append 'in_row' for states
            Write value => attributes for key=> value
        """
        self.__state.append('in_row')
        self.__write_obj.write('mi<tg<open-att__<row')
        keys = self.__row_dict.keys()
        for key in keys:
            self.__write_obj.write('<%s>%s' % (key, self.__row_dict[key]))
        self.__write_obj.write('\n')
        self.__cells_in_row = 0
        self.__rows_in_table += 1
00365     def __found_cell_position(self, line):
        """
        needs:
            line: current line
        returns:
            nothing
        logic:
           Calculate the cell width.
           If the cell is the first cell, you should add the left cell position to it.
           (This value is often negative.)
            Next, set the new last_cell_position to the current cell position.
        """
        # cw<tb<cell-posit<nu<216.00
        new_cell_position = round(float(line[20:-1]), 2)
        left_position = 0
        if self.__last_cell_position == 0:
            left_position = self.__row_dict.get('left-row-position', 0)
            left_position = float(left_position)
        width = new_cell_position - self.__last_cell_position - left_position
        # width = round(width, 2)
        width = str('%.2f' % width)
        self.__last_cell_position = new_cell_position
        widths_exists = self.__row_dict.get('widths')
        if widths_exists:
            self.__row_dict['widths'] += ', %s' % str(width)
        else:
            self.__row_dict['widths'] = str(width)
        self.__cell_list[-1]['width'] = width
        self.__cell_list.append({})
        self.__cell_widths.append(width)
00395     def __in_cell_func(self, line):
        """
        Required:
            line
        Returns:
            nothing
        Logic:
            In the middle of a cell.
            Look for the close of the table. If found, use the close table function to close
            the table.
            Look for the close of the cell. If found, use the close cell function to close out
            the cell.
            Otherwise, print out the line.
        """
        # cw<tb<cell______<nu<true
        # mi<mk<sect-start
        if self.__token_info == 'mi<mk<not-in-tbl' or\
            self.__token_info == 'mi<mk<sect-start' or\
            self.__token_info == 'mi<mk<sect-close' or\
            self.__token_info == 'mi<mk<body-close':
            self.__end_cell_func(line)
            self.__end_row_func(line)
            self.__close_table(line)
            self.__write_obj.write(line)
        elif self.__token_info ==  'cw<tb<cell______':
            self.__end_cell_func(line)
        else:
            self.__write_obj.write(line)
00423     def __end_cell_func(self, line):
        """
        Requires:
            line
        Returns:
            nothing
        Logic:
            End the cell. Print out the closing marks. Pop the self.__state.
        """
        if len(self.__state) > 1:
            if self.__state[-1] == 'in_cell':
                self.__state.pop()
        self.__write_obj.write('mi<mk<close_cell\n')
        self.__write_obj.write('mi<tg<close_____<cell\n')
        self.__write_obj.write('mi<mk<closecell_\n')
    def __in_row_func(self, line):
        if self.__token_info == 'mi<mk<not-in-tbl' or\
            self.__token_info == 'mi<mk<sect-start' or\
            self.__token_info == 'mi<mk<sect-close' or\
            self.__token_info == 'mi<mk<body-close':
            self.__end_row_func(line)
            self.__close_table(line)
            self.__write_obj.write(line)
        else:
            action = self.__in_row_dict.get(self.__token_info)
            if action:
                action(line)
            self.__write_obj.write(line)
        """
        elif self.__token_info == 'mi<mk<pard-start':
            self.__start_cell_func(line)
            self.__write_obj.write(line)
        elif self.__token_info == 'cw<tb<row_______':
            self.__end_row_func(line)
            self.__write_obj.write(line)
        else:
            self.__write_obj.write(line)
        """
00461     def __end_row_func(self, line):
        """
        """
        if len(self.__state) > 1 and self.__state[-1] == 'in_row':
            self.__state.pop()
            self.__write_obj.write('mi<tg<close_____<row\n')
        else:
            self.__write_obj.write('mi<tg<empty_____<row\n')
            self.__rows_in_table += 1
        if self.__cells_in_row > self.__max_number_cells_in_row:
            self.__max_number_cells_in_row = self.__cells_in_row
        self.__list_of_cells_in_row.append(self.__cells_in_row)
00473     def __empty_cell(self, line):
        """
        Required:
            line -- line of text
        Returns:
            nothing
        Logic:
            Write an empty tag with attributes if there are attributes.
            Otherwise, writen an empty tag with cell as element.
        """
        if len(self.__cell_list) > 0:
            self.__write_obj.write('mi<tg<empty-att_<cell')
            cell_dict = self.__cell_list[-1]
            keys = cell_dict.keys()
            for key in keys:
                self.__write_obj.write('<%s>%s' % (key, cell_dict[key]))
            self.__write_obj.write('\n')
        else:
            self.__write_obj.write('mi<tg<empty_____<cell\n')
        self.__cells_in_table += 1
        self.__cells_in_row += 1
00494     def __mode(self, the_list):
        """
        Required:
            the_list -- a list of something
        Returns:
            the number that occurs the most
        Logic:
            get the count of each item in list. The count that is the greatest
            is the mode.
        """
        max = 0
        mode = 'not-defined'
        for item in the_list:
            num_of_values = the_list.count(item)
            if num_of_values > max:
                mode = item
                max = num_of_values
        return mode
00512     def make_table(self):
        """
        Requires:
            nothing
        Returns:
            A dictionary of values for the beginning of the table.
        Logic:
            Read one line in at a time. Determine what action to take based on
            the state.
        """
        self.__initiate_values()
        read_obj = open(self.__file, 'r')
        self.__write_obj = open(self.__write_to, 'w')
        line_to_read = 1
        while line_to_read:
            line_to_read = read_obj.readline()
            line = line_to_read
            self.__token_info = line[:16]
            action = self.__state_dict.get(self.__state[-1])
            # print self.__state[-1]
            if action == None:
                sys.stderr.write('No matching state in module table.py\n')
                sys.stderr.write(self.__state[-1] + '\n')
            action(line)
        read_obj.close()
        self.__write_obj.close()
        copy_obj = copy.Copy(bug_handler = self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "table.data")
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)
        return self.__table_data

Generated by  Doxygen 1.6.0   Back to index