Logo Search packages:      
Sourcecode: calibre version File versions

info.py

#########################################################################
#                                                                       #
#                                                                       #
#   copyright 2002 Paul Henry Tremblay                                  #
#                                                                       #
#   This program is distributed in the hope that it will be useful,     #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of      #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU    #
#   General Public License for more details.                            #
#                                                                       #
#   You should have received a copy of the GNU General Public License   #
#   along with this program; if not, write to the Free Software         #
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA            #
#   02111-1307 USA                                                      #
#                                                                       #
#                                                                       #
#########################################################################
import sys, os, tempfile
from calibre.ebooks.rtf2xml import copy
class Info:
    """
    Make tags for document-information.

    The class is a small line-oriented state machine. It copies a tokenized
    file line by line and, between the 'mi<mk<doc-in-beg' and
    'mi<mk<doc-in-end' markers, replaces the document-information tokens
    with 'mi<tg<...' tag lines. The result overwrites the original file.
    """

    def __init__(self,
            in_file,
            bug_handler,
            copy = None,
            run_level = 1,
            ):
        """
        Required:
            'in_file'--path of the file to parse (fix_info overwrites it)
            'bug_handler'--exception class raised when the module detects
            an internal inconsistency
        Optional:
            'copy'-- whether to make a copy of result for debugging
            'run_level'--strictness; unknown date/time tokens only raise
            'bug_handler' when this is greater than 3
        Returns:
            nothing
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        self.__run_level = run_level
        # The temporary output file is created securely (mkstemp) at the
        # start of fix_info, so no file exists until work is actually done.
        self.__write_to = None

    def __initiate_values(self):
        """
        Initiate all values.

        Resets the collected text, sets the starting state, and builds the
        three lookup tables used by the state machine.
        """
        self.__text_string = ''
        self.__state = 'before_info_table'
        # state name => method that handles one line in that state
        self.__state_dict = {
        'before_info_table': self.__before_info_table_func,
        'after_info_table': self.__after_info_table_func,
        'in_info_table'    : self.__in_info_table_func,
        'collect_text'      : self.__collect_text_func,
        'collect_tokens'      : self.__collect_tokens_func,
        }
        # first 16 characters of a line => (handler, name of output tag)
        self.__info_table_dict = {
        'cw<di<title_____'  : (self.__found_tag_with_text_func, 'title'),
        'cw<di<author____'  : (self.__found_tag_with_text_func, 'author'),
        'cw<di<keywords__'  : (self.__found_tag_with_text_func, 'keywords'),
        'cw<di<doc-notes_'  : (self.__found_tag_with_text_func, 'doc-notes'),
        'cw<di<subject___'  : (self.__found_tag_with_text_func, 'subject'),
        'cw<di<operator__'  : (self.__found_tag_with_text_func, 'operator'),
        'cw<di<create-tim'  : (self.__found_tag_with_tokens_func, 'creation-time'),
        'cw<di<revis-time'  :  (self.__found_tag_with_tokens_func, 'revision-time'),
        'cw<di<edit-time_'  : (self.__single_field_func, 'editing-time'),
        'cw<di<num-of-wor'  : (self.__single_field_func, 'number-of-words'),
        'cw<di<num-of-chr'  : (self.__single_field_func, 'number-of-characters'),
        'cw<di<num-of-pag'  : (self.__single_field_func, 'number-of-pages'),
        }
        # abbreviated token name (characters 6-16 of a line) => attribute name
        self.__token_dict = {
        'year______'        : 'year',
        'month_____'        : 'month',
        'day_______'        : 'day',
        'minute____'        : 'minute',
        'revis-time'        : 'revision-time',
        'num-of-wor'        : 'number-of-words',
        'num-of-chr'        : 'number-of-characters',
        'num-of-pag'        : 'number-of-pages',
        }

    def __before_info_table_func(self, line):
        """
        Required:
            line -- the line to parse
        Returns:
            nothing
        Logic:
            Check for the beginning of the information table. When found,
            set the state to the information table. Always write the line.
        """
        if self.__token_info == 'mi<mk<doc-in-beg':
            self.__state = 'in_info_table'
        self.__write_obj.write(line)

    def __in_info_table_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing.
        Logic:
            Check for the end of information. If not found, check if the
            token has a special value in the info table dictionary. If it
            does, execute that function.
            Otherwise, output the line to the file.
            The end-of-information marker line itself is not written.
        """
        if self.__token_info == 'mi<mk<doc-in-end':
            self.__state = 'after_info_table'
            return
        action, tag = self.__info_table_dict.get(
            self.__token_info, (None, None))
        if action:
            action(line, tag)
        else:
            self.__write_obj.write(line)

    def __found_tag_with_text_func(self, line, tag):
        """
        Requires:
            line -- line to parse
            tag --what kind of line
        Returns:
            nothing
        Logic:
            This function marks the beginning of information fields that
            have text that must be collected.  Set the type of information
            field with the tag option. Set the state to collecting text.
        """
        self.__tag = tag
        self.__state = 'collect_text'

    def __collect_text_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Logic:
            If the end of the information field is found, write the text
            string to the file, wrapped in open and close tags.
            Otherwise, if the line contains text, add it to the text string.
        """
        if self.__token_info == 'mi<mk<docinf-end':
            self.__state = 'in_info_table'
            self.__write_obj.write(
                'mi<tg<open______<%s\n'
                'tx<nu<__________<%s\n'
                'mi<tg<close_____<%s\n' % (self.__tag, self.__text_string, self.__tag)
            )
            self.__text_string = ''
        elif line[0:2] == 'tx':
            # skip the 17-character 'tx<nu<__________<' prefix and drop the
            # last character of the line (the newline)
            self.__text_string += line[17:-1]

    def __found_tag_with_tokens_func(self, line, tag):
        """
        Requires:
            line -- line to parse
            tag -- type of field
        Returns:
            nothing
        Logic:
            Some fields have a series of tokens (cw<di<year______<nu<2003)
            that must be parsed as attributes for the element.
            Set the state to collect tokens, and set the text string to
            start an empty element with attributes.
        """
        self.__state = 'collect_tokens'
        # the finished line looks like:
        # mi<tg<empty-att_<creation-time<year>2003<month>5
        self.__text_string = 'mi<tg<empty-att_<%s' % tag

    def __collect_tokens_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Raises:
            the bug handler, when the token is not in the token dictionary
            and the run level is greater than 3
        Logic:
            This function collects all the token information and adds it to
            the text string until the end of the field is found.
            First check for the end of the information field. If found,
            write the text string to the file.
            If not found, get the relevant information from the line.
            This information cannot be directly added to the text string,
            because it exists in abbreviated form.  (num-of-wor)
            Look the abbreviation up in a dictionary to convert it to a
            longer, readable form. If the key does not exist in the
            dictionary, raise an error at high run levels and otherwise
            ignore the line. If it exists, add the value to the text string.
            (num-of-wor => number-of-words)
        """
        #cw<di<year______<nu<2003
        if self.__token_info == 'mi<mk<docinf-end':
            self.__state = 'in_info_table'
            self.__write_obj.write('%s\n' % self.__text_string)
            self.__text_string = ''
            return
        att = line[6:16]
        value = line[20:-1]
        att_changed = self.__token_dict.get(att)
        if att_changed is None:
            if self.__run_level > 3:
                msg = 'no dictionary match for %s\n' % att
                # call form of raise: valid on both Python 2 and Python 3
                raise self.__bug_handler(msg)
        else:
            self.__text_string += '<%s>%s' % (att_changed, value)

    def __single_field_func(self, line, tag):
        """
        Requires:
            line -- line to parse
            tag -- type of field
        Returns:
            nothing
        Logic:
            The field carries a single value after the 20th character of
            the line. Write it at once as an empty element whose only
            attribute has the same name as the element.
        """
        value = line[20:-1]
        self.__write_obj.write(
            'mi<tg<empty-att_<%s'
            '<%s>%s\n' % (tag, tag, value)
        )

    def __after_info_table_func(self, line):
        """
        Requires:
            line --line to write to file
        Returns:
            nothing
        Logic:
            After the end of the information table, simply write the line
            to the file.
        """
        self.__write_obj.write(line)

    def __process_line(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Raises:
            the bug handler, when the current state has no handler
        Logic:
            Record the first 16 characters of the line as the current
            token, then pass the line to the handler for the current state.
        """
        self.__token_info = line[:16]
        action = self.__state_dict.get(self.__state)
        if action is None:
            msg = 'no matching state in module info.py\n%s\n' % self.__state
            raise self.__bug_handler(msg)
        action(line)

    def fix_info(self):
        """
        Requires:
            nothing
        Returns:
            nothing (changes the original file)
        Logic:
            Read one line in at a time. Determine what action to take based
            on the state. If the state is before the information table,
            look for the beginning of the information table.
            If the state is in the information table, use other methods to
            parse the information.
            If the state is after the information table, simply write the
            line to the output file.
            The output goes to a temporary file, which then replaces the
            original file. The temporary file is always removed, even when
            an error interrupts the processing.
        """
        self.__initiate_values()
        # mkstemp creates the file atomically, unlike the deprecated mktemp
        temp_fd, self.__write_to = tempfile.mkstemp()
        try:
            with os.fdopen(temp_fd, 'w') as write_obj:
                self.__write_obj = write_obj
                with open(self.__file, 'r') as read_obj:
                    # iterating stops at end of file, so the empty
                    # end-of-file string is never passed to a handler
                    for line in read_obj:
                        self.__process_line(line)
            copy_obj = copy.Copy(bug_handler = self.__bug_handler)
            if self.__copy:
                copy_obj.copy_file(self.__write_to, "info.data")
            copy_obj.rename(self.__write_to, self.__file)
        finally:
            os.remove(self.__write_to)

Generated by  Doxygen 1.6.0   Back to index