Logo Search packages:      
Sourcecode: calibre version File versions

tuple calibre::_filename_sanitize

Initial value:

# Initial value of _filename_sanitize: matches the characters that
# sanitize_file_name() replaces (invalid in file names on Windows, OS X or Linux).
re.compile(r'[\xae\0\\|\?\*<":>\+/]')


def sanitize_file_name(name, substitute='_', as_unicode=False):
    '''
    Sanitize the filename `name`. All invalid characters are replaced by `substitute`.
    The set of invalid characters is the union of the invalid characters in Windows,
    OS X and Linux. Also removes leading and trailing whitespace.

    **WARNING:** This function also replaces path separators, so only pass file names
    and not full paths to it.

    *NOTE:* By default this function returns byte strings, not unicode objects. The
    byte strings are encoded in the filesystem encoding of the platform, or UTF-8.
    Pass `as_unicode=True` to get the result decoded back to a unicode object.

    :param name: The file name (byte string or unicode) to sanitize
    :param substitute: Replacement for every invalid character and for '..'
    :param as_unicode: If True return a unicode object instead of a byte string
    '''
    if isinstance(name, unicode):
        name = name.encode(filesystem_encoding, 'ignore')
    one = _filename_sanitize.sub(substitute, name)
    # Collapse every kind of whitespace to a plain space, then trim the ends
    one = re.sub(r'\s', ' ', one).strip()
    # A name consisting only of dots is replaced by a single underscore
    one = re.sub(r'^\.+$', '_', one)
    if as_unicode:
        one = one.decode(filesystem_encoding)
    one = one.replace('..', substitute)
    return one


def prints(*args, **kwargs):
    '''
    Print unicode arguments safely by encoding them to preferred_encoding

    Has the same signature as the print function from Python 3, except for the
    additional keyword argument safe_encode, which if set to True will cause the
    function to use repr when encoding fails.

    Recognised keyword arguments: file (default sys.stdout), sep (default ' '),
    end (default '\\n') and safe_encode (default False).
    '''
    file = kwargs.get('file', sys.stdout)
    sep  = kwargs.get('sep', ' ')
    end  = kwargs.get('end', '\n')
    enc = preferred_encoding
    safe_encode = kwargs.get('safe_encode', False)
    # When CALIBRE_WORKER is present in the environment output is always UTF-8
    if 'CALIBRE_WORKER' in os.environ:
        enc = 'utf-8'
    for i, arg in enumerate(args):
        if isinstance(arg, unicode):
            try:
                arg = arg.encode(enc)
            except UnicodeEncodeError:
                if not safe_encode:
                    raise
                arg = repr(arg)
        if not isinstance(arg, str):
            # Neither unicode nor a byte string: stringify first, then encode
            # if the conversion produced unicode
            try:
                arg = str(arg)
            except ValueError:
                arg = unicode(arg)
            if isinstance(arg, unicode):
                try:
                    arg = arg.encode(enc)
                except UnicodeEncodeError:
                    if not safe_encode:
                        raise
                    arg = repr(arg)

        file.write(arg)
        if i != len(args)-1:
            file.write(sep)
    file.write(end)


class CommandLineError(Exception):
    pass


def setup_cli_handlers(logger, level):
    '''
    Set `logger` to `level` and attach a stream handler suited to that level.

    WARNING messages go to stdout prefixed with the level name, INFO messages go
    to stdout with the default format and DEBUG messages go to stderr with
    file name and line number. Any other level gets the WARNING style handler
    at the requested level (previously this case failed because no handler was
    ever created).

    Nothing is done when CALIBRE_WORKER is set in the environment and the
    logger already has handlers.
    '''
    if os.environ.get('CALIBRE_WORKER', None) is not None and logger.handlers:
        return
    logger.setLevel(level)
    if level == logging.DEBUG:
        stream = sys.stderr
        fmt = '[%(levelname)s] %(filename)s:%(lineno)s: %(message)s'
    elif level == logging.INFO:
        # None selects the default format of logging.Formatter
        stream, fmt = sys.stdout, None
    else:
        # logging.WARNING and every level without a dedicated style
        stream, fmt = sys.stdout, '%(levelname)s: %(message)s'
    handler = logging.StreamHandler(stream)
    handler.setFormatter(logging.Formatter(fmt))
    handler.setLevel(level)
    logger.addHandler(handler)


def load_library(name, cdll):
    '''
    Load the shared library `name` through `cdll.LoadLibrary`.

    On Windows `name` is used unchanged. On OS X '.dylib' is appended and the
    library is looked up in sys.frameworks_dir when that attribute exists.
    Everywhere else '.so' is appended.
    '''
    if iswindows:
        return cdll.LoadLibrary(name)
    if isosx:
        name += '.dylib'
        if hasattr(sys, 'frameworks_dir'):
            return cdll.LoadLibrary(os.path.join(getattr(sys, 'frameworks_dir'), name))
        return cdll.LoadLibrary(name)
    return cdll.LoadLibrary(name+'.so')


def filename_to_utf8(name):
    '''Return C{name} encoded in utf8. Unhandled characters are replaced. '''
    if isinstance(name, unicode):
        return name.encode('utf8')
    # Byte strings are assumed to be cp1252 on Windows and utf8 elsewhere
    codec = 'cp1252' if iswindows else 'utf8'
    return name.decode(codec, 'replace').encode('utf8')


def extract(path, dir):
    '''
    Extract the archive at `path` into the directory `dir`.

    The extractor is chosen from the file extension: zip, cbz, epub and oebzip
    use calibre.libunzip, cbr and rar use calibre.libunrar.

    :raises Exception: if the extension is not one of the above
    '''
    ext = os.path.splitext(path)[1][1:].lower()
    extractor = None
    if ext in ['zip', 'cbz', 'epub', 'oebzip']:
        from calibre.libunzip import extract as zipextract
        extractor = zipextract
    elif ext in ['cbr', 'rar']:
        from calibre.libunrar import extract as rarextract
        extractor = rarextract
    if extractor is None:
        raise Exception('Unknown archive type')
    extractor(path, dir)


def get_proxies():
    '''
    Return a dict mapping protocol name to proxy address.

    The http_proxy and ftp_proxy environment variables are read first (with a
    leading 'http://' / 'ftp://' removed). On Windows the Internet Settings
    registry key is consulted as well and overrides the environment. Entries
    shorter than five characters are discarded as invalid.
    '''
    proxies = {}
    for q in ('http', 'ftp'):
        proxy = os.environ.get(q+'_proxy', None)
        if not proxy:
            continue
        prefix = q+'://'
        if proxy.startswith(prefix):
            # Strip exactly the scheme prefix; a fixed slice of 7 only fits
            # 'http://' and cut one character too many after 'ftp://'
            proxy = proxy[len(prefix):]
        proxies[q] = proxy

    if iswindows:
        try:
            winreg = __import__('_winreg')
            settings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                                      'Software\\Microsoft\\Windows'
                                      '\\CurrentVersion\\Internet Settings')
            try:
                proxy = winreg.QueryValueEx(settings, "ProxyEnable")[0]
                if proxy:
                    server = str(winreg.QueryValueEx(settings, 'ProxyServer')[0])
                    if ';' in server:
                        # Per protocol setting of the form proto=addr;proto=addr
                        for p in server.split(';'):
                            protocol, address = p.split('=')
                            proxies[protocol] = address
                    else:
                        proxies['http'] = server
                        proxies['ftp'] =  server
            finally:
                # Release the registry handle even if a query above failed
                settings.Close()
        except Exception as e:
            prints('Unable to detect proxy settings: %s' % str(e))

    # Iterate over a copy of the keys because entries are deleted in the loop
    for x in list(proxies):
        if len(proxies[x]) < 5:
            prints('Removing invalid', x, 'proxy:', proxies[x])
            del proxies[x]

    if proxies:
        prints('Using proxies: %s' % proxies)
    return proxies


def browser(honor_time=True, max_time=2, mobile_browser=False):
    '''
    Create a mechanize browser for web scraping. The browser handles cookies,
    refresh requests and ignores robots.txt. Also uses proxy if available.

    :param honor_time: If True honors pause time in refresh requests
    :param max_time: Maximum time in seconds to wait during a refresh request
    :param mobile_browser: If True send the user agent of a mobile browser
    '''
    opener = mechanize.Browser()
    opener.set_handle_refresh(True, max_time=max_time, honor_time=honor_time)
    opener.set_handle_robots(False)
    opener.addheaders = [('User-agent', ' Mozilla/5.0 (Windows; U; Windows CE 5.1; rv:1.8.1a3) Gecko/20060610 Minimo/0.016' if mobile_browser else \
                          'Mozilla/5.0 (X11; U; i686 Linux; en_US; rv: Gecko/20060508 Firefox/')]
    http_proxy = get_proxies().get('http', None)
    if http_proxy:
        opener.set_proxies({'http':http_proxy})
    return opener


def fit_image(width, height, pwidth, pheight):
    '''
    Fit image in box of width pwidth and height pheight.
    @param width: Width of image
    @param height: Height of image
    @param pwidth: Width of box
    @param pheight: Height of box
    @return: scaled, new_width, new_height. scaled is True iff new_width and/or new_height is different from width or height.
    '''
    scaled = height > pheight or width > pwidth
    if height > pheight:
        corrf = pheight/float(height)
        width, height = floor(corrf*width), pheight
    if width > pwidth:
        corrf = pwidth/float(width)
        width, height = pwidth, floor(corrf*height)
    # Same height check as the first step, repeated after the width step
    if height > pheight:
        corrf = pheight/float(height)
        width, height = floor(corrf*width), pheight

    return scaled, int(width), int(height)


class CurrentDir(object):
    '''
    Context manager that changes the working directory to `path` on entry and
    restores the previous working directory on exit. The `with` target is the
    previous working directory.
    '''

    def __init__(self, path):
        self.path = path
        self.cwd = None

    def __enter__(self, *args):
        self.cwd = os.getcwd()
        os.chdir(self.path)
        return self.cwd

    def __exit__(self, *args):
        os.chdir(self.cwd)


class StreamReadWrapper(object):
    '''
    Used primarily with pyPdf to ensure the stream is properly closed.

    Exposes the read, seek and tell attributes of the wrapped stream and
    replaces them with None when the `with` block is left.
    '''

    def __init__(self, stream):
        for x in ('read', 'seek', 'tell'):
            setattr(self, x, getattr(stream, x))

    def __exit__(self, *args):
        for x in ('read', 'seek', 'tell'):
            setattr(self, x, None)

    def __enter__(self):
        return self


def detect_ncpus():
    """Detects the number of effective CPUs in the system"""
    ans = -1
    try:
        ans = multiprocessing.cpu_count()
    except Exception:
        # Fall back to Qt when multiprocessing cannot tell. Only ordinary
        # errors are handled so that KeyboardInterrupt/SystemExit propagate.
        from PyQt4.Qt import QThread
        ans = QThread.idealThreadCount()
    if ans < 1:
        ans = 1
    return ans


def launch(path_or_url):
    '''
    Open `path_or_url` via QDesktopServices.openUrl. An argument that names an
    existing local path is prefixed with 'file:' first.
    '''
    if os.path.exists(path_or_url):
        path_or_url = 'file:'+path_or_url
    QDesktopServices.openUrl(QUrl(path_or_url))


relpath = os.path.relpath

# Matches a leading English article ("the", "a", "an") followed by whitespace
_spat = re.compile(r'^the\s+|^a\s+|^an\s+', re.IGNORECASE)


def english_sort(x, y):
    '''
    Compare two English phrases ignoring a leading article (the, a, an).
    '''
    return cmp(_spat.sub('', x), _spat.sub('', y))


def walk(dir):
    ''' A nice interface to os.walk: yields the path of every file below `dir` '''
    for record in os.walk(dir):
        for f in record[-1]:
            yield os.path.join(record[0], f)


def strftime(fmt, t=None):
    ''' A version of strftime that returns unicode strings and tries to handle dates
    before 1900

    :param fmt: The strftime format string
    :param t: A time tuple, defaults to the current local time
    '''
    if t is None:
        t = time.localtime()
    early_year = t[0] < 1900
    if early_year:
        # Format with the year 1900 and a placeholder for %Y, then substitute
        # the real year into the result below
        fmt = fmt.replace('%Y', '_early year hack##')
        t = list(t)
        orig_year = t[0]
        t[0] = 1900
    ans = None
    if iswindows:
        if isinstance(fmt, unicode):
            fmt = fmt.encode('mbcs')
        ans = plugins['winutil'][0].strftime(fmt, t)
    else:
        # Without this else the winutil result above was always overwritten
        ans = time.strftime(fmt, t).decode(preferred_encoding, 'replace')
    if early_year:
        ans = ans.replace('_early year hack##', str(orig_year))
    return ans


def my_unichr(num):
    '''
    Return the unicode character for the code point `num`, or u'?' when `num`
    is not a valid code point.
    '''
    try:
        return unichr(num)
    except (ValueError, OverflowError):
        # OverflowError is raised for numbers too large for unichr to accept
        return u'?'


def entity_to_unicode(match, exceptions=[], encoding='cp1252'):
    '''
    @param match: A match object such that '&'+match.group(1)+';' is the entity.
    @param exceptions: A list of entities to not convert (Each entry is the name
    of the entity, for e.g. 'apos' or '#1234')
    @param encoding: The encoding to use to decode numeric entities between 128 and 256.
    If None, the Unicode UCS encoding is used. A common encoding is cp1252.
    @return: The replacement text. Entities that cannot be interpreted are
    returned unchanged, i.e. as '&'+match.group(1)+';'.
    '''
    ent = match.group(1)
    if ent in exceptions:
        return '&'+ent+';'
    if ent == 'apos':
        return "'"
    if ent == 'hellips':
        # Common misspelling of the hellip entity
        ent = 'hellip'
    if ent.startswith(u'#'):
        # Numeric entity: hexadecimal (#x.. / #X..) or decimal (#..)
        try:
            if ent[1:2] in (u'x', u'X'):
                num = int(ent[2:], 16)
            else:
                num = int(ent[1:])
        except ValueError:
            return '&'+ent+';'
        if encoding is None or num > 255:
            return my_unichr(num)
        try:
            return chr(num).decode(encoding)
        except (UnicodeDecodeError, ValueError):
            # Byte undefined in `encoding`, or num outside the range of chr()
            return my_unichr(num)
    try:
        return my_unichr(name2codepoint[ent])
    except KeyError:
        return '&'+ent+';'


_ent_pat = re.compile(r'&(\S+?);')


def prepare_string_for_xml(raw, attribute=False):
    # Resolve existing entities first so that they are not escaped twice
    raw = _ent_pat.sub(entity_to_unicode, raw)
    raw = raw.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
    if attribute:
        raw = raw.replace('"', '&quot;')
    # NOTE(review): the listing ends here; a trailing `return raw` presumably
    # follows in __init__.py -- confirm against the real file.

Definition at line 77 of file __init__.py.

Generated by  Doxygen 1.6.0   Back to index