Changeset 548:755b538cb146

Show
Ignore:
Timestamp:
2006-10-14 01:48:43 (4 years ago)
Author:
Stefan Schwarzer <sschwarzer@…>
Branch:
add_stat_caching
convert_revision:
svn:778c30c8-61e0-0310-89d4-fe2f97a467b2/branches/add_stat_caching@571
Message:
Let ftputil use the stat cache in ftp_stat_cache.py. With the script
sandbox/list_dir_test.py, this gives a reduction of the running time
to about a tenth!

Warning: The code isn't production-ready yet. For example, the cache
code isn't thoroughly tested and has no expiration strategy at all.
Files:
5 modified

Legend:

Unmodified
Added
Removed
  • _test_ftp_stat.py

    r539 r548  
    325325        """Test non-switching of parser format with `PermanentError`.""" 
    326326        self.assertEqual(self.stat._allow_parser_switching, True) 
    327         # if there's a `PermanentError`, don't switch because we 
    328         #  don't know if the file was missed due to a wrong parser 
     327        # with these directory contents, we get a `ParserError` for 
     328        #  the Unix parser, so `_allow_parser_switching` can be 
     329        #  switched off no matter whether we got a `PermanentError` 
     330        #  or not 
    329331        self.assertRaises(ftp_error.PermanentError, self.stat.lstat, 
    330332                          "/home/msformat/nonexistent") 
    331         self.assertEqual(self.stat._allow_parser_switching, True) 
     333        self.assertEqual(self.stat._allow_parser_switching, False) 
    332334 
    333335    def test_parser_switching_default_to_unix(self): 
  • _test_ftputil.py

    r539 r548  
    122122        def getmtime(self, file_name): 
    123123            return self._mtime 
     124        def abspath(self, path): 
     125            return "/home/sschwarzer/_ftputil_sync_" 
    124126        # needed for `isdir` in `FTPHost.remove` 
    125127        def isfile(self, path): 
  • ftp_stat.py

    r540 r548  
    4141 
    4242import ftp_error 
     43import ftp_stat_cache 
    4344 
    4445 
     
    292293        #  doesn't work 
    293294        self._allow_parser_switching = True 
     295        # cache only lstat results; `stat` works locally on `lstat` results 
     296        self._lstat_cache = ftp_stat_cache.StatCache() 
    294297 
    295298    def _host_dir(self, path): 
     
    331334        return names 
    332335 
    333     def _stat_candidates(self, lines, wanted_name): 
    334         """Return candidate lines for further analysis.""" 
    335         # return only lines that contain the name of the file to stat 
    336         #  (however, the string may be _anywhere_ on the line but not 
    337         #  necessarily the file's basename; e. g. the string could 
    338         #  occur as the name of the file's group) 
    339         return [line  for line in lines  if wanted_name in line] 
    340  
    341336    def _real_lstat(self, path, _exception_for_missing_path=True): 
    342337        """ 
     
    353348        _not_ intended for use by ftputil clients.) 
    354349        """ 
     350        path = self._path.abspath(path) 
     351        # if the path is in the cache, return the lstat result 
     352        if path in self._lstat_cache: 
     353            return self._lstat_cache[path] 
    355354        # get output from FTP's `DIR` command 
    356355        lines = [] 
    357         path = self._path.abspath(path) 
    358356        # Note: (l)stat works by going one directory up and parsing 
    359357        #  the output of an FTP `DIR` command. Unfortunately, it is 
     
    361359        if path == '/': 
    362360            raise ftp_error.RootDirError( 
    363                   "can't invoke stat for remote root directory") 
     361                  "can't stat remote root directory") 
    364362        dirname, basename = self._path.split(path) 
     363        lstat_result_for_path = None 
     364        # loop through all lines of the directory listing; we 
     365        #  probably won't need all lines for the particular path but 
     366        #  we want to collect as many stat results in the cache as 
     367        #  possible 
    365368        lines = self._host_dir(dirname) 
    366         # search for name to be stat'ed without parsing the whole 
    367         #  directory listing 
    368         candidates = self._stat_candidates(lines, basename) 
    369         # parse candidates; return the first stat result where the 
    370         #  calculated name matches the previously determined 
    371         #  basename 
    372         for line in candidates: 
     369        for line in lines: 
    373370            try: 
    374371                stat_result = self._parser.parse_line(line, 
    375372                              self._host.time_shift()) 
     373                loop_path = self._path.join(dirname, stat_result._st_name) 
     374                self._lstat_cache[loop_path] = stat_result 
     375                # needed to work without or with disabled cache 
    376376                if stat_result._st_name == basename: 
    377                     return stat_result 
     377                    lstat_result_for_path = stat_result 
    378378            except ftp_error.ParserError: 
    379379                # ignore things like "total 17", as found in some 
     
    381381                if not line.lower().startswith("total"): 
    382382                    raise 
     383        if lstat_result_for_path: 
     384            return lstat_result_for_path 
    383385        # path was not found 
    384386        if _exception_for_missing_path: 
     
    426428            path = self._path.normpath(path) 
    427429            # check for cyclic structure 
    428             if visited_paths.has_key(path): 
     430            if path in visited_paths: 
    429431                # we had this path already 
    430432                raise ftp_error.PermanentError( 
  • ftp_stat_cache.py

    r544 r548  
    4545        self._cache = {} 
    4646        self._debug = False 
    47         self.enabled = False 
     47        self.enabled = True 
    4848 
    4949    def _debug_output(self, text): 
  • ftputil.py

    r543 r548  
    138138        # lstat, stat, listdir services 
    139139        self._stat = ftp_stat._Stat(self) 
     140        self.stat_cache = self._stat._lstat_cache 
    140141        # save (cache) current directory 
    141142        self._current_dir = ftp_error._try_with_oserror(self._session.pwd) 
     
    499500                                               # use "old" current dir 
    500501                                               self._current_dir, path)) 
    501         #self._current_dir = ftp_error._try_with_oserror(self._session.pwd) 
    502502 
    503503    def mkdir(self, path, mode=None): 
     
    542542        is no longer supported. 
    543543        """ 
     544        path = self.path.abspath(path) 
    544545        if self.listdir(path): 
    545             path = self.path.abspath(path) 
    546546            raise ftp_error.PermanentError("directory '%s' not empty" % path) 
    547547        #XXX how will `rmd` work with links? 
    548548        ftp_error._try_with_oserror(self._session.rmd, path) 
     549        self.stat_cache.invalidate(path) 
    549550 
    550551    def remove(self, path): 
    551552        """Remove the given file or link.""" 
     553        path = self.path.abspath(path) 
    552554        # though `isfile` includes also links to files, `islink` 
    553555        #  is needed to include links to directories 
     
    557559            raise ftp_error.PermanentError("remove/unlink can only delete " 
    558560                                           "files and links, not directories") 
     561        self.stat_cache.invalidate(path) 
    559562 
    560563    def unlink(self, path):