Changeset 548:755b538cb146
- Timestamp:
- 2006-10-14 01:48:43 (4 years ago)
- Author:
- Stefan Schwarzer <sschwarzer@…>
- Branch:
- add_stat_caching
- convert_revision:
- svn:778c30c8-61e0-0310-89d4-fe2f97a467b2/branches/add_stat_caching@571
- Message:
-
Let ftputil use the stat cache in ftp_stat_cache.py. With the script
sandbox/list_dir_test.py, this gives a reduction of the running time
to about a tenth!
Warning: The code isn't production-ready yet. For example, the cache
code isn't thoroughly tested and has no expiration strategy at all.
- Files:
-
Legend:
- Unmodified
- Added
- Removed
-
|
r539
|
r548
|
|
| 325 | 325 | """Test non-switching of parser format with `PermanentError`.""" |
| 326 | 326 | self.assertEqual(self.stat._allow_parser_switching, True) |
| 327 | | # if there's a `PermanentError`, don't switch because we |
| 328 | | # don't know if the file was missed due to a wrong parser |
| | 327 | # with these directory contents, we get a `ParserError` for |
| | 328 | # the Unix parser, so `_allow_parser_switching` can be |
| | 329 | # switched off no matter whether we got a `PermanentError` |
| | 330 | # or not |
| 329 | 331 | self.assertRaises(ftp_error.PermanentError, self.stat.lstat, |
| 330 | 332 | "/home/msformat/nonexistent") |
| 331 | | self.assertEqual(self.stat._allow_parser_switching, True) |
| | 333 | self.assertEqual(self.stat._allow_parser_switching, False) |
| 332 | 334 | |
| 333 | 335 | def test_parser_switching_default_to_unix(self): |
-
|
r539
|
r548
|
|
| 122 | 122 | def getmtime(self, file_name): |
| 123 | 123 | return self._mtime |
| | 124 | def abspath(self, path): |
| | 125 | return "/home/sschwarzer/_ftputil_sync_" |
| 124 | 126 | # needed for `isdir` in `FTPHost.remove` |
| 125 | 127 | def isfile(self, path): |
-
|
r540
|
r548
|
|
| 41 | 41 | |
| 42 | 42 | import ftp_error |
| | 43 | import ftp_stat_cache |
| 43 | 44 | |
| 44 | 45 | |
| … |
… |
|
| 292 | 293 | # doesn't work |
| 293 | 294 | self._allow_parser_switching = True |
| | 295 | # cache only lstat results; `stat` works locally on `lstat` results |
| | 296 | self._lstat_cache = ftp_stat_cache.StatCache() |
| 294 | 297 | |
| 295 | 298 | def _host_dir(self, path): |
| … |
… |
|
| 331 | 334 | return names |
| 332 | 335 | |
| 333 | | def _stat_candidates(self, lines, wanted_name): |
| 334 | | """Return candidate lines for further analysis.""" |
| 335 | | # return only lines that contain the name of the file to stat |
| 336 | | # (however, the string may be _anywhere_ on the line but not |
| 337 | | # necessarily the file's basename; e. g. the string could |
| 338 | | # occur as the name of the file's group) |
| 339 | | return [line for line in lines if wanted_name in line] |
| 340 | | |
| 341 | 336 | def _real_lstat(self, path, _exception_for_missing_path=True): |
| 342 | 337 | """ |
| … |
… |
|
| 353 | 348 | _not_ intended for use by ftputil clients.) |
| 354 | 349 | """ |
| | 350 | path = self._path.abspath(path) |
| | 351 | # if the path is in the cache, return the lstat result |
| | 352 | if path in self._lstat_cache: |
| | 353 | return self._lstat_cache[path] |
| 355 | 354 | # get output from FTP's `DIR` command |
| 356 | 355 | lines = [] |
| 357 | | path = self._path.abspath(path) |
| 358 | 356 | # Note: (l)stat works by going one directory up and parsing |
| 359 | 357 | # the output of an FTP `DIR` command. Unfortunately, it is |
| … |
… |
|
| 361 | 359 | if path == '/': |
| 362 | 360 | raise ftp_error.RootDirError( |
| 363 | | "can't invoke stat for remote root directory") |
| | 361 | "can't stat remote root directory") |
| 364 | 362 | dirname, basename = self._path.split(path) |
| | 363 | lstat_result_for_path = None |
| | 364 | # loop through all lines of the directory listing; we |
| | 365 | # probably won't need all lines for the particular path but |
| | 366 | # we want to collect as many stat results in the cache as |
| | 367 | # possible |
| 365 | 368 | lines = self._host_dir(dirname) |
| 366 | | # search for name to be stat'ed without parsing the whole |
| 367 | | # directory listing |
| 368 | | candidates = self._stat_candidates(lines, basename) |
| 369 | | # parse candidates; return the first stat result where the |
| 370 | | # calculated name matches the previously determined |
| 371 | | # basename |
| 372 | | for line in candidates: |
| | 369 | for line in lines: |
| 373 | 370 | try: |
| 374 | 371 | stat_result = self._parser.parse_line(line, |
| 375 | 372 | self._host.time_shift()) |
| | 373 | loop_path = self._path.join(dirname, stat_result._st_name) |
| | 374 | self._lstat_cache[loop_path] = stat_result |
| | 375 | # needed to work without or with disabled cache |
| 376 | 376 | if stat_result._st_name == basename: |
| 377 | | return stat_result |
| | 377 | lstat_result_for_path = stat_result |
| 378 | 378 | except ftp_error.ParserError: |
| 379 | 379 | # ignore things like "total 17", as found in some |
| … |
… |
|
| 381 | 381 | if not line.lower().startswith("total"): |
| 382 | 382 | raise |
| | 383 | if lstat_result_for_path: |
| | 384 | return lstat_result_for_path |
| 383 | 385 | # path was not found |
| 384 | 386 | if _exception_for_missing_path: |
| … |
… |
|
| 426 | 428 | path = self._path.normpath(path) |
| 427 | 429 | # check for cyclic structure |
| 428 | | if visited_paths.has_key(path): |
| | 430 | if path in visited_paths: |
| 429 | 431 | # we had this path already |
| 430 | 432 | raise ftp_error.PermanentError( |
-
|
r544
|
r548
|
|
| 45 | 45 | self._cache = {} |
| 46 | 46 | self._debug = False |
| 47 | | self.enabled = False |
| | 47 | self.enabled = True |
| 48 | 48 | |
| 49 | 49 | def _debug_output(self, text): |
-
|
r543
|
r548
|
|
| 138 | 138 | # lstat, stat, listdir services |
| 139 | 139 | self._stat = ftp_stat._Stat(self) |
| | 140 | self.stat_cache = self._stat._lstat_cache |
| 140 | 141 | # save (cache) current directory |
| 141 | 142 | self._current_dir = ftp_error._try_with_oserror(self._session.pwd) |
| … |
… |
|
| 499 | 500 | # use "old" current dir |
| 500 | 501 | self._current_dir, path)) |
| 501 | | #self._current_dir = ftp_error._try_with_oserror(self._session.pwd) |
| 502 | 502 | |
| 503 | 503 | def mkdir(self, path, mode=None): |
| … |
… |
|
| 542 | 542 | is no longer supported. |
| 543 | 543 | """ |
| | 544 | path = self.path.abspath(path) |
| 544 | 545 | if self.listdir(path): |
| 545 | | path = self.path.abspath(path) |
| 546 | 546 | raise ftp_error.PermanentError("directory '%s' not empty" % path) |
| 547 | 547 | #XXX how will `rmd` work with links? |
| 548 | 548 | ftp_error._try_with_oserror(self._session.rmd, path) |
| | 549 | self.stat_cache.invalidate(path) |
| 549 | 550 | |
| 550 | 551 | def remove(self, path): |
| 551 | 552 | """Remove the given file or link.""" |
| | 553 | path = self.path.abspath(path) |
| 552 | 554 | # though `isfile` includes also links to files, `islink` |
| 553 | 555 | # is needed to include links to directories |
| … |
… |
|
| 557 | 559 | raise ftp_error.PermanentError("remove/unlink can only delete " |
| 558 | 560 | "files and links, not directories") |
| | 561 | self.stat_cache.invalidate(path) |
| 559 | 562 | |
| 560 | 563 | def unlink(self, path): |