root/trunk/ftp_stat.py

Revision 749, 23.0 kB (checked in by schwa, 3 weeks ago)
Dealt with warnings/errors in production code mentioned by pylint.
  • Property svn:mime-type set to text/x-python
  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
Line 
1 # Copyright (C) 2002-2008, Stefan Schwarzer
2 # All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
6 # met:
7 #
8 # - Redistributions of source code must retain the above copyright
9 #   notice, this list of conditions and the following disclaimer.
10 #
11 # - Redistributions in binary form must reproduce the above copyright
12 #   notice, this list of conditions and the following disclaimer in the
13 #   documentation and/or other materials provided with the distribution.
14 #
15 # - Neither the name of the above author nor the names of the
16 #   contributors to the software may be used to endorse or promote
17 #   products derived from this software without specific prior written
18 #   permission.
19 #
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR
24 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32 """
33 ftp_stat.py - stat result, parsers, and FTP stat'ing for `ftputil`
34 """
35
36 # $Id$
37
38 import re
39 import stat
40 import time
41
42 import ftp_error
43 import ftp_stat_cache
44
45
class StatResult(tuple):
    """
    Support class resembling a tuple like that returned from
    `os.(l)stat`.

    The items of the tuple are also available as attributes
    (`st_mode`, `st_ino`, `st_dev`, `st_nlink`, `st_uid`, `st_gid`,
    `st_size`, `st_atime`, `st_mtime`, `st_ctime`). In addition,
    each instance carries the attributes `_st_name` (initially an
    empty string) and `_st_target` (initially `None`).
    """

    # map attribute names to indices into the tuple
    _index_mapping = {
      'st_mode':  0, 'st_ino':   1, 'st_dev':    2, 'st_nlink':    3,
      'st_uid':   4, 'st_gid':   5, 'st_size':   6, 'st_atime':    7,
      'st_mtime': 8, 'st_ctime': 9, '_st_name': 10, '_st_target': 11}

    def __init__(self, sequence):
        # The tuple content has already been set by `tuple.__new__`.
        #  Don't forward `sequence`: `object.__init__` warns about
        #  (Python 2.6) or rejects (Python 3) surplus arguments.
        # don't complain about unused `sequence` argument
        # pylint: disable-msg=W0613
        super(StatResult, self).__init__()
        # these may be overwritten in a `Parser.parse_line` method
        self._st_name = ""
        self._st_target = None

    def __getattr__(self, attr_name):
        """
        Return the tuple item which belongs to `attr_name`.

        This method is only called if the regular attribute lookup
        failed. Raise an `AttributeError` if `attr_name` isn't a
        known name or if the tuple has no item at the corresponding
        index.
        """
        if attr_name in self._index_mapping:
            try:
                return self[self._index_mapping[attr_name]]
            except IndexError:
                # the tuple is too short for this name; report it
                #  as a missing attribute so that `hasattr` and
                #  `getattr` with a default work as expected
                pass
        raise AttributeError("'StatResult' object has no attribute '%s'" %
                             attr_name)
69
70 #
71 # FTP directory parsers
72 #
class Parser(object):
    """
    Represent a parser for directory lines. Parsers for specific
    directory formats inherit from this class.
    """

    # map month abbreviations to month numbers
    _month_numbers = {
      'jan':  1, 'feb':  2, 'mar':  3, 'apr':  4,
      'may':  5, 'jun':  6, 'jul':  7, 'aug':  8,
      'sep':  9, 'oct': 10, 'nov': 11, 'dec': 12}

    _total_regex = re.compile(r"^total\s+\d+")

    def ignores_line(self, line):
        """
        Return a true value if the line should be ignored, i. e. is
        assumed to _not_ contain actual directory/file/link data.
        A typical example are summary lines like "total 23" which
        are emitted by some FTP servers.

        If the line should be used to extract stat data from it,
        return a false value.
        """
        # either a match object or `None`
        match = self._total_regex.search(line)
        return bool(match)

    def parse_line(self, line, time_shift=0.0):
        """
        Return a `StatResult` object as derived from the string
        `line`. The parser code to use depends on the directory format
        the FTP server delivers (also see examples at end of file).

        If the given text line can't be parsed, raise a `ParserError`.

        For the definition of `time_shift` see the docstring of
        `FTPHost.set_time_shift` in `ftputil.py`. Not all parsers
        use the `time_shift` parameter.
        """
        raise NotImplementedError("must be defined by subclass")

    #
    # helper methods for parts of a directory listing line
    #
    def _as_int(self, int_string, int_description):
        """
        Return the string `int_string` converted to an integer.

        If the conversion fails, raise an `ftp_error.ParserError`
        which mentions `int_description` (e. g. "day") and the
        offending string.
        """
        try:
            return int(int_string)
        except ValueError:
            raise ftp_error.ParserError("non-integer %s value '%s'" %
                                        (int_description, int_string))

    def parse_unix_mode(self, mode_string):
        """
        Return an integer from the `mode_string`, compatible with
        the `st_mode` value in stat results. Such a mode string
        may look like "drwxr-xr-x".

        The characters "s"/"S" in the user or group execute position
        and "t"/"T" in the others execute position set the
        set-user-id, set-group-id and sticky bit, respectively. The
        upper-case variants mean that the corresponding execute
        bit is _not_ set.

        If the mode string can't be parsed, raise an
        `ftp_error.ParserError`.
        """
        if len(mode_string) != 10:
            raise ftp_error.ParserError("invalid mode string '%s'" %
                                        mode_string)
        st_mode = 0
        # every character other than "-" counts as a set bit
        for bit in mode_string[1:10]:
            bit = (bit != '-')
            st_mode = (st_mode << 1) + bit
        # (index in mode string, special bit, execute bit, letter)
        special_bits = (
          (3, stat.S_ISUID, stat.S_IXUSR, 's'),
          (6, stat.S_ISGID, stat.S_IXGRP, 's'),
          (9, stat.S_ISVTX, stat.S_IXOTH, 't'))
        for index, special_bit, execute_bit, letter in special_bits:
            flag = mode_string[index]
            if flag.lower() == letter:
                st_mode = st_mode | special_bit
                if flag.isupper():
                    # upper case: special bit without execute bit;
                    #  undo the execute bit set by the loop above
                    st_mode = st_mode & ~execute_bit
        file_type_to_mode = {'b': stat.S_IFBLK, 'c': stat.S_IFCHR,
                             'd': stat.S_IFDIR, 'l': stat.S_IFLNK,
                             'p': stat.S_IFIFO, 's': stat.S_IFSOCK,
                             '-': stat.S_IFREG}
        file_type = mode_string[0]
        if file_type in file_type_to_mode:
            st_mode = st_mode | file_type_to_mode[file_type]
        else:
            raise ftp_error.ParserError(
                  "unknown file type character '%s'" % file_type)
        return st_mode

    def parse_unix_time(self, month_abbreviation, day, year_or_time,
                        time_shift):
        """
        Return a floating point number, like from `time.mktime`, by
        parsing the string arguments `month_abbreviation`, `day` and
        `year_or_time`. The parameter `time_shift` is the difference
        "time on server" - "time on client" and is available as the
        `time_shift` parameter in the `parse_line` interface.

        Times in Unix-style directory listings typically have one of
        these formats:

        - "Nov 23 02:33" (month name, day of month, time)

        - "May 26  2005" (month name, day of month, year)

        If this method can not make sense of the given arguments, it
        raises an `ftp_error.ParserError`.
        """
        try:
            month = self._month_numbers[month_abbreviation.lower()]
        except KeyError:
            # `month` isn't bound here, so report the argument
            raise ftp_error.ParserError("invalid month name '%s'" %
                                        month_abbreviation)
        day = self._as_int(day, "day")
        if ":" not in year_or_time:
            # `year_or_time` is really a year
            year = self._as_int(year_or_time, "year")
            hour, minute = 0, 0
            st_mtime = time.mktime( (year, month, day,
                                     hour, minute, 0, 0, 0, -1) )
        else:
            # `year_or_time` is a time hh:mm
            try:
                hour, minute = year_or_time.split(':')
            except ValueError:
                # more than one colon
                raise ftp_error.ParserError("invalid time string '%s'" %
                                            year_or_time)
            hour = self._as_int(hour, "hour")
            minute = self._as_int(minute, "minute")
            # try the current year
            year = time.localtime()[0]
            st_mtime = time.mktime( (year, month, day,
                                     hour, minute, 0, 0, 0, -1) )
            # rhs of comparison: transform client time to server time
            #  (as on the lhs), so both can be compared with respect
            #  to the set time shift (see the definition of the time
            #  shift in `FTPHost.set_time_shift`'s docstring); the
            #  last addend allows for small deviations between the
            #  supposed (rounded) and the actual time shift
            # #XXX the downside of this "correction" is that there is
            #  a one-minute time interval exactly one year ago that
            #  may cause that datetime to be recognized as the current
            #  datetime, but after all the datetime from the server
            #  can only be exact up to a minute
            if st_mtime > time.time() + time_shift + 60.0:
                # if it's in the future, use previous year
                st_mtime = time.mktime( (year-1, month, day,
                                         hour, minute, 0, 0, 0, -1) )
        return st_mtime

    def parse_ms_time(self, date, time_, time_shift):
        """
        Return a floating point number, like from `time.mktime`, by
        parsing the string arguments `date` and `time_`. The parameter
        `time_shift` is the difference

            "time on server" - "time on client"

        and can be set as the `time_shift` parameter in the
        `parse_line` interface.

        Times in MS-style directory listings typically have the
        format "10-23-01 03:25PM" (month-day_of_month-two_digit_year,
        hour:minute, am/pm).

        If this method can not make sense of the given arguments, it
        raises an `ftp_error.ParserError`.
        """
        # don't complain about unused `time_shift` argument
        # pylint: disable-msg=W0613
        try:
            month, day, year = [int(part) for part in date.split('-')]
        except ValueError:
            # non-integer part or wrong number of parts
            raise ftp_error.ParserError("invalid date string '%s'" % date)
        # two-digit years: 70-99 -> 1970-1999, 00-69 -> 2000-2069
        if year >= 70:
            year = 1900 + year
        else:
            year = 2000 + year
        try:
            hour, minute, am_pm = time_[0:2], time_[3:5], time_[5]
            hour, minute = int(hour), int(minute)
        except (ValueError, IndexError):
            raise ftp_error.ParserError("invalid time string '%s'" % time_)
        # 12-hour clock: 12:xxAM is hour 0, 12:xxPM is hour 12
        if hour == 12 and am_pm == 'A':
            hour = 0
        if hour != 12 and am_pm == 'P':
            hour = hour + 12
        st_mtime = time.mktime( (year, month, day,
                                 hour, minute, 0, 0, 0, -1) )
        return st_mtime
237
238
class UnixParser(Parser):
    """`Parser` class for Unix-specific directory format."""

    def _split_line(self, line):
        """
        Split a line in metadata, nlink, user, group, size, month,
        day, year_or_time and name and return the result as a
        nine-element list of these values.

        For the format variants without a user field, the user item
        in the returned list is `None`.

        If the line doesn't have eight or nine whitespace-separated
        fields, raise an `ftp_error.ParserError`.
        """
        # This method encapsulates the recognition of an unusual
        #  Unix format variant (see ticket
        #  http://ftputil.sschwarzer.net/trac/ticket/12 )
        parts = line.split(None, 8)
        if len(parts) == 9:
            if parts[-1].startswith("-> "):
                # for the alternative format, the last part will not be
                #  "link_name -> link_target" but "-> link_target" and the
                #  link name will be in the previous field;
                # this heuristic will fail for names starting with "-> "
                #  which should be _quite_ rare
                # insert `None` for the user field
                parts.insert(2, None)
                # merge link name and "-> link_target" into one field
                parts[-2] = "%s %s" % tuple(parts[-2:])
                del parts[-1]
            return parts
        elif len(parts) == 8:
            # alternative unusual format, insert `None` for the user field
            parts.insert(2, None)
            return parts
        else:
            # no known Unix-style format
            raise ftp_error.ParserError("line '%s' can't be parsed" % line)

    def parse_line(self, line, time_shift=0.0):
        """
        Return a `StatResult` instance corresponding to the given
        text line. The `time_shift` value is needed to determine
        to which year a datetime without an explicit year belongs.

        The items `st_ino`, `st_dev`, `st_atime` and `st_ctime` of
        the result are always `None`; `st_uid` and `st_gid` are the
        user and group strings from the line.

        If the line can't be parsed, raise a `ParserError`.
        """
        mode_string, nlink, user, group, size, month, day, \
          year_or_time, name = self._split_line(line)
        # st_mode
        st_mode = self.parse_unix_mode(mode_string)
        # st_ino, st_dev, st_nlink, st_uid, st_gid, st_size, st_atime
        st_ino = None
        st_dev = None
        try:
            st_nlink = int(nlink)
        except ValueError:
            raise ftp_error.ParserError("invalid link count '%s'" % nlink)
        st_uid = user
        st_gid = group
        try:
            st_size = int(size)
        except ValueError:
            raise ftp_error.ParserError("invalid size '%s'" % size)
        st_atime = None
        # st_mtime
        st_mtime = self.parse_unix_time(month, day, year_or_time, time_shift)
        # st_ctime
        st_ctime = None
        # st_name
        arrow_count = name.count(" -> ")
        if arrow_count > 1:
            # with more than one arrow we can't tell where the link
            #  name ends and where the link target starts
            raise ftp_error.ParserError(
                  "name '%s' contains more than one \"->\"" % name)
        elif arrow_count == 1:
            st_name, st_target = name.split(" -> ")
        else:
            st_name, st_target = name, None
        stat_result = StatResult(
                      (st_mode, st_ino, st_dev, st_nlink, st_uid,
                       st_gid, st_size, st_atime, st_mtime, st_ctime) )
        stat_result._st_name = st_name
        stat_result._st_target = st_target
        return stat_result
307
308
class MSParser(Parser):
    """`Parser` class for MS-specific directory format."""

    def parse_line(self, line, time_shift=0.0):
        """
        Return a `StatResult` instance corresponding to the given
        text line from a FTP server which emits "Microsoft format"
        (see end of file).

        The line is expected to consist of the four fields date,
        time, size (or "<DIR>" for directories) and name. Only
        `st_mode`, `st_size` (`None` for directories) and `st_mtime`
        are set in the result; the other items are `None`.

        If the line can't be parsed, raise a `ParserError`.

        The parameter `time_shift` isn't used in this method but is
        listed for compatibility with the base class.
        """
        try:
            date, time_, dir_or_size, name = line.split(None, 3)
        except ValueError:
            # "unpack list of wrong size"
            raise ftp_error.ParserError("line '%s' can't be parsed" % line )
        # st_mode and st_size
        #  default to read access only; in fact, we can't tell;
        #  `stat.S_IRUSR` equals octal 400 but, unlike the literal
        #  `0400`, can be written the same way for all Python versions
        st_mode = stat.S_IRUSR
        if dir_or_size == "<DIR>":
            st_mode = st_mode | stat.S_IFDIR
            st_size = None
        else:
            st_mode = st_mode | stat.S_IFREG
            try:
                st_size = int(dir_or_size)
            except ValueError:
                raise ftp_error.ParserError("invalid size %s" % dir_or_size)
        # st_ino, st_dev, st_nlink, st_uid, st_gid
        st_ino = None
        st_dev = None
        st_nlink = None
        st_uid = None
        st_gid = None
        # st_atime
        st_atime = None
        # st_mtime
        st_mtime = self.parse_ms_time(date, time_, time_shift)
        # st_ctime
        st_ctime = None
        stat_result = StatResult(
                      (st_mode, st_ino, st_dev, st_nlink, st_uid,
                       st_gid, st_size, st_atime, st_mtime, st_ctime) )
        # _st_name and _st_target
        stat_result._st_name = name
        stat_result._st_target = None
        return stat_result
362
363 #
364 # Stat'ing operations for files on an FTP server
365 #
class _Stat(object):
    """
    Stat directories, links and regular files on an FTP host.

    The stat data is derived from parsed directory listings. All
    parsed lines are stored in an lstat cache.
    """

    def __init__(self, host):
        self._host = host
        self._path = host.path
        # start with the Unix parser; the MS parser is the fallback
        self._parser = UnixParser()
        # the parser may be exchanged once if the first one fails
        self._allow_parser_switching = True
        # only lstat results are cached; `stat` results are
        #  computed from `lstat` results
        self._lstat_cache = ftp_stat_cache.StatCache()

    def _host_dir(self, path):
        """
        Return the lines which the host's `_dir` method delivers
        for `path` (i. e. the output of FTP's `DIR` command).
        """
        return self._host._dir(path)

    def _real_listdir(self, path):
        """
        Return a list of the names of the items (directories, files
        etc.) in the directory `path`. The names of the current and
        the parent directory are left out.

        Raise a `PermanentError` if `path` isn't recognized as a
        directory. Raise a `ParserError` if the directory listing
        from the server can't be parsed.
        """
        # we _can't_ put this check into `FTPHost._dir`; see its docstring
        path = self._path.abspath(path)
        if not self._path.isdir(path):
            #TODO use FTP DIR command on the file to implicitly use
            #  the usual status code of the server for missing files
            #  (450 vs. 550)
            raise ftp_error.PermanentError(
                  "550 %s: no such directory or wrong directory parser used" %
                  path)
        listing = self._host_dir(path)
        # an empty directory: nothing to parse
        if listing == ['']:
            return []
        item_names = []
        for listing_line in listing:
            if self._parser.ignores_line(listing_line):
                continue
            # only the names are returned but the time shift is
            #  passed on so that the cached timestamps are correct
            item_stat = self._parser.parse_line(listing_line,
                                                self._host.time_shift())
            item_path = self._path.join(path, item_stat._st_name)
            self._lstat_cache[item_path] = item_stat
            item_name = item_stat._st_name
            if item_name not in (self._host.curdir, self._host.pardir):
                item_names.append(item_name)
        return item_names

    def _real_lstat(self, path, _exception_for_missing_path=True):
        """
        Return a stat result for `path`, similar to the result of
        `os.lstat`, i. e. links are not followed.

        The result is taken from the cache if possible. Otherwise,
        the listing of the directory which contains `path` is
        parsed. So if that listing can't be parsed, the outcome is
        a `ParserError`, regardless of whether `path` exists on the
        server. Stat'ing the root directory raises a `RootDirError`.

        If the listing can be parsed but doesn't contain `path`,
        raise a `PermanentError` or - if the argument
        `_exception_for_missing_path` is false - return `None`.

        (`_exception_for_missing_path` is an implementation aid and
        _not_ intended for use by ftputil clients.)
        """
        path = self._path.abspath(path)
        # a cached result saves us the directory listing
        if path in self._lstat_cache:
            return self._lstat_cache[path]
        # Note: (l)stat works by going one directory up and parsing
        #  the output of an FTP `DIR` command. Unfortunately, it is
        #  not possible to do this for the root directory `/`.
        if path == '/':
            raise ftp_error.RootDirError(
                  "can't stat remote root directory")
        dirname, basename = self._path.split(path)
        found_stat = None
        # Parse the whole listing, not only up to the line for
        #  `path`, to put as many stat results as possible into
        #  the cache.
        for listing_line in self._host_dir(dirname):
            if self._parser.ignores_line(listing_line):
                continue
            item_stat = self._parser.parse_line(listing_line,
                                                self._host.time_shift())
            item_path = self._path.join(dirname, item_stat._st_name)
            self._lstat_cache[item_path] = item_stat
            # remember the match ourselves, so this works without
            #  a cache or with a disabled cache
            if item_stat._st_name == basename:
                found_stat = item_stat
        if found_stat:
            return found_stat
        # the path wasn't in the listing
        if not _exception_for_missing_path:
            # Explicitly return `None` as the signal for
            #  `_Path.exists/isfile/isdir/islink` that the path
            #  wasn't found. With an exception, a missing path
            #  couldn't be told from a more severe error in the
            #  code above.
            return None
        #TODO use FTP DIR command on the file to implicitly use
        #  the usual status code of the server for missing files
        #  (450 vs. 550)
        raise ftp_error.PermanentError(
              "550 %s: no such file or directory" % path)

    def _real_stat(self, path, _exception_for_missing_path=True):
        """
        Return a stat result for `path`, with links being followed.

        Raise a `ParserError` if a directory listing needed on the
        way can't be parsed. If a path can't be found in a parsed
        listing, raise a `PermanentError` or - if the argument
        `_exception_for_missing_path` is false - return `None`.
        Also raise a `PermanentError` if the chain of links
        "behind" `path` is cyclic.

        (`_exception_for_missing_path` is an implementation aid and
        _not_ intended for use by ftputil clients.)
        """
        # keep the argument for the error message
        original_path = path
        # link targets seen so far, to detect a cyclic link chain
        visited_paths = {}
        while True:
            # gives the data of the link itself if `path` is a link
            lstat_result = self._real_lstat(path, _exception_for_missing_path)
            if lstat_result is None:
                return None
            # for anything but a link, `stat` and `lstat` agree
            if not stat.S_ISLNK(lstat_result.st_mode):
                return lstat_result
            # a link: go on with the normalized path of its target,
            #  taken relative to the directory containing the link
            dirname = self._path.split(path)[0]
            path = self._path.normpath(
                     self._path.join(dirname, lstat_result._st_target))
            if path in visited_paths:
                # we've been here before, so the links form a cycle
                raise ftp_error.PermanentError(
                      "recursive link structure detected for remote path '%s'" %
                      original_path)
            visited_paths[path] = True

    def __call_with_parser_retry(self, method, *args, **kwargs):
        """
        Return the result of calling `method` with `args` and
        `kwargs`.

        If the call raises a `ParserError` and parser switching is
        still allowed, switch to the `MSParser` and call `method`
        once more. If switching is no longer allowed or if the
        second call fails as well, the `ParserError` propagates.
        """
        # Do _not_ set `_allow_parser_switching` in a `finally` clause!
        #  This would cause a `PermanentError` due to a not-found
        #  file in an empty directory to finally establish the
        #  parser - which is wrong.
        try:
            result = method(*args, **kwargs)
        except ftp_error.ParserError:
            if not self._allow_parser_switching:
                raise
            # the one and only chance to try the other parser
            self._allow_parser_switching = False
            self._parser = MSParser()
            return method(*args, **kwargs)
        # an empty result (e. g. from `listdir` on an empty
        #  directory) doesn't tell anything about the usefulness
        #  of the parser
        # NOTE(review): `self._real_listdir` presumably yields a new
        #  bound method object on each access, so the identity test
        #  below looks as if it's always true - confirm the intent
        if (method is not self._real_listdir) and result:
            self._allow_parser_switching = False
        return result

    def listdir(self, path):
        """
        Return a list of the names of the items in `path`.

        Raise a `PermanentError` if the path doesn't exist, but
        maybe raise other exceptions depending on the state of
        the server (e. g. timeout).
        """
        return self.__call_with_parser_retry(self._real_listdir, path)

    def lstat(self, path, _exception_for_missing_path=True):
        """
        Return a `StatResult` for `path`; links are not followed.

        Raise a `PermanentError` if the path doesn't exist, but
        maybe raise other exceptions depending on the state of
        the server (e. g. timeout).
        """
        return self.__call_with_parser_retry(self._real_lstat, path,
                                             _exception_for_missing_path)

    def stat(self, path, _exception_for_missing_path=True):
        """
        Return a `StatResult` for `path`; links are followed.

        Raise a `PermanentError` if the path doesn't exist, but
        maybe raise other exceptions depending on the state of
        the server (e. g. timeout).
        """
        return self.__call_with_parser_retry(self._real_stat, path,
                                             _exception_for_missing_path)
585
Note: See TracBrowser for help on using the browser.