root/tags/release2_2/ftp_stat.py

Revision 671, 21.4 kB (checked in by schwa, 2 years ago)
Use Python 2.2 style dictionary key test.
  • Property svn:mime-type set to text/x-python
  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
Line 
1 # Copyright (C) 2002-2006, Stefan Schwarzer
2 # All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
6 # met:
7 #
8 # - Redistributions of source code must retain the above copyright
9 #   notice, this list of conditions and the following disclaimer.
10 #
11 # - Redistributions in binary form must reproduce the above copyright
12 #   notice, this list of conditions and the following disclaimer in the
13 #   documentation and/or other materials provided with the distribution.
14 #
15 # - Neither the name of the above author nor the names of the
16 #   contributors to the software may be used to endorse or promote
17 #   products derived from this software without specific prior written
18 #   permission.
19 #
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR
24 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32 """
33 ftp_stat.py - stat result, parsers, and FTP stat'ing for `ftputil`
34 """
35
36 # $Id$
37
38 import re
39 import stat
40 import sys
41 import time
42
43 import ftp_error
44 import ftp_stat_cache
45
46
class StatResult(tuple):
    """
    Support class resembling a tuple like that returned from
    `os.(l)stat`.

    Besides index access, the items can be read as attributes
    (`st_mode`, `st_size` etc.); `_index_mapping` maps each supported
    attribute name to its position in the tuple.
    """
    _index_mapping = {
      'st_mode':  0, 'st_ino':   1, 'st_dev':    2, 'st_nlink':    3,
      'st_uid':   4, 'st_gid':   5, 'st_size':   6, 'st_atime':    7,
      'st_mtime': 8, 'st_ctime': 9, '_st_name': 10, '_st_target': 11}

    def __getattr__(self, attr_name):
        """
        Return the tuple item which belongs to `attr_name`.

        This method is only consulted if the normal attribute lookup
        fails, so attributes which were set explicitly on the instance
        take precedence over the tuple items.

        Raise an `AttributeError` if `attr_name` is not a known stat
        attribute or if the tuple is too short to contain the item.
        """
        if attr_name in self._index_mapping:
            try:
                return self[self._index_mapping[attr_name]]
            except IndexError:
                # the name is known but the tuple has no such item
                #  (e. g. `_st_name` on a ten-item tuple); report this
                #  as a missing attribute so that `hasattr` and
                #  `getattr` with a default value work as expected
                pass
        raise AttributeError("'StatResult' object has no attribute '%s'" %
                             attr_name)
63
64 #
65 # FTP directory parsers
66 #
class Parser(object):
    """
    Base class for the FTP directory listing parsers.

    Subclasses have to implement `parse_line`. The other methods are
    helpers which parse single parts of a directory listing line.
    """
    # map month abbreviations to month numbers
    _month_numbers = {
      'jan':  1, 'feb':  2, 'mar':  3, 'apr':  4,
      'may':  5, 'jun':  6, 'jul':  7, 'aug':  8,
      'sep':  9, 'oct': 10, 'nov': 11, 'dec': 12}

    # recognizes summary lines like "total 23"
    _total_regex = re.compile(r"^total\s+\d+")

    def ignores_line(self, line):
        """
        Return a true value if the line should be ignored, i. e. is
        assumed to _not_ contain actual directory/file/link data.
        A typical example are summary lines like "total 23" which
        are emitted by some FTP servers.

        If the line should be used to extract stat data from it,
        return a false value.
        """
        # either a match object or `None`
        match = self._total_regex.search(line)
        return bool(match)

    def parse_line(self, line, time_shift=0.0):
        """
        Return a `StatResult` object as derived from the string
        `line`. The parser code to use depends on the directory format
        the FTP server delivers (also see examples at end of file).

        If the given text line can't be parsed, raise a `ParserError`.

        For the definition of `time_shift` see the docstring of
        `FTPHost.set_time_shift` in `ftputil.py`. Not all parsers
        use the `time_shift` parameter.
        """
        raise NotImplementedError("must be defined by subclass")

    #
    # helper methods for parts of a directory listing line
    #
    def parse_unix_mode(self, mode_string):
        """
        Return an integer from the `mode_string`, compatible with
        the `st_mode` value in stat results. Such a mode string
        may look like "drwxr-xr-x".

        If the mode string can't be parsed, raise an
        `ftp_error.ParserError`.
        """
        st_mode = 0
        if len(mode_string) != 10:
            raise ftp_error.ParserError("invalid mode string '%s'" %
                                        mode_string)
        # every permission character other than "-" sets the
        #  corresponding bit, from "user read" down to "others execute"
        for bit in mode_string[1:10]:
            bit = (bit != '-')
            st_mode = (st_mode << 1) + bit
        # an "s" in an execute position denotes set-uid/set-gid
        if mode_string[3] == 's':
            st_mode = st_mode | stat.S_ISUID
        if mode_string[6] == 's':
            st_mode = st_mode | stat.S_ISGID
        file_type_to_mode = {'d': stat.S_IFDIR, 'l': stat.S_IFLNK,
                             'c': stat.S_IFCHR, '-': stat.S_IFREG}
        file_type = mode_string[0]
        if file_type in file_type_to_mode:
            st_mode = st_mode | file_type_to_mode[file_type]
        else:
            raise ftp_error.ParserError(
                  "unknown file type character '%s'" % file_type)
        return st_mode

    def parse_unix_time(self, month_abbreviation, day, year_or_time,
                        time_shift):
        """
        Return a floating point number, like from `time.mktime`, by
        parsing the string arguments `month_abbreviation`, `day` and
        `year_or_time`. The parameter `time_shift` is the difference
        "time on server" - "time on client" and is available as the
        `time_shift` parameter in the `parse_line` interface.

        Times in Unix-style directory listings typically have one of
        these formats:

        - "Nov 23 02:33" (month name, day of month, time)

        - "May 26  2005" (month name, day of month, year)

        If this method can not make sense of the given arguments, it
        raises an `ftp_error.ParserError`.
        """
        try:
            month = self._month_numbers[month_abbreviation.lower()]
        except KeyError:
            # `month` isn't bound here, so report the original argument
            raise ftp_error.ParserError("invalid month name '%s'" %
                                        month_abbreviation)
        try:
            day = int(day)
        except ValueError:
            raise ftp_error.ParserError("invalid day of month '%s'" % day)
        if ":" not in year_or_time:
            # `year_or_time` is really a year
            try:
                year = int(year_or_time)
            except ValueError:
                raise ftp_error.ParserError("invalid year '%s'" %
                                            year_or_time)
            hour, minute = 0, 0
            st_mtime = time.mktime( (year, month, day,
                                     hour, minute, 0, 0, 0, -1) )
        else:
            # `year_or_time` is a time hh:mm
            try:
                hour, minute = year_or_time.split(':')
                hour, minute = int(hour), int(minute)
            except ValueError:
                # wrong number of fields or non-numeric fields
                raise ftp_error.ParserError("invalid time '%s'" %
                                            year_or_time)
            # try the current year
            year = time.localtime()[0]
            st_mtime = time.mktime( (year, month, day,
                                     hour, minute, 0, 0, 0, -1) )
            # rhs of comparison: transform client time to server time
            #  (as on the lhs), so both can be compared with respect
            #  to the set time shift (see the definition of the time
            #  shift in `FTPHost.set_time_shift`'s docstring); the
            #  last addend allows for small deviations between the
            #  supposed (rounded) and the actual time shift
            # #XXX the downside of this "correction" is that there is
            #  a one-minute time interval exactly one year ago that
            #  may cause that datetime to be recognized as the current
            #  datetime, but after all the datetime from the server
            #  can only be exact up to a minute
            if st_mtime > time.time() + time_shift + 60.0:
                # if it's in the future, use previous year
                st_mtime = time.mktime( (year-1, month, day,
                                         hour, minute, 0, 0, 0, -1) )
        return st_mtime

    def parse_ms_time(self, date, time_, time_shift):
        """
        Return a floating point number, like from `time.mktime`, by
        parsing the string arguments `date` and `time_`. The parameter
        `time_shift` is the difference
        "time on server" - "time on client"
        and is available as the `time_shift` parameter in the
        `parse_line` interface. It is not used by this method.

        Times in MS-style directory listings typically have the
        format "10-23-01 03:25PM" (month-day_of_month-two_digit_year,
        hour:minute, am/pm).

        If this method can not make sense of the given arguments, it
        raises an `ftp_error.ParserError`.
        """
        try:
            month, day, year = map(int, date.split('-'))
            # two-digit years: 70-99 mean 19xx, 00-69 mean 20xx
            if year >= 70:
                year = 1900 + year
            else:
                year = 2000 + year
            hour, minute, am_pm = time_[0:2], time_[3:5], time_[5]
            hour, minute = int(hour), int(minute)
        except (ValueError, IndexError):
            raise ftp_error.ParserError("invalid time string '%s'" % time_)
        # convert from the 12-hour to the 24-hour clock; 12:xxPM is
        #  noon and must stay at hour 12 whereas 12:xxAM is midnight,
        #  i. e. hour 0
        if am_pm == 'P' and hour != 12:
            hour = hour + 12
        elif am_pm == 'A' and hour == 12:
            hour = 0
        st_mtime = time.mktime( (year, month, day,
                                 hour, minute, 0, 0, 0, -1) )
        return st_mtime
222
223
class UnixParser(Parser):
    """`Parser` class for Unix-specific directory format."""
    def _split_line(self, line):
        """
        Split a line in metadata, nlink, user, group, size, month,
        day, year_or_time and name and return the result as a
        nine-element list of these values. If the line doesn't
        contain a user field, the user item in the list is `None`.

        If the line has neither eight nor nine whitespace-separated
        parts, raise an `ftp_error.ParserError`.
        """
        # This method encapsulates the recognition of an unusual
        #  Unix format variant (see ticket
        #  http://ftputil.sschwarzer.net/trac/ticket/12 )
        parts = line.split(None, 8)
        if len(parts) == 9:
            if parts[-1].startswith("-> "):
                # for the alternative format, the last part will not be
                #  "link_name -> link_target" but "-> link_target" and the
                #  link name will be in the previous field;
                # this heuristic will fail for names starting with "-> "
                #  which should be _quite_ rare
                # insert `None` for the user field
                parts.insert(2, None)
                # merge link name and "-> link_target" into one item
                parts[-2] = "%s %s" % tuple(parts[-2:])
                del parts[-1]
            return parts
        elif len(parts) == 8:
            # alternative unusual format, insert `None` for the user field
            parts.insert(2, None)
            return parts
        else:
            # no known Unix-style format
            raise ftp_error.ParserError("line '%s' can't be parsed" % line)

    def parse_line(self, line, time_shift=0.0):
        """
        Return a `StatResult` instance corresponding to the given
        text line. The `time_shift` value is needed to determine
        to which year a datetime without an explicit year belongs.

        The name of the directory entry and, for links, the link
        target are stored in the attributes `_st_name` and
        `_st_target` of the result; `_st_target` is `None` if the
        name doesn't contain " -> ".

        If the line can't be parsed, raise a `ParserError`.
        """
        mode_string, nlink, user, group, size, month, day, \
          year_or_time, name = self._split_line(line)
        # st_mode
        st_mode = self.parse_unix_mode(mode_string)
        # st_ino, st_dev, st_nlink, st_uid, st_gid, st_size, st_atime
        st_ino = None
        st_dev = None
        try:
            st_nlink = int(nlink)
            st_size = int(size)
        except ValueError:
            # report non-numeric fields as documented instead of
            #  letting a `ValueError` escape
            raise ftp_error.ParserError("line '%s' can't be parsed" % line)
        st_uid = user
        st_gid = group
        st_atime = None
        # st_mtime
        st_mtime = self.parse_unix_time(month, day, year_or_time, time_shift)
        # st_ctime
        st_ctime = None
        # st_name
        if " -> " in name:
            name_parts = name.split(' -> ')
            if len(name_parts) != 2:
                # we can't tell where the link name ends and where the
                #  link target starts
                raise ftp_error.ParserError(
                      "name '%s' contains more than one \" -> \"" % name)
            st_name, st_target = name_parts
        else:
            st_name, st_target = name, None
        stat_result = StatResult(
                      (st_mode, st_ino, st_dev, st_nlink, st_uid,
                       st_gid, st_size, st_atime, st_mtime, st_ctime) )
        # the name and the link target are not part of the tuple but
        #  set as plain instance attributes
        stat_result._st_name = st_name
        stat_result._st_target = st_target
        return stat_result
291
292
class MSParser(Parser):
    """`Parser` class for MS-specific directory format."""
    def parse_line(self, line, time_shift=0.0):
        """
        Return a `StatResult` instance corresponding to the given
        text line from a FTP server which emits "Microsoft format"
        (see end of file).

        The line is expected to consist of the four fields date,
        time, size (or "<DIR>" for a directory) and name. Values
        which can't be derived from the line are set to `None`.

        If the line can't be parsed, raise a `ParserError`.

        The parameter `time_shift` isn't used in this method but is
        listed for compatibility with the base class.
        """
        try:
            date, time_, dir_or_size, name = line.split(None, 3)
        except ValueError:
            # "unpack list of wrong size"
            raise ftp_error.ParserError("line '%s' can't be parsed" % line )
        # st_mode and st_size
        #  default to read access only; in fact, we can't tell
        #  (`stat.S_IRUSR` is the "read by owner" bit, octal 400)
        st_mode = stat.S_IRUSR
        if dir_or_size == "<DIR>":
            st_mode = st_mode | stat.S_IFDIR
            st_size = None
        else:
            st_mode = st_mode | stat.S_IFREG
            try:
                st_size = int(dir_or_size)
            except ValueError:
                raise ftp_error.ParserError("invalid size %s" % dir_or_size)
        # st_ino, st_dev, st_nlink, st_uid, st_gid
        st_ino = None
        st_dev = None
        st_nlink = None
        st_uid = None
        st_gid = None
        # st_atime
        st_atime = None
        # st_mtime
        st_mtime = self.parse_ms_time(date, time_, time_shift)
        # st_ctime
        st_ctime = None
        stat_result = StatResult(
                      (st_mode, st_ino, st_dev, st_nlink, st_uid,
                       st_gid, st_size, st_atime, st_mtime, st_ctime) )
        # _st_name and _st_target; this format has no link information
        stat_result._st_name = name
        stat_result._st_target = None
        return stat_result
345
346 #
347 # Stat'ing operations for files on an FTP server
348 #
349 class _Stat(object):
350     """Methods for stat'ing directories, links and regular files."""
351     def __init__(self, host):
352         self._host = host
353         self._path = host.path
354         # use the Unix directory parser by default
355         self._parser = UnixParser()
356         # allow one chance to switch to another parser if the default
357         #  doesn't work
358         self._allow_parser_switching = True
359         # cache only lstat results; `stat` works locally on `lstat` results
360         self._lstat_cache = ftp_stat_cache.StatCache()
361
362     def _host_dir(self, path):
363         """
364         Return a list of lines, as fetched by FTP's `DIR` command,
365         when applied to `path`.
366         """
367         return self._host._dir(path)
368
369     def _real_listdir(self, path):
370         """
371         Return a list of directories, files etc. in the directory
372         named `path`.
373
374         If the directory listing from the server can't be parsed
375         raise a `ParserError`.
376         """
377         # we _can't_ put this check into `FTPHost._dir`; see its docstring
378         path = self._path.abspath(path)
379         if not self._path.isdir(path):
380             raise ftp_error.PermanentError(
381                   "550 %s: no such directory or wrong directory parser used" %
382                   path)
383         # set up for loop
384         lines = self._host_dir(path)
385         # exit the method now if there aren't any files
386         if lines == ['']:
387             return []
388         names = []
389         for line in lines:
390             if self._parser.ignores_line(line):
391                 continue
392             # for `listdir`, we are interested in just the names,
393             #  but we use the `time_shift` parameter to have the
394             #  correct timestamp values in the cache
395             stat_result = self._parser.parse_line(line,
396                                                   self._host.time_shift())
397             loop_path = self._path.join(path, stat_result._st_name)
398             self._lstat_cache[loop_path] = stat_result
399             st_name = stat_result._st_name
400             if st_name not in (self._host.curdir, self._host.pardir):
401                 names.append(st_name)
402         return names
403
404     def _real_lstat(self, path, _exception_for_missing_path=True):
405         """
406         Return an object similar to that returned by `os.lstat`.
407
408         If the directory listing from the server can't be parsed,
409         raise a `ParserError`. If the directory can be parsed and the
410         `path` is not found, raise a `PermanentError`. That means that
411         if the directory containing `path` can't be parsed we get a
412         `ParserError`, independent on the presence of `path` on the
413         server.
414
415         (`_exception_for_missing_path` is an implementation aid and
416         _not_ intended for use by ftputil clients.)
417         """
418         path = self._path.abspath(path)
419         # if the path is in the cache, return the lstat result
420         if path in self._lstat_cache:
421             return self._lstat_cache[path]
422         # get output from FTP's `DIR` command
423         lines = []
424         # Note: (l)stat works by going one directory up and parsing
425         #  the output of an FTP `DIR` command. Unfortunately, it is
426         #  not possible to do this for the root directory `/`.
427         if path == '/':
428             raise ftp_error.RootDirError(
429                   "can't stat remote root directory")
430         dirname, basename = self._path.split(path)
431         lstat_result_for_path = None
432         # loop through all lines of the directory listing; we
433         #  probably won't need all lines for the particular path but
434         #  we want to collect as many stat results in the cache as
435         #  possible
436         lines = self._host_dir(dirname)
437         for line in lines:
438             if self._parser.ignores_line(line):
439                 continue
440             stat_result = self._parser.parse_line(line,
441                           self._host.time_shift())
442             loop_path = self._path.join(dirname, stat_result._st_name)
443             self._lstat_cache[loop_path] = stat_result
444             # needed to work without cache or with disabled cache
445             if stat_result._st_name == basename:
446                 lstat_result_for_path = stat_result
447         if lstat_result_for_path:
448             return lstat_result_for_path
449         # path was not found
450         if _exception_for_missing_path:
451             raise ftp_error.PermanentError(
452                   "550 %s: no such file or directory" % path)
453         else:
454             # be explicit; returning `None` is a signal for
455             #  `_Path.exists/isfile/isdir/islink` that the path was
456             #  not found; if we would raise an exception, there would
457             #  be no distinction between a missing path or a more
458             #  severe error in the code above
459             return None
460
461     def _real_stat(self, path, _exception_for_missing_path=True):
462         """
463         Return info from a "stat" call on `path`.
464
465         If the directory containing `path` can't be parsed, raise
466         a `ParserError`. If the listing can be parsed but the
467         `path` can't be found, raise a `PermanentError`. Also raise
468         a `PermanentError` if there's an endless (cyclic) chain of
469         symbolic links "behind" the `path`.
470
471         (`_exception_for_missing_path` is an implementation aid and
472         _not_ intended for use by ftputil clients.)
473         """
474         # save for error message
475         original_path = path
476         # most code in this method is used to detect recursive
477         #  link structures
478         visited_paths = {}
479         while True:
480             # stat the link if it is one, else the file/directory
481             lstat_result = self._real_lstat(path, _exception_for_missing_path)
482             if lstat_result is None:
483                 return None
484             # if the file is not a link, the `stat` result is the
485             #  same as the `lstat` result
486             if not stat.S_ISLNK(lstat_result.st_mode):
487                 return lstat_result
488             # if we stat'ed a link, calculate a normalized path for
489             #  the file the link points to
490             dirname, basename = self._path.split(path)
491             path = self._path.join(dirname, lstat_result._st_target)
492             path = self._path.normpath(path)
493             # check for cyclic structure
494             if path in visited_paths:
495                 # we had this path already
496                 raise ftp_error.PermanentError(
497                       "recursive link structure detected for remote path '%s'" %
498                       original_path)
499             # remember the path we have encountered
500             visited_paths[path] = True
501
502     def __call_with_parser_retry(self, method, *args, **kwargs):
503         """
504         Call `method` with the `args` and `kwargs` once. If that
505         results in a `ParserError` and only one parser has been
506         used yet, try the other parser. If that still fails,
507         propagate the `ParserError`.
508         """
509         # Do _not_ set `_allow_parser_switching` in a `finally` clause!
510         #  This would cause a `PermanentError` due to a not-found
511         #  file in an empty directory to finally establish the
512         #  parser - which is wrong.
513         try:
514             result = method(*args, **kwargs)
515             # if a `listdir` call didn't find anything, we can't
516             #  say anything about the usefulness of the parser
517             if (method is not self._real_listdir) and result:
518                 self._allow_parser_switching = False
519             return result
520         except ftp_error.ParserError:
521             if self._allow_parser_switching:
522                 self._allow_parser_switching = False
523                 self._parser = MSParser()
524                 return method(*args, **kwargs)
525             else:
526                 raise
527
528     def listdir(self, path):
529         return self.__call_with_parser_retry(self._real_listdir, path)
530
531     def lstat(self, path, _exception_for_missing_path=True):
532         return self.__call_with_parser_retry(self._real_lstat, path,
533                                              _exception_for_missing_path)
534
535     def stat(self, path, _exception_for_missing_path=True):
536         return self.__call_with_parser_retry(self._real_stat, path,
537                                              _exception_for_missing_path)
538
Note: See TracBrowser for help on using the browser.