source: ftp_stat.py @ 803:52cb5a25265a

Last change on this file since 803:52cb5a25265a was 803:52cb5a25265a, checked in by Stefan Schwarzer <sschwarzer@…>, 12 years ago
Removed #TODO comment.
File size: 23.1 KB
Line 
1# Copyright (C) 2002-2008, Stefan Schwarzer
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8# - Redistributions of source code must retain the above copyright
9#   notice, this list of conditions and the following disclaimer.
10#
11# - Redistributions in binary form must reproduce the above copyright
12#   notice, this list of conditions and the following disclaimer in the
13#   documentation and/or other materials provided with the distribution.
14#
15# - Neither the name of the above author nor the names of the
16#   contributors to the software may be used to endorse or promote
17#   products derived from this software without specific prior written
18#   permission.
19#
20# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR
24# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32"""
33ftp_stat.py - stat result, parsers, and FTP stat'ing for `ftputil`
34"""
35
36# $Id$
37
38import re
39import stat
40import time
41
42import ftp_error
43import ftp_stat_cache
44
45
class StatResult(tuple):
    """
    Support class resembling a tuple like that returned from
    `os.(l)stat`.

    The items of the tuple can also be read as attributes
    (`st_mode`, `st_ino`, ..., `st_ctime`), see `_index_mapping`.
    The attributes `_st_name` and `_st_target` are set as regular
    instance attributes in `__init__`.
    """

    # map attribute names to the index of the value in the tuple
    _index_mapping = {
      'st_mode':  0, 'st_ino':   1, 'st_dev':    2, 'st_nlink':    3,
      'st_uid':   4, 'st_gid':   5, 'st_size':   6, 'st_atime':    7,
      'st_mtime': 8, 'st_ctime': 9, '_st_name': 10, '_st_target': 11}

    def __init__(self, sequence):
        # Don't call `__init__` via `super`. Construction from a
        #  sequence is implicitly handled by `tuple.__new__`, not
        #  `tuple.__init__`. As a by-product, this avoids a
        #  `DeprecationWarning` in Python 2.6+
        # these may be overwritten in a `Parser.parse_line` method
        self._st_name = ""
        self._st_target = None

    def __getattr__(self, attr_name):
        """
        Return the tuple item that belongs to `attr_name`.

        This method is only called if the regular attribute lookup
        has failed. If `attr_name` isn't a key of `_index_mapping`,
        raise an `AttributeError`.
        """
        # use the `in` operator instead of `dict.has_key`; the latter
        #  doesn't exist in Python 3 whereas `in` works everywhere
        if attr_name in self._index_mapping:
            return self[self._index_mapping[attr_name]]
        else:
            raise AttributeError("'StatResult' object has no attribute '%s'" %
                                 attr_name)
72
73#
74# FTP directory parsers
75#
class Parser(object):
    """
    Represent a parser for directory lines. Parsers for specific
    directory formats inherit from this class.
    """

    # map month abbreviations to month numbers
    _month_numbers = {
      'jan':  1, 'feb':  2, 'mar':  3, 'apr':  4,
      'may':  5, 'jun':  6, 'jul':  7, 'aug':  8,
      'sep':  9, 'oct': 10, 'nov': 11, 'dec': 12}

    # matches summary lines like "total 23"
    _total_regex = re.compile(r"^total\s+\d+")

    def ignores_line(self, line):
        """
        Return a true value if the line should be ignored, i. e. is
        assumed to _not_ contain actual directory/file/link data.
        A typical example are summary lines like "total 23" which
        are emitted by some FTP servers.

        If the line should be used to extract stat data from it,
        return a false value.
        """
        # either a match object or `None`
        match = self._total_regex.search(line)
        return bool(match)

    def parse_line(self, line, time_shift=0.0):
        """
        Return a `StatResult` object as derived from the string
        `line`. The parser code to use depends on the directory format
        the FTP server delivers (also see examples at end of file).

        If the given text line can't be parsed, raise a `ParserError`.

        For the definition of `time_shift` see the docstring of
        `FTPHost.set_time_shift` in `ftputil.py`. Not all parsers
        use the `time_shift` parameter.
        """
        raise NotImplementedError("must be defined by subclass")

    #
    # helper methods for parts of a directory listing line
    #
    def parse_unix_mode(self, mode_string):
        """
        Return an integer from the `mode_string`, compatible with
        the `st_mode` value in stat results. Such a mode string
        may look like "drwxr-xr-x".

        If the mode string can't be parsed, i. e. if it doesn't
        consist of ten characters or starts with an unknown file
        type character, raise an `ftp_error.ParserError`.
        """
        st_mode = 0
        if len(mode_string) != 10:
            raise ftp_error.ParserError("invalid mode string '%s'" %
                                        mode_string)
        # every character other than "-" counts as a set
        #  permission bit
        for bit in mode_string[1:10]:
            bit = (bit != '-')
            st_mode = (st_mode << 1) + bit
        if mode_string[3] == 's':
            st_mode = st_mode | stat.S_ISUID
        if mode_string[6] == 's':
            st_mode = st_mode | stat.S_ISGID
        # besides directories, links, character devices and regular
        #  files also accept block devices, FIFOs and sockets
        file_type_to_mode = {'b': stat.S_IFBLK,  'c': stat.S_IFCHR,
                             'd': stat.S_IFDIR,  'l': stat.S_IFLNK,
                             'p': stat.S_IFIFO,  's': stat.S_IFSOCK,
                             '-': stat.S_IFREG}
        file_type = mode_string[0]
        if file_type in file_type_to_mode:
            st_mode = st_mode | file_type_to_mode[file_type]
        else:
            raise ftp_error.ParserError(
                  "unknown file type character '%s'" % file_type)
        return st_mode

    def parse_unix_time(self, month_abbreviation, day, year_or_time,
                        time_shift):
        """
        Return a floating point number, like from `time.mktime`, by
        parsing the string arguments `month_abbreviation`, `day` and
        `year_or_time`. The parameter `time_shift` is the difference
        "time on server" - "time on client" and is available as the
        `time_shift` parameter in the `parse_line` interface.

        Times in Unix-style directory listings typically have one of
        these formats:

        - "Nov 23 02:33" (month name, day of month, time)

        - "May 26  2005" (month name, day of month, year)

        If this method can not make sense of the given arguments
        (unknown month name, non-numeric day, year or time), it
        raises an `ftp_error.ParserError`.
        """
        try:
            month = self._month_numbers[month_abbreviation.lower()]
        except KeyError:
            # `month` isn't bound here, so report the original string
            raise ftp_error.ParserError("invalid month name '%s'" %
                                        month_abbreviation)
        # convert all numeric parts in one place so that malformed
        #  input results in a `ParserError`, not a `ValueError`
        try:
            day = int(day)
            if ":" not in year_or_time:
                # `year_or_time` is really a year
                year, hour, minute = int(year_or_time), 0, 0
            else:
                # `year_or_time` is a time hh:mm; the year is
                #  determined below
                year = None
                hour, minute = [int(part)
                                for part in year_or_time.split(':')]
        except ValueError:
            raise ftp_error.ParserError(
                  "invalid day '%s' or year/time '%s'" % (day, year_or_time))
        if year is not None:
            st_mtime = time.mktime( (year, month, day,
                                     hour, minute, 0, 0, 0, -1) )
        else:
            # try the current year
            year = time.localtime()[0]
            st_mtime = time.mktime( (year, month, day,
                                     hour, minute, 0, 0, 0, -1) )
            # rhs of comparison: transform client time to server time
            #  (as on the lhs), so both can be compared with respect
            #  to the set time shift (see the definition of the time
            #  shift in `FTPHost.set_time_shift`'s docstring); the
            #  last addend allows for small deviations between the
            #  supposed (rounded) and the actual time shift
            # #XXX the downside of this "correction" is that there is
            #  a one-minute time interval exactly one year ago that
            #  may cause that datetime to be recognized as the current
            #  datetime, but after all the datetime from the server
            #  can only be exact up to a minute
            if st_mtime > time.time() + time_shift + 60.0:
                # if it's in the future, use previous year
                st_mtime = time.mktime( (year-1, month, day,
                                         hour, minute, 0, 0, 0, -1) )
        return st_mtime

    def parse_ms_time(self, date, time_, time_shift):
        """
        Return a floating point number, like from `time.mktime`, by
        parsing the string arguments `date` and `time_`. The parameter
        `time_shift` is the difference

            "time on server" - "time on client"

        and can be set as the `time_shift` parameter in the
        `parse_line` interface. It's not used by this method.

        Times in MS-style directory listings typically have the
        format "10-23-01 03:25PM" (month-day_of_month-two_digit_year,
        hour:minute, am/pm). Two-digit years from 70 on are assumed
        to belong to the 20th century, smaller ones to the 21st
        century. Four-digit years are used as they are.

        If this method can not make sense of the given arguments, it
        raises an `ftp_error.ParserError`.
        """
        # don't complain about unused `time_shift` argument
        # pylint: disable-msg=W0613
        try:
            month, day, year = [int(part) for part in date.split('-')]
        except ValueError:
            # non-numeric part or wrong number of parts
            raise ftp_error.ParserError("invalid date string '%s'" % date)
        if year >= 1000:
            # four-digit year, no need to guess the century
            pass
        elif year >= 70:
            year = 1900 + year
        else:
            year = 2000 + year
        try:
            hour, minute, am_pm = time_[0:2], time_[3:5], time_[5]
            hour, minute = int(hour), int(minute)
        except (ValueError, IndexError):
            raise ftp_error.ParserError("invalid time string '%s'" % time_)
        # convert from 12-hour clock to 24-hour clock
        if am_pm == 'A' and hour == 12:
            hour = 0
        if am_pm == 'P' and hour != 12:
            hour = hour + 12
        st_mtime = time.mktime( (year, month, day,
                                 hour, minute, 0, 0, 0, -1) )
        return st_mtime
242
243
class UnixParser(Parser):
    """`Parser` class for Unix-specific directory format."""

    def _split_line(self, line):
        """
        Split a line in metadata, nlink, user, group, size, month,
        day, year_or_time and name and return the result as a
        nine-element list of these values.

        For the format variants which lack the user field, `None`
        is inserted at the position of the user.

        If the line consists neither of eight nor of nine parts,
        raise an `ftp_error.ParserError`.
        """
        # This method encapsulates the recognition of an unusual
        #  Unix format variant (see ticket
        #  http://ftputil.sschwarzer.net/trac/ticket/12 )
        parts = line.split(None, 8)
        if len(parts) == 9:
            if parts[-1].startswith("-> "):
                # for the alternative format, the last part will not be
                #  "link_name -> link_target" but "-> link_target" and the
                #  link name will be in the previous field;
                # this heuristic will fail for names starting with "-> "
                #  which should be _quite_ rare
                # insert `None` for the user field
                parts.insert(2, None)
                # merge link name and "-> link_target" into one field
                parts[-2] = "%s %s" % tuple(parts[-2:])
                del parts[-1]
            return parts
        elif len(parts) == 8:
            # alternative unusual format, insert `None` for the user field
            parts.insert(2, None)
            return parts
        else:
            # no known Unix-style format
            raise ftp_error.ParserError("line '%s' can't be parsed" % line)

    def parse_line(self, line, time_shift=0.0):
        """
        Return a `StatResult` instance corresponding to the given
        text line. The `time_shift` value is needed to determine
        to which year a datetime without an explicit year belongs.

        In the result, `st_ino`, `st_dev`, `st_atime` and `st_ctime`
        are `None`; `st_uid` and `st_gid` are the user and group
        strings from the line (the user may be `None`, see
        `_split_line`).

        If the line can't be parsed, raise a `ParserError`. This
        includes non-numeric link counts or sizes and names which
        contain more than one " -> ".
        """
        mode_string, nlink, user, group, size, month, day, \
          year_or_time, name = self._split_line(line)
        # st_mode
        st_mode = self.parse_unix_mode(mode_string)
        # st_ino, st_dev, st_nlink, st_uid, st_gid, st_size, st_atime
        st_ino = None
        st_dev = None
        # report malformed numbers as a `ParserError`, as promised in
        #  the docstring, not as a `ValueError`
        try:
            st_nlink = int(nlink)
            st_size = int(size)
        except ValueError:
            raise ftp_error.ParserError(
                  "invalid link count '%s' or size '%s'" % (nlink, size))
        st_uid = user
        st_gid = group
        st_atime = None
        # st_mtime
        st_mtime = self.parse_unix_time(month, day, year_or_time, time_shift)
        # st_ctime
        st_ctime = None
        # st_name
        arrow_count = name.count(" -> ")
        if arrow_count > 1:
            # we can't tell where the link name ends and the link
            #  target starts
            raise ftp_error.ParserError(
                  "name '%s' contains more than one \"->\"" % name)
        elif arrow_count == 1:
            st_name, st_target = name.split(' -> ')
        else:
            st_name, st_target = name, None
        stat_result = StatResult(
                      (st_mode, st_ino, st_dev, st_nlink, st_uid,
                       st_gid, st_size, st_atime, st_mtime, st_ctime) )
        stat_result._st_name = st_name
        stat_result._st_target = st_target
        return stat_result
312
313
class MSParser(Parser):
    """`Parser` class for MS-specific directory format."""

    def parse_line(self, line, time_shift=0.0):
        """
        Return a `StatResult` instance corresponding to the given
        text line from a FTP server which emits "Microsoft format"
        (see end of file).

        In the result, only `st_mode`, `st_size` (`None` for
        directories) and `st_mtime` are set; the other items of
        the tuple are `None`.

        If the line can't be parsed, raise a `ParserError`.

        The parameter `time_shift` isn't used in this method but is
        listed for compatibility with the base class.
        """
        try:
            date, time_, dir_or_size, name = line.split(None, 3)
        except ValueError:
            # "unpack list of wrong size"
            raise ftp_error.ParserError("line '%s' can't be parsed" % line )
        # st_mode
        #  default to read access only; in fact, we can't tell
        #  (`stat.S_IRUSR` equals octal 400; the constant is used
        #  because the literal `0400` is a syntax error in Python 3)
        st_mode = stat.S_IRUSR
        # st_mode (file type) and st_size
        if dir_or_size == "<DIR>":
            st_mode = st_mode | stat.S_IFDIR
            st_size = None
        else:
            st_mode = st_mode | stat.S_IFREG
            try:
                st_size = int(dir_or_size)
            except ValueError:
                raise ftp_error.ParserError("invalid size %s" % dir_or_size)
        # st_ino, st_dev, st_nlink, st_uid, st_gid
        st_ino = None
        st_dev = None
        st_nlink = None
        st_uid = None
        st_gid = None
        # st_atime
        st_atime = None
        # st_mtime
        st_mtime = self.parse_ms_time(date, time_, time_shift)
        # st_ctime
        st_ctime = None
        stat_result = StatResult(
                      (st_mode, st_ino, st_dev, st_nlink, st_uid,
                       st_gid, st_size, st_atime, st_mtime, st_ctime) )
        # _st_name and _st_target
        stat_result._st_name = name
        stat_result._st_target = None
        return stat_result
367
368#
369# Stat'ing operations for files on an FTP server
370#
class _Stat(object):
    """
    Provide `listdir`, `lstat` and `stat` for remote paths by
    parsing the directory listings delivered by a host object.
    """

    def __init__(self, host):
        self._host = host
        self._path = host.path
        # start with the parser for the Unix directory format
        self._parser = UnixParser()
        # the parser may be exchanged once if the default parser
        #  fails
        self._allow_parser_switching = True
        # only `lstat` results are cached; `stat` derives its
        #  results from `lstat` results
        self._lstat_cache = ftp_stat_cache.StatCache()

    def _host_dir(self, path):
        """
        Return the list of lines which the host's `_dir` method
        (FTP's `DIR` command) delivers for `path`.
        """
        listing = self._host._dir(path)
        return listing

    def _real_listdir(self, path):
        """
        Return a list of the names of directories, files etc. in
        the directory `path`. The names of the current and the
        parent directory are left out.

        As a side effect, store the stat results of all parsed
        lines in the lstat cache.

        If `path` isn't a directory, raise a `PermanentError`. If
        the directory listing from the server can't be parsed
        raise a `ParserError`.
        """
        # we _can't_ put this check into `FTPHost._dir`; see its docstring
        absolute_path = self._path.abspath(path)
        # only directories and links to them can be listed
        if not self._path.isdir(absolute_path):
            raise ftp_error.PermanentError(
                  "550 %s: no such directory or wrong directory parser used" %
                  absolute_path)
        listing = self._host_dir(absolute_path)
        # a listing of a single empty line means there are no files
        if listing == ['']:
            return []
        entry_names = []
        for listing_line in listing:
            if not self._parser.ignores_line(listing_line):
                # only the names are returned but the `time_shift`
                #  is passed so that the cached stat results get
                #  correct timestamps
                entry = self._parser.parse_line(listing_line,
                                                self._host.time_shift())
                cache_key = self._path.join(absolute_path, entry._st_name)
                self._lstat_cache[cache_key] = entry
                entry_name = entry._st_name
                if entry_name not in (self._host.curdir, self._host.pardir):
                    entry_names.append(entry_name)
        return entry_names

    def _real_lstat(self, path, _exception_for_missing_path=True):
        """
        Return an object similar to that returned by `os.lstat`.

        The result is taken from the lstat cache if possible.
        Otherwise the listing of the directory which contains
        `path` is parsed; all parsed stat results go into the
        cache.

        If `path` is the root directory, raise a `RootDirError`.
        If the directory listing can't be parsed, raise a
        `ParserError`, regardless of whether `path` exists on the
        server. If the listing can be parsed but doesn't contain
        `path`, raise a `PermanentError` or, if
        `_exception_for_missing_path` is false, return `None`.

        (`_exception_for_missing_path` is an implementation aid and
        _not_ intended for use by ftputil clients.)
        """
        absolute_path = self._path.abspath(path)
        # a cached result saves the directory listing
        if absolute_path in self._lstat_cache:
            return self._lstat_cache[absolute_path]
        # Note: (l)stat works by going one directory up and parsing
        #  the output of an FTP `DIR` command. Unfortunately, it is
        #  not possible to do this for the root directory `/`.
        if absolute_path == '/':
            raise ftp_error.RootDirError(
                  "can't stat remote root directory")
        parent_dir, wanted_name = self._path.split(absolute_path)
        found = None
        # go through the complete listing, even after the wanted
        #  entry was found, to put as many stat results as
        #  possible into the cache
        for listing_line in self._host_dir(parent_dir):
            if not self._parser.ignores_line(listing_line):
                entry = self._parser.parse_line(listing_line,
                                                self._host.time_shift())
                cache_key = self._path.join(parent_dir, entry._st_name)
                self._lstat_cache[cache_key] = entry
                # remember the result directly; needed to work
                #  without cache or with disabled cache
                if entry._st_name == wanted_name:
                    found = entry
        if found:
            return found
        # the path wasn't in the listing
        if not _exception_for_missing_path:
            # be explicit; returning `None` is a signal for
            #  `_Path.exists/isfile/isdir/islink` that the path was
            #  not found; if we would raise an exception, there would
            #  be no distinction between a missing path or a more
            #  severe error in the code above
            return None
        #TODO use FTP DIR command on the file to implicitly use
        #  the usual status code of the server for missing files
        #  (450 vs. 550)
        raise ftp_error.PermanentError(
              "550 %s: no such file or directory" % absolute_path)

    def _real_stat(self, path, _exception_for_missing_path=True):
        """
        Return info from a "stat" call on `path`, i. e. follow
        symbolic links until something is found which isn't a
        link.

        If the directory containing `path` can't be parsed, raise
        a `ParserError`. If the listing can be parsed but the
        `path` can't be found, raise a `PermanentError` or, if
        `_exception_for_missing_path` is false, return `None`.
        Also raise a `PermanentError` if there's an endless
        (cyclic) chain of symbolic links "behind" the `path`.

        (`_exception_for_missing_path` is an implementation aid and
        _not_ intended for use by ftputil clients.)
        """
        # keep the start path for the error message
        start_path = path
        # link targets seen so far; used to detect cyclic link
        #  structures
        seen_targets = {}
        while True:
            # stat the link if it is one, else the file/directory
            entry = self._real_lstat(path, _exception_for_missing_path)
            if entry is None:
                return None
            # for anything but a link, the `stat` result is the
            #  `lstat` result
            if not stat.S_ISLNK(entry.st_mode):
                return entry
            # for a link, continue with the normalized path of the
            #  link target; the target is taken relative to the
            #  directory which contains the link
            parent_dir, _unused_name = self._path.split(path)
            path = self._path.normpath(
                     self._path.join(parent_dir, entry._st_target))
            # seeing a target for the second time means the links
            #  form a cycle
            if path in seen_targets:
                raise ftp_error.PermanentError(
                      "recursive link structure detected for remote path '%s'" %
                      start_path)
            seen_targets[path] = True

    def __call_with_parser_retry(self, method, *args, **kwargs):
        """
        Call `method` with the `args` and `kwargs` once. If that
        results in a `ParserError` and parser switching is still
        allowed, switch to the `MSParser` and call `method` again.
        If parser switching isn't allowed anymore or the second
        call fails, too, propagate the `ParserError`.
        """
        # Do _not_ set `_allow_parser_switching` in a `finally` clause!
        #  This would cause a `PermanentError` due to a not-found
        #  file in an empty directory to finally establish the
        #  parser - which is wrong.
        try:
            outcome = method(*args, **kwargs)
        except ftp_error.ParserError:
            if not self._allow_parser_switching:
                raise
            # use the one chance to switch the parser
            self._allow_parser_switching = False
            self._parser = MSParser()
            return method(*args, **kwargs)
        # if a `listdir` call didn't find anything, we can't
        #  say anything about the usefulness of the parser
        # NOTE(review): `self._real_listdir` presumably creates a new
        #  bound method object on each access, so the identity test
        #  looks like it's always true and only the truth value of
        #  the result counts - confirm the intent before changing it
        if (method is not self._real_listdir) and outcome:
            self._allow_parser_switching = False
        return outcome

    def listdir(self, path):
        """
        Return a list of items in `path`, see `_real_listdir`.
        The call is retried with another parser if necessary.

        Raise a `PermanentError` if the path doesn't exist, but
        maybe raise other exceptions depending on the state of
        the server (e. g. timeout).
        """
        names = self.__call_with_parser_retry(self._real_listdir, path)
        return names

    def lstat(self, path, _exception_for_missing_path=True):
        """
        Return a `StatResult` without following links, see
        `_real_lstat`. The call is retried with another parser
        if necessary.

        Raise a `PermanentError` if the path doesn't exist, but
        maybe raise other exceptions depending on the state of
        the server (e. g. timeout).
        """
        lstat_result = self.__call_with_parser_retry(
                         self._real_lstat, path, _exception_for_missing_path)
        return lstat_result

    def stat(self, path, _exception_for_missing_path=True):
        """
        Return a `StatResult` with following links, see
        `_real_stat`. The call is retried with another parser
        if necessary.

        Raise a `PermanentError` if the path doesn't exist, but
        maybe raise other exceptions depending on the state of
        the server (e. g. timeout).
        """
        stat_result = self.__call_with_parser_retry(
                        self._real_stat, path, _exception_for_missing_path)
        return stat_result
588
Note: See TracBrowser for help on using the repository browser.