source: ftp_stat.py @ 798:ff1b73253239

Last change on this file since 798:ff1b73253239 was 798:ff1b73253239, checked in by Stefan Schwarzer <sschwarzer@…>, 12 years ago
Fix handling of 12 AM and 12 PM times in the MS format parser.
File size: 23.2 KB
Line 
# Copyright (C) 2002-2008, Stefan Schwarzer
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# - Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
#
# - Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
#
# - Neither the name of the above author nor the names of the
#   contributors to the software may be used to endorse or promote
#   products derived from this software without specific prior written
#   permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
"""
ftp_stat.py - stat result, parsers, and FTP stat'ing for `ftputil`
"""

# $Id$
37
38import re
39import stat
40import time
41
42import ftp_error
43import ftp_stat_cache
44
45
class StatResult(tuple):
    """
    Support class resembling a tuple like that returned from
    `os.(l)stat`.

    The stat fields are stored as the tuple items and can also be
    read as attributes (`st_mode`, `st_size` etc.) via
    `_index_mapping`. The name of the directory entry and the link
    target, if any, are kept as the plain instance attributes
    `_st_name` and `_st_target`.
    """

    # map attribute names to positions in the tuple
    _index_mapping = {
      'st_mode':  0, 'st_ino':   1, 'st_dev':    2, 'st_nlink':    3,
      'st_uid':   4, 'st_gid':   5, 'st_size':   6, 'st_atime':    7,
      'st_mtime': 8, 'st_ctime': 9, '_st_name': 10, '_st_target': 11}

    def __init__(self, sequence):
        # Don't call `__init__` via `super`. Construction from a
        #  sequence is implicitly handled by `tuple.__new__`, not
        #  `tuple.__init__`. As a by-product, this avoids a
        #  `DeprecationWarning` in Python 2.6+
        # these may be overwritten in a `Parser.parse_line` method
        self._st_name = ""
        self._st_target = None

    def __getattr__(self, attr_name):
        """
        Return the tuple item which belongs to `attr_name`.

        This method is only consulted if the regular attribute lookup
        fails. Raise an `AttributeError` for unknown names and for
        known names whose position isn't present in the tuple.
        """
        # `in` instead of the deprecated `dict.has_key`
        if attr_name in self._index_mapping:
            try:
                return self[self._index_mapping[attr_name]]
            except IndexError:
                # the name is known but the tuple is too short; an
                #  `IndexError` would confuse `hasattr`/`getattr`
                pass
        raise AttributeError("'StatResult' object has no attribute '%s'" %
                             attr_name)
72
#
# FTP directory parsers
#
class Parser(object):
    """
    Represent a parser for directory lines. Parsers for specific
    directory formats inherit from this class.
    """

    # map month abbreviations to month numbers
    _month_numbers = {
      'jan':  1, 'feb':  2, 'mar':  3, 'apr':  4,
      'may':  5, 'jun':  6, 'jul':  7, 'aug':  8,
      'sep':  9, 'oct': 10, 'nov': 11, 'dec': 12}

    # matches summary lines like "total 23"
    _total_regex = re.compile(r"^total\s+\d+")

    def ignores_line(self, line):
        """
        Return a true value if the line should be ignored, i. e. is
        assumed to _not_ contain actual directory/file/link data.
        A typical example are summary lines like "total 23" which
        are emitted by some FTP servers.

        If the line should be used to extract stat data from it,
        return a false value.
        """
        # `search` gives either a match object or `None`
        return bool(self._total_regex.search(line))

    def parse_line(self, line, time_shift=0.0):
        """
        Return a `StatResult` object as derived from the string
        `line`. The parser code to use depends on the directory format
        the FTP server delivers (also see examples at end of file).

        If the given text line can't be parsed, raise a `ParserError`.

        For the definition of `time_shift` see the docstring of
        `FTPHost.set_time_shift` in `ftputil.py`. Not all parsers
        use the `time_shift` parameter.
        """
        raise NotImplementedError("must be defined by subclass")

    #
    # helper methods for parts of a directory listing line
    #
    def parse_unix_mode(self, mode_string):
        """
        Return an integer from the `mode_string`, compatible with
        the `st_mode` value in stat results. Such a mode string
        may look like "drwxr-xr-x".

        The first character selects the file type (directory, link,
        character/block device, FIFO, socket or regular file); the
        remaining nine characters are the permission bits.

        If the mode string can't be parsed, raise an
        `ftp_error.ParserError`.
        """
        if len(mode_string) != 10:
            raise ftp_error.ParserError("invalid mode string '%s'" %
                                        mode_string)
        # every permission character other than "-" sets its bit
        st_mode = 0
        for bit in mode_string[1:10]:
            st_mode = (st_mode << 1) + (bit != '-')
        if mode_string[3] == 's':
            st_mode = st_mode | stat.S_ISUID
        if mode_string[6] == 's':
            st_mode = st_mode | stat.S_ISGID
        file_type_to_mode = {'b': stat.S_IFBLK, 'c': stat.S_IFCHR,
                             'd': stat.S_IFDIR, 'l': stat.S_IFLNK,
                             'p': stat.S_IFIFO, 's': stat.S_IFSOCK,
                             '-': stat.S_IFREG}
        file_type = mode_string[0]
        if file_type not in file_type_to_mode:
            raise ftp_error.ParserError(
                  "unknown file type character '%s'" % file_type)
        return st_mode | file_type_to_mode[file_type]

    def parse_unix_time(self, month_abbreviation, day, year_or_time,
                        time_shift):
        """
        Return a floating point number, like from `time.mktime`, by
        parsing the string arguments `month_abbreviation`, `day` and
        `year_or_time`. The parameter `time_shift` is the difference
        "time on server" - "time on client" and is available as the
        `time_shift` parameter in the `parse_line` interface.

        Times in Unix-style directory listings typically have one of
        these formats:

        - "Nov 23 02:33" (month name, day of month, time)

        - "May 26  2005" (month name, day of month, year)

        If this method can not make sense of the given arguments, it
        raises an `ftp_error.ParserError`.
        """
        try:
            month = self._month_numbers[month_abbreviation.lower()]
        except KeyError:
            # report the offending input; `month` isn't bound here
            raise ftp_error.ParserError("invalid month name '%s'" %
                                        month_abbreviation)
        has_year = ":" not in year_or_time
        try:
            day_number = int(day)
            if has_year:
                # `year_or_time` is really a year
                year, hour, minute = int(year_or_time), 0, 0
            else:
                # `year_or_time` is a time hh:mm
                hour, minute = [int(part)
                                for part in year_or_time.split(':')]
        except ValueError:
            # non-numeric parts or a wrong number of ":" separators
            raise ftp_error.ParserError("invalid date or time '%s %s %s'" %
                  (month_abbreviation, day, year_or_time))
        if has_year:
            return time.mktime( (year, month, day_number,
                                 hour, minute, 0, 0, 0, -1) )
        # no year given; try the current year
        year = time.localtime()[0]
        st_mtime = time.mktime( (year, month, day_number,
                                 hour, minute, 0, 0, 0, -1) )
        # rhs of comparison: transform client time to server time
        #  (as on the lhs), so both can be compared with respect
        #  to the set time shift (see the definition of the time
        #  shift in `FTPHost.set_time_shift`'s docstring); the
        #  last addend allows for small deviations between the
        #  supposed (rounded) and the actual time shift
        # #XXX the downside of this "correction" is that there is
        #  a one-minute time interval exactly one year ago that
        #  may cause that datetime to be recognized as the current
        #  datetime, but after all the datetime from the server
        #  can only be exact up to a minute
        if st_mtime > time.time() + time_shift + 60.0:
            # if it's in the future, use previous year
            st_mtime = time.mktime( (year-1, month, day_number,
                                     hour, minute, 0, 0, 0, -1) )
        return st_mtime

    def parse_ms_time(self, date, time_, time_shift):
        """
        Return a floating point number, like from `time.mktime`, by
        parsing the string arguments `date` and `time_`. The parameter
        `time_shift` is the difference

            "time on server" - "time on client"

        and can be set as the `time_shift` parameter in the
        `parse_line` interface.

        Times in MS-style directory listings typically have the
        format "10-23-01 03:25PM" (month-day_of_month-two_digit_year,
        hour:minute, am/pm). Two-digit years from 70 on are taken as
        19xx, smaller ones as 20xx.

        If this method can not make sense of the given arguments, it
        raises an `ftp_error.ParserError`.
        """
        # don't complain about unused `time_shift` argument
        # pylint: disable-msg=W0613
        try:
            month, day, year = [int(part) for part in date.split('-')]
            if year >= 70:
                year = 1900 + year
            else:
                year = 2000 + year
            # only the first letter of "AM"/"PM" is looked at
            hour, minute, am_pm = time_[0:2], time_[3:5], time_[5]
            hour, minute = int(hour), int(minute)
        except (ValueError, IndexError):
            raise ftp_error.ParserError("invalid time string '%s'" % time_)
        # 12 AM is midnight (hour 0), 12 PM is noon (hour 12)
        if am_pm == 'A' and hour == 12:
            hour = 0
        if am_pm == 'P' and hour != 12:
            hour = hour + 12
        st_mtime = time.mktime( (year, month, day,
                                 hour, minute, 0, 0, 0, -1) )
        return st_mtime
242
243
class UnixParser(Parser):
    """`Parser` class for Unix-specific directory format."""

    def _split_line(self, line):
        """
        Split a line in metadata, nlink, user, group, size, month,
        day, year_or_time and name and return the result as a
        nine-element list of these values.

        For the format variants without a user field, the user item
        in the returned list is `None`. If the line doesn't split
        into a known number of fields, raise a `ParserError`.
        """
        # This method encapsulates the recognition of an unusual
        #  Unix format variant (see ticket
        #  http://ftputil.sschwarzer.net/trac/ticket/12 )
        parts = line.split(None, 8)
        field_count = len(parts)
        if field_count == 9:
            if parts[-1].startswith("-> "):
                # for the alternative format, the last part will not be
                #  "link_name -> link_target" but "-> link_target" and the
                #  link name will be in the previous field;
                # this heuristic will fail for names starting with "-> "
                #  which should be _quite_ rare
                # insert `None` for the user field
                parts.insert(2, None)
                # merge link name and "-> link_target" into one item
                parts[-2] = "%s %s" % tuple(parts[-2:])
                del parts[-1]
            return parts
        if field_count == 8:
            # alternative unusual format, insert `None` for the user field
            parts.insert(2, None)
            return parts
        # no known Unix-style format
        raise ftp_error.ParserError("line '%s' can't be parsed" % line)

    def parse_line(self, line, time_shift=0.0):
        """
        Return a `StatResult` instance corresponding to the given
        text line. The `time_shift` value is needed to determine
        to which year a datetime without an explicit year belongs.

        `st_ino`, `st_dev`, `st_atime` and `st_ctime` can't be
        derived from the line and are set to `None`. `st_uid` and
        `st_gid` are the user and group strings from the line.

        If the line can't be parsed, raise a `ParserError`.
        """
        mode_string, nlink, user, group, size, month, day, \
          year_or_time, name = self._split_line(line)
        # st_mode
        st_mode = self.parse_unix_mode(mode_string)
        # st_nlink, st_size; a `ValueError` from `int` must not escape
        #  because callers only handle `ParserError` for bad lines
        try:
            st_nlink = int(nlink)
            st_size = int(size)
        except ValueError:
            raise ftp_error.ParserError(
                  "invalid link count or size in line '%s'" % line)
        # st_mtime
        st_mtime = self.parse_unix_time(month, day, year_or_time, time_shift)
        # st_name, st_target
        arrow_count = name.count(" -> ")
        if arrow_count > 1:
            # can't tell where the link name ends and the target starts
            raise ftp_error.ParserError(
                  "name '%s' contains more than one \" -> \"" % name)
        elif arrow_count == 1:
            st_name, st_target = name.split(' -> ')
        else:
            st_name, st_target = name, None
        # st_ino, st_dev, st_atime and st_ctime are unknown
        stat_result = StatResult(
                      (st_mode, None, None, st_nlink, user,
                       group, st_size, None, st_mtime, None) )
        stat_result._st_name = st_name
        stat_result._st_target = st_target
        return stat_result
312
313
class MSParser(Parser):
    """`Parser` class for MS-specific directory format."""

    def parse_line(self, line, time_shift=0.0):
        """
        Return a `StatResult` instance corresponding to the given
        text line from a FTP server which emits "Microsoft format"
        (see end of file).

        Only `st_mode`, `st_size` (for non-directories) and
        `st_mtime` are derived from the line; all other stat fields
        are `None`.

        If the line can't be parsed, raise a `ParserError`.

        The parameter `time_shift` isn't used in this method but is
        listed for compatibility with the base class.
        """
        try:
            date, time_, dir_or_size, name = line.split(None, 3)
        except ValueError:
            # "unpack list of wrong size"
            raise ftp_error.ParserError("line '%s' can't be parsed" % line )
        # st_mode and st_size
        #  default to read access only; in fact, we can't tell
        #  (`stat.S_IRUSR` equals octal 400 and, unlike the old-style
        #  octal literal, is valid syntax in every Python version)
        if dir_or_size == "<DIR>":
            st_mode = stat.S_IRUSR | stat.S_IFDIR
            st_size = None
        else:
            st_mode = stat.S_IRUSR | stat.S_IFREG
            try:
                st_size = int(dir_or_size)
            except ValueError:
                raise ftp_error.ParserError("invalid size %s" % dir_or_size)
        # st_mtime
        st_mtime = self.parse_ms_time(date, time_, time_shift)
        # st_ino, st_dev, st_nlink, st_uid, st_gid, st_atime and
        #  st_ctime are unknown
        stat_result = StatResult(
                      (st_mode, None, None, None, None,
                       None, st_size, None, st_mtime, None) )
        # _st_name and _st_target
        stat_result._st_name = name
        stat_result._st_target = None
        return stat_result
367
#
# Stat'ing operations for files on an FTP server
#
class _Stat(object):
    """Methods for stat'ing directories, links and regular files."""

    def __init__(self, host):
        """
        Set up stat'ing for `host`, which has to provide `path`,
        `_dir`, `time_shift`, `curdir` and `pardir`.
        """
        self._host = host
        self._path = host.path
        # use the Unix directory parser by default
        self._parser = UnixParser()
        # allow one chance to switch to another parser if the default
        #  doesn't work
        self._allow_parser_switching = True
        # cache only lstat results; `stat` works locally on `lstat` results
        self._lstat_cache = ftp_stat_cache.StatCache()

    def _host_dir(self, path):
        """
        Return a list of lines, as fetched by FTP's `DIR` command,
        when applied to `path`.
        """
        return self._host._dir(path)

    def _real_listdir(self, path):
        """
        Return a list of directories, files etc. in the directory
        named `path`. The entries for the current and the parent
        directory are left out.

        As a side effect, the stat results of all parsed lines are
        stored in the lstat cache.

        If the directory listing from the server can't be parsed
        raise a `ParserError`.
        """
        # we _can't_ put this check into `FTPHost._dir`; see its docstring
        path = self._path.abspath(path)
        if not self._path.isdir(path):
            #TODO use FTP DIR command on the file to implicitly use
            #  the usual status code of the server for missing files
            #  (450 vs. 550)
            raise ftp_error.PermanentError(
                  "550 %s: no such directory or wrong directory parser used" %
                  path)
        lines = self._host_dir(path)
        # exit the method now if there aren't any files
        if lines == ['']:
            return []
        names = []
        for line in lines:
            if self._parser.ignores_line(line):
                continue
            # for `listdir`, we are interested in just the names,
            #  but we use the `time_shift` parameter to have the
            #  correct timestamp values in the cache
            stat_result = self._parser.parse_line(line,
                                                  self._host.time_shift())
            st_name = stat_result._st_name
            loop_path = self._path.join(path, st_name)
            self._lstat_cache[loop_path] = stat_result
            if st_name not in (self._host.curdir, self._host.pardir):
                names.append(st_name)
        return names

    def _real_lstat(self, path, _exception_for_missing_path=True):
        """
        Return an object similar to that returned by `os.lstat`.

        If the directory listing from the server can't be parsed,
        raise a `ParserError`. If the directory can be parsed and the
        `path` is not found, raise a `PermanentError`. That means that
        if the directory containing `path` can't be parsed we get a
        `ParserError`, independent on the presence of `path` on the
        server.

        (`_exception_for_missing_path` is an implementation aid and
        _not_ intended for use by ftputil clients.)
        """
        path = self._path.abspath(path)
        # if the path is in the cache, return the lstat result
        if path in self._lstat_cache:
            return self._lstat_cache[path]
        # Note: (l)stat works by going one directory up and parsing
        #  the output of an FTP `DIR` command. Unfortunately, it is
        #  not possible to do this for the root directory `/`.
        if path == '/':
            raise ftp_error.RootDirError(
                  "can't stat remote root directory")
        dirname, basename = self._path.split(path)
        lstat_result_for_path = None
        # loop through all lines of the directory listing; we
        #  probably won't need all lines for the particular path but
        #  we want to collect as many stat results in the cache as
        #  possible
        for line in self._host_dir(dirname):
            if self._parser.ignores_line(line):
                continue
            stat_result = self._parser.parse_line(line,
                          self._host.time_shift())
            loop_path = self._path.join(dirname, stat_result._st_name)
            self._lstat_cache[loop_path] = stat_result
            # needed to work without cache or with disabled cache
            if stat_result._st_name == basename:
                lstat_result_for_path = stat_result
        # compare with `None` explicitly; the result is a tuple and
        #  shouldn't be judged by its truth value
        if lstat_result_for_path is not None:
            return lstat_result_for_path
        # path was not found
        if _exception_for_missing_path:
            #TODO use FTP DIR command on the file to implicitly use
            #  the usual status code of the server for missing files
            #  (450 vs. 550)
            raise ftp_error.PermanentError(
                  "550 %s: no such file or directory" % path)
        # be explicit; returning `None` is a signal for
        #  `_Path.exists/isfile/isdir/islink` that the path was
        #  not found; if we would raise an exception, there would
        #  be no distinction between a missing path or a more
        #  severe error in the code above
        return None

    def _real_stat(self, path, _exception_for_missing_path=True):
        """
        Return info from a "stat" call on `path`.

        If the directory containing `path` can't be parsed, raise
        a `ParserError`. If the listing can be parsed but the
        `path` can't be found, raise a `PermanentError`. Also raise
        a `PermanentError` if there's an endless (cyclic) chain of
        symbolic links "behind" the `path`.

        (`_exception_for_missing_path` is an implementation aid and
        _not_ intended for use by ftputil clients.)
        """
        # save for error message
        original_path = path
        # most code in this method is used to detect recursive
        #  link structures
        visited_paths = {}
        while True:
            # stat the link if it is one, else the file/directory
            lstat_result = self._real_lstat(path, _exception_for_missing_path)
            if lstat_result is None:
                return None
            # if the file is not a link, the `stat` result is the
            #  same as the `lstat` result
            if not stat.S_ISLNK(lstat_result.st_mode):
                return lstat_result
            # if we stat'ed a link, calculate a normalized path for
            #  the file the link points to; the target is taken
            #  relative to the directory which contains the link
            dirname = self._path.split(path)[0]
            path = self._path.join(dirname, lstat_result._st_target)
            path = self._path.normpath(path)
            # check for cyclic structure
            if path in visited_paths:
                # we had this path already
                raise ftp_error.PermanentError(
                      "recursive link structure detected for remote path '%s'" %
                      original_path)
            # remember the path we have encountered
            visited_paths[path] = True

    def __call_with_parser_retry(self, method, *args, **kwargs):
        """
        Call `method` with the `args` and `kwargs` once. If that
        results in a `ParserError` and only one parser has been
        used yet, try the other parser. If that still fails,
        propagate the `ParserError`.
        """
        # Do _not_ set `_allow_parser_switching` in a `finally` clause!
        #  This would cause a `PermanentError` due to a not-found
        #  file in an empty directory to finally establish the
        #  parser - which is wrong.
        try:
            result = method(*args, **kwargs)
            # an empty result (e. g. a `listdir` call which didn't
            #  find anything or a path which wasn't found) doesn't
            #  say anything about the usefulness of the parser
            # This code used to test `method is not self._real_listdir`
            #  in addition. Each attribute access makes a new bound
            #  method object, so that identity test was always true
            #  and has been left out; the behavior is the same.
            if result:
                self._allow_parser_switching = False
            return result
        except ftp_error.ParserError:
            if self._allow_parser_switching:
                self._allow_parser_switching = False
                self._parser = MSParser()
                return method(*args, **kwargs)
            else:
                raise

    def listdir(self, path):
        """
        Return a list of items in `path`.

        Raise a `PermanentError` if the path doesn't exist, but
        maybe raise other exceptions depending on the state of
        the server (e. g. timeout).
        """
        return self.__call_with_parser_retry(self._real_listdir, path)

    def lstat(self, path, _exception_for_missing_path=True):
        """
        Return a `StatResult` without following links.

        Raise a `PermanentError` if the path doesn't exist, but
        maybe raise other exceptions depending on the state of
        the server (e. g. timeout).
        """
        return self.__call_with_parser_retry(self._real_lstat, path,
                                             _exception_for_missing_path)

    def stat(self, path, _exception_for_missing_path=True):
        """
        Return a `StatResult` with following links.

        Raise a `PermanentError` if the path doesn't exist, but
        maybe raise other exceptions depending on the state of
        the server (e. g. timeout).
        """
        return self.__call_with_parser_retry(self._real_stat, path,
                                             _exception_for_missing_path)
590
Note: See TracBrowser for help on using the repository browser.