1 | # Copyright (C) 2002-2018, Stefan Schwarzer <sschwarzer@sschwarzer.net> |
---|
2 | # and ftputil contributors (see `doc/contributors.txt`) |
---|
3 | # See the file LICENSE for licensing terms. |
---|
4 | |
---|
5 | """ |
---|
6 | ftputil.stat - stat result, parsers, and FTP stat'ing for `ftputil` |
---|
7 | """ |
---|
8 | |
---|
9 | import datetime |
---|
10 | import math |
---|
11 | import re |
---|
12 | import stat |
---|
13 | import time |
---|
14 | |
---|
15 | import ftputil.error |
---|
16 | import ftputil.stat_cache |
---|
17 | |
---|
18 | |
---|
# Public names of this module. These can be used to write custom parsers.
__all__ = ["StatResult", "Parser", "UnixParser", "MSParser"]


# Datetime precision values in seconds. The parsers in this module store
# one of these values in the `_st_mtime_precision` attribute of a
# `StatResult`.
#
# A timestamp with hour and minute (e. g. "Nov 23 02:33") is exact to
# one minute.
MINUTE_PRECISION = 60
# A timestamp with only a date (e. g. "May 26 2005") is exact to one day.
DAY_PRECISION = 24 * 60 * 60
# Used if no precision can be stated, for example for the sentinel
# value 0.0 that stands for a datetime before the epoch.
UNKNOWN_PRECISION = None
---|
27 | |
---|
28 | |
---|
class StatResult(tuple):
    """
    Tuple subclass which mimics the objects returned from
    `os.stat` and `os.lstat`.

    The items can be accessed by index or, by means of
    `_index_mapping`, as attributes like `st_mode` or `st_size`.
    """

    # Attribute name -> index in the tuple.
    _index_mapping = {
      "st_mode":   0,
      "st_ino":    1,
      "st_dev":    2,
      "st_nlink":  3,
      "st_uid":    4,
      "st_gid":    5,
      "st_size":   6,
      "st_atime":  7,
      "st_mtime":  8,
      "st_ctime":  9,
      "_st_name": 10,
      "_st_target": 11,
    }

    def __init__(self, sequence):
        # `tuple.__new__` already consumed `sequence`, so there's
        # nothing to pass on to `tuple.__init__`. Not calling it also
        # avoids a `DeprecationWarning` in Python 2.6+ .
        # pylint: disable=super-init-not-called
        #
        # The `sequence` parameter only exists to stay compatible
        # with the `__new__` interface.
        # pylint: disable=unused-argument
        #
        # Defaults; a `Parser.parse_line` method may overwrite them.
        self._st_name = ""
        self._st_target = None
        self._st_mtime_precision = UNKNOWN_PRECISION

    def __getattr__(self, attr_name):
        # Only called if the regular attribute lookup failed.
        if attr_name not in self._index_mapping:
            raise AttributeError("'StatResult' object has no attribute '{0}'".
                                 format(attr_name))
        return self[self._index_mapping[attr_name]]

    def __repr__(self):
        # Reverse `_index_mapping` to get from tuple indices to the
        # attribute names.
        names_by_index = {
          index: name for name, index in self._index_mapping.items()}
        arguments = ", ".join(
          "{0}={1!r}".format(names_by_index[position], value)
          for position, value in enumerate(self))
        return "{0}({1})".format(type(self).__name__, arguments)
---|
73 | |
---|
74 | |
---|
75 | # |
---|
76 | # FTP directory parsers |
---|
77 | # |
---|
class Parser:
    """
    Represent a parser for directory lines. Parsers for specific
    directory formats inherit from this class.

    Derived classes have to implement `parse_line`. The other methods
    are helpers to parse the parts of a directory listing line.
    """

    # Map month abbreviations to month numbers.
    _month_numbers = {
      "jan":  1, "feb":  2, "mar":  3, "apr":  4,
      "may":  5, "jun":  6, "jul":  7, "aug":  8,
      "sep":  9, "oct": 10, "nov": 11, "dec": 12}

    # Matches summary lines like "total 23", see `ignores_line`.
    _total_regex = re.compile(r"^total\s+\d+")

    # Positions in a mode string like "drwxr-xr-x" that can carry a
    # special bit in addition to the execute bit. Each item is
    # (index in mode string, character if the execute bit is set,
    # character if the execute bit is _not_ set, mode flag).
    _special_mode_bits = (
      (3, "s", "S", stat.S_ISUID),
      (6, "s", "S", stat.S_ISGID),
      (9, "t", "T", stat.S_ISVTX))

    def ignores_line(self, line):
        """
        Return a true value if the line should be ignored, i. e. is
        assumed to _not_ contain actual directory/file/link data.
        A typical example are summary lines like "total 23" which
        are emitted by some FTP servers.

        If the line should be used to extract stat data from it,
        return a false value.
        """
        # Ignore empty lines stemming from only a line break.
        if not line.strip():
            return True
        # `search` returns either a match object or `None`.
        return bool(self._total_regex.search(line))

    def parse_line(self, line, time_shift=0.0):
        """
        Return a `StatResult` object as derived from the string
        `line`. The parser code to use depends on the directory format
        the FTP server delivers (also see examples at end of file).

        If the given text line can't be parsed, raise a `ParserError`.

        For the definition of `time_shift` see the docstring of
        `FTPHost.set_time_shift` in `ftputil.py`. Not all parsers
        use the `time_shift` parameter.
        """
        raise NotImplementedError("must be defined by subclass")

    #
    # Helper methods for parts of a directory listing line
    #
    def parse_unix_mode(self, mode_string):
        """
        Return an integer from the `mode_string`, compatible with
        the `st_mode` value in stat results. Such a mode string
        may look like "drwxr-xr-x".

        In the execute positions, "s" (user and group) and "t"
        (others) mean that the execute bit _and_ the setuid, setgid
        or sticky bit, respectively, are set. The uppercase
        characters "S" and "T" mean that the special bit is set, but
        the execute bit is not.

        If the mode string can't be parsed, raise an
        `ftputil.error.ParserError`.
        """
        if len(mode_string) != 10:
            raise ftputil.error.ParserError("invalid mode string '{0}'".
                                            format(mode_string))
        # For each execute position, the character that says "special
        # bit set, execute bit _not_ set".
        no_execute_chars = {
          index: upper for index, _, upper, _ in self._special_mode_bits}
        st_mode = 0
        # Permission bits, most significant bit first. As before, any
        # character other than "-" (and the uppercase special
        # characters at their positions) counts as a set bit.
        for index in range(1, 10):
            char = mode_string[index]
            bit = (char != "-") and (char != no_execute_chars.get(index))
            st_mode = (st_mode << 1) + bit
        # Setuid, setgid and sticky bit.
        for index, lower, upper, flag in self._special_mode_bits:
            if mode_string[index] in (lower, upper):
                st_mode = st_mode | flag
        file_type_to_mode = {"b": stat.S_IFBLK, "c": stat.S_IFCHR,
                             "d": stat.S_IFDIR, "l": stat.S_IFLNK,
                             "p": stat.S_IFIFO, "s": stat.S_IFSOCK,
                             "-": stat.S_IFREG,
                             # Ignore types which `ls` can't make sense of
                             # (assuming the FTP server returns listings
                             # like `ls` does).
                             "?": 0,
                            }
        file_type = mode_string[0]
        if file_type in file_type_to_mode:
            st_mode = st_mode | file_type_to_mode[file_type]
        else:
            raise ftputil.error.ParserError(
                    "unknown file type character '{0}'".format(file_type))
        return st_mode

    def _as_int(self, int_string, int_description):
        """
        Return `int_string` converted to an integer.

        If it can't be converted, raise a `ParserError`, using
        `int_description` in the error message. For example, if the
        integer value is a day, pass "day" for `int_description`.
        """
        try:
            return int(int_string)
        except ValueError:
            raise ftputil.error.ParserError("non-integer {0} value {1!r}".
                                            format(int_description,
                                                   int_string))

    def _mktime(self, mktime_tuple):
        """
        Return a float value like `time.mktime` does, but ...

        - Raise a `ParserError` if parts of `mktime_tuple` are
          invalid (say, a day is 32).

        - If the resulting float value would be smaller than 0.0
          (indicating a time before the "epoch") return a sentinel
          value of 0.0. Do this also if the native `mktime`
          implementation would raise an `OverflowError`.
        """
        # Year, month, day, hour, minute, second
        datetime_tuple = mktime_tuple[:6]
        try:
            # Only for sanity checks, we're not interested in the
            # return value.
            datetime.datetime(*datetime_tuple)
        # For example, day == 32. Not all implementations of `mktime`
        # catch this kind of error.
        except ValueError:
            invalid_datetime = ("%04d-%02d-%02d %02d:%02d:%02d" %
                                datetime_tuple)
            raise ftputil.error.ParserError("invalid datetime {0!r}".
                                            format(invalid_datetime))
        try:
            time_float = time.mktime(mktime_tuple)
        except (OverflowError, ValueError):
            # Sentinel for times before the epoch, see ticket #83.
            time_float = 0.0
        # Don't allow float values smaller than 0.0 because, according
        # to https://docs.python.org/3/library/time.html#module-time ,
        # these might be undefined for some platforms.
        return max(0.0, time_float)

    def parse_unix_time(self, month_abbreviation, day, year_or_time,
                        time_shift, with_precision=False):
        """
        Return a floating point number, like from `time.mktime`, by
        parsing the string arguments `month_abbreviation`, `day` and
        `year_or_time`. The parameter `time_shift` is the difference
        "time on server" - "time on client" and is available as the
        `time_shift` parameter in the `parse_line` interface.

        If `with_precision` is true (default: false), return a
        two-element tuple consisting of the floating point number as
        described in the previous paragraph and the precision of the
        time in seconds. The default is `False` for backward
        compatibility with custom parsers.

        The precision value takes into account that, for example, a
        time string like "May 26 2005" has only a precision of one
        day. This information is important for the `upload_if_newer`
        and `download_if_newer` methods in the `FTPHost` class.

        Times in Unix-style directory listings typically have one of
        these formats:

        - "Nov 23 02:33" (month name, day of month, time)

        - "May 26 2005" (month name, day of month, year)

        If this method can't make sense of the given arguments, it
        raises an `ftputil.error.ParserError`.
        """
        try:
            month = self._month_numbers[month_abbreviation.lower()]
        except KeyError:
            raise ftputil.error.ParserError("invalid month abbreviation {0!r}".
                                            format(month_abbreviation))
        day = self._as_int(day, "day")
        if ":" not in year_or_time:
            # `year_or_time` is really a year.
            year, hour, minute = self._as_int(year_or_time, "year"), 0, 0
            st_mtime = self._mktime( (year, month, day,
                                      hour, minute, 0, 0, 0, -1) )
            st_mtime_precision = DAY_PRECISION
        else:
            # `year_or_time` is a time hh:mm. Anything else with a
            # colon (for example "hh:mm:ss") isn't supported and must
            # result in a `ParserError`, not in a `ValueError` from
            # tuple unpacking.
            time_parts = year_or_time.split(":")
            if len(time_parts) != 2:
                raise ftputil.error.ParserError("invalid time string '{0}'".
                                                format(year_or_time))
            hour = self._as_int(time_parts[0], "hour")
            minute = self._as_int(time_parts[1], "minute")
            # Try the current year
            year = time.localtime()[0]
            st_mtime = self._mktime( (year, month, day,
                                      hour, minute, 0, 0, 0, -1) )
            st_mtime_precision = MINUTE_PRECISION
            # Rhs of comparison: Transform client time to server time
            # (as on the lhs), so both can be compared with respect
            # to the set time shift (see the definition of the time
            # shift in `FTPHost.set_time_shift`'s docstring). The
            # last addend allows for small deviations between the
            # supposed (rounded) and the actual time shift.
            #
            # XXX The downside of this "correction" is that there is
            # a one-minute time interval exactly one year ago that
            # may cause that datetime to be recognized as the current
            # datetime, but after all the datetime from the server
            # can only be exact up to a minute.
            if st_mtime > time.time() + time_shift + st_mtime_precision:
                # If it's in the future, use previous year.
                st_mtime = self._mktime( (year-1, month, day,
                                          hour, minute, 0, 0, 0, -1) )
        # If we had a datetime before the epoch, the resulting value
        # 0.0 doesn't tell us anything about the precision.
        if st_mtime == 0.0:
            st_mtime_precision = UNKNOWN_PRECISION
        #
        if with_precision:
            return st_mtime, st_mtime_precision
        else:
            return st_mtime

    def parse_ms_time(self, date, time_, time_shift):
        """
        Return a floating point number, like from `time.mktime`, by
        parsing the string arguments `date` and `time_`. The parameter
        `time_shift` is the difference

            "time on server" - "time on client"

        and can be set as the `time_shift` parameter in the
        `parse_line` interface.

        Times in MS-style directory listings typically have the
        format "10-23-01 03:25PM" (month-day_of_month-two_digit_year,
        hour:minute, am/pm).

        If this method can't make sense of the given arguments, it
        raises an `ftputil.error.ParserError`.
        """
        # Derived classes may need access to `time_shift`.
        # pylint: disable=unused-argument
        #
        # For the time being, I don't add a `with_precision`
        # parameter as in the Unix parser because the precision for
        # the DOS format is always a minute and can be set in
        # `MSParser.parse_line`. Should you find yourself needing
        # support for `with_precision` for a derived class, please
        # send a mail (see ftputil.txt/html).
        #
        # Check the number of parts explicitly. Otherwise a malformed
        # date would cause a `ValueError` from tuple unpacking instead
        # of the documented `ParserError`.
        date_parts = date.split("-")
        if len(date_parts) != 3:
            raise ftputil.error.ParserError("invalid date string '{0}'".
                                            format(date))
        month, day, year = [self._as_int(part, "year/month/day")
                            for part in date_parts]
        if year >= 1000:
            # We have a four-digit year, so no need for heuristics.
            pass
        elif year >= 70:
            year = 1900 + year
        else:
            year = 2000 + year
        try:
            # The slices never fail; only the index access raises an
            # `IndexError` if the time string is too short.
            hour, minute, am_pm = time_[0:2], time_[3:5], time_[5]
        except IndexError:
            raise ftputil.error.ParserError("invalid time string '{0}'".
                                            format(time_))
        hour, minute = (
          self._as_int(hour, "hour"), self._as_int(minute, "minute"))
        # Convert from 12-hour to 24-hour clock.
        if hour == 12 and am_pm == "A":
            hour = 0
        if hour != 12 and am_pm == "P":
            hour += 12
        st_mtime = self._mktime( (year, month, day,
                                  hour, minute, 0, 0, 0, -1) )
        return st_mtime
---|
347 | |
---|
348 | |
---|
class UnixParser(Parser):
    """`Parser` class for Unix-specific directory format."""

    @staticmethod
    def _split_line(line):
        """
        Split a line in metadata, nlink, user, group, size, month,
        day, year_or_time and name and return the result as an
        nine-element list of these values. If the name is a link,
        it will be encoded as a string "link_name -> link_target".

        If the line has no user field, the user item in the returned
        list is `None`. If the line has too few fields for any known
        Unix-style format, raise a `ParserError`.
        """
        # This method encapsulates the recognition of an unusual
        # Unix format variant (see ticket
        # http://ftputil.sschwarzer.net/trac/ticket/12 ).
        line_parts = line.split()
        FIELD_COUNT_WITHOUT_USERID = 8
        FIELD_COUNT_WITH_USERID = FIELD_COUNT_WITHOUT_USERID + 1
        if len(line_parts) < FIELD_COUNT_WITHOUT_USERID:
            # No known Unix-style format
            raise ftputil.error.ParserError("line '{0}' can't be parsed".
                                            format(line))
        # If we have a valid format (either with or without user id field),
        # the field with index 5 is either the month abbreviation or a day.
        try:
            int(line_parts[5])
        except ValueError:
            # Month abbreviation, "invalid literal for int"
            #
            # Limit the number of splits so that whitespace in the
            # name is preserved.
            line_parts = line.split(None, FIELD_COUNT_WITH_USERID-1)
        else:
            # Day
            line_parts = line.split(None, FIELD_COUNT_WITHOUT_USERID-1)
            # Fill in the missing user field so that the result has
            # the same layout as for the usual format.
            USER_FIELD_INDEX = 2
            line_parts.insert(USER_FIELD_INDEX, None)
        return line_parts

    def parse_line(self, line, time_shift=0.0):
        """
        Return a `StatResult` instance corresponding to the given
        text line. The `time_shift` value is needed to determine
        to which year a datetime without an explicit year belongs.

        If the line can't be parsed, raise a `ParserError`. This
        includes lines whose link count or size fields aren't
        integers.
        """
        # The local variables are rather simple.
        # pylint: disable=too-many-locals
        try:
            mode_string, nlink, user, group, size, month, day, \
              year_or_time, name = self._split_line(line)
        # We can get a `ValueError` here if the name is blank (see
        # ticket #69). This is a strange use case, but at least we
        # should raise the exception the docstring mentions.
        except ValueError as exc:
            raise ftputil.error.ParserError(str(exc))
        # st_mode
        st_mode = self.parse_unix_mode(mode_string)
        # st_ino, st_dev, st_nlink, st_uid, st_gid, st_size, st_atime
        #
        # Use `_as_int` instead of a plain `int` call so that
        # non-integer fields result in the documented `ParserError`
        # and not in a `ValueError`.
        st_ino = None
        st_dev = None
        st_nlink = self._as_int(nlink, "nlink")
        st_uid = user
        st_gid = group
        st_size = self._as_int(size, "size")
        st_atime = None
        # st_mtime
        st_mtime, st_mtime_precision = \
          self.parse_unix_time(month, day, year_or_time, time_shift,
                               with_precision=True)
        # st_ctime
        st_ctime = None
        # st_name
        arrow_count = name.count(" -> ")
        if arrow_count > 1:
            # If we have more than one arrow we can't tell where the link
            # name ends and the target name starts.
            raise ftputil.error.ParserError(
                    '''name '{0}' contains more than one "->"'''.format(name))
        elif arrow_count == 1:
            st_name, st_target = name.split(" -> ")
        else:
            st_name, st_target = name, None
        stat_result = StatResult(
                      (st_mode, st_ino, st_dev, st_nlink, st_uid,
                       st_gid, st_size, st_atime, st_mtime, st_ctime) )
        # These attributes are kind of "half-official". I'm not
        # sure whether they should be used by ftputil client code.
        # pylint: disable=protected-access
        stat_result._st_mtime_precision = st_mtime_precision
        stat_result._st_name = st_name
        stat_result._st_target = st_target
        return stat_result
---|
438 | |
---|
439 | |
---|
class MSParser(Parser):
    """`Parser` class for MS-specific directory format."""

    def parse_line(self, line, time_shift=0.0):
        """
        Return a `StatResult` instance for a directory line in
        "Microsoft format", i. e. a line consisting of date, time,
        either the string "<DIR>" or a size, and the name.

        If the line can't be parsed, raise a `ParserError`.

        The parameter `time_shift` is only passed on to
        `parse_ms_time`; it's listed for compatibility with the base
        class.
        """
        # The local variables are rather simple.
        # pylint: disable=too-many-locals
        #
        # Split at most three times to keep whitespace in the name.
        fields = line.split(None, 3)
        if len(fields) != 4:
            raise ftputil.error.ParserError("line '{0}' can't be parsed".
                                            format(line))
        date, time_, dir_or_size, name = fields
        # st_mode and st_size
        #
        # We can't tell the actual permissions from the listing,
        # so assume read access only.
        read_only = 0o400
        if dir_or_size == "<DIR>":
            st_mode = read_only | stat.S_IFDIR
            st_size = None
        else:
            st_mode = read_only | stat.S_IFREG
            try:
                st_size = int(dir_or_size)
            except ValueError:
                raise ftputil.error.ParserError("invalid size {0}".
                                                format(dir_or_size))
        # st_mtime
        st_mtime = self.parse_ms_time(date, time_, time_shift)
        # The listing doesn't contain values for st_ino, st_dev,
        # st_nlink, st_uid, st_gid, st_atime and st_ctime.
        stat_result = StatResult(
                      (st_mode, None, None, None, None,
                       None, st_size, None, st_mtime, None) )
        # These attributes are kind of "half-official". I'm not
        # sure whether they should be used by ftputil client code.
        # pylint: disable=protected-access
        stat_result._st_name = name
        stat_result._st_target = None
        # A value of 0.0 stands for a datetime before the epoch and
        # doesn't tell us anything about the precision. Otherwise
        # the precision is a minute.
        if st_mtime == 0.0:
            mtime_precision = UNKNOWN_PRECISION
        else:
            mtime_precision = MINUTE_PRECISION
        stat_result._st_mtime_precision = mtime_precision
        return stat_result
---|
507 | |
---|
508 | # |
---|
509 | # Stat'ing operations for files on an FTP server |
---|
510 | # |
---|
511 | class _Stat: |
---|
512 | """Methods for stat'ing directories, links and regular files.""" |
---|
513 | |
---|
514 | def __init__(self, host): |
---|
515 | self._host = host |
---|
516 | self._path = host.path |
---|
517 | # Use the Unix directory parser by default. |
---|
518 | self._parser = UnixParser() |
---|
519 | # Allow one chance to switch to another parser if the default |
---|
520 | # doesn't work. |
---|
521 | self._allow_parser_switching = True |
---|
522 | # Cache only lstat results. `stat` works locally on `lstat` results. |
---|
523 | self._lstat_cache = ftputil.stat_cache.StatCache() |
---|
524 | |
---|
525 | def _host_dir(self, path): |
---|
526 | """ |
---|
527 | Return a list of lines, as fetched by FTP's `LIST` command, |
---|
528 | when applied to `path`. |
---|
529 | """ |
---|
530 | return self._host._dir(path) |
---|
531 | |
---|
532 | def _stat_results_from_dir(self, path): |
---|
533 | """ |
---|
534 | Yield stat results extracted from the directory listing `path`. |
---|
535 | Omit the special entries for the directory itself and its parent |
---|
536 | directory. |
---|
537 | """ |
---|
538 | lines = self._host_dir(path) |
---|
539 | # `cache` is the "high-level" `StatCache` object whereas |
---|
540 | # `cache._cache` is the "low-level" `LRUCache` object. |
---|
541 | cache = self._lstat_cache |
---|
542 | # Auto-grow cache if the cache up to now can't hold as many |
---|
543 | # entries as there are in the directory `path`. |
---|
544 | if cache._enabled and len(lines) >= cache._cache.size: |
---|
545 | new_size = int(math.ceil(1.1 * len(lines))) |
---|
546 | cache.resize(new_size) |
---|
547 | # Yield stat results from lines. |
---|
548 | for line in lines: |
---|
549 | if self._parser.ignores_line(line): |
---|
550 | continue |
---|
551 | # For `listdir`, we are interested in just the names, |
---|
552 | # but we use the `time_shift` parameter to have the |
---|
553 | # correct timestamp values in the cache. |
---|
554 | stat_result = self._parser.parse_line(line, |
---|
555 | self._host.time_shift()) |
---|
556 | if stat_result._st_name in [self._host.curdir, self._host.pardir]: |
---|
557 | continue |
---|
558 | loop_path = self._path.join(path, stat_result._st_name) |
---|
559 | self._lstat_cache[loop_path] = stat_result |
---|
560 | yield stat_result |
---|
561 | |
---|
562 | def _real_listdir(self, path): |
---|
563 | """ |
---|
564 | Return a list of directories, files etc. in the directory |
---|
565 | named `path`. |
---|
566 | |
---|
567 | Like `os.listdir` the returned list elements have the type |
---|
568 | of the path argument. |
---|
569 | |
---|
570 | If the directory listing from the server can't be parsed, |
---|
571 | raise a `ParserError`. |
---|
572 | """ |
---|
573 | # We _can't_ put this check into `FTPHost._dir`; see its docstring. |
---|
574 | path = self._path.abspath(path) |
---|
575 | # `listdir` should only be allowed for directories and links to them. |
---|
576 | if not self._path.isdir(path): |
---|
577 | raise ftputil.error.PermanentError( |
---|
578 | "550 {0}: no such directory or wrong directory parser used". |
---|
579 | format(path)) |
---|
580 | # Set up for `for` loop. |
---|
581 | names = [] |
---|
582 | for stat_result in self._stat_results_from_dir(path): |
---|
583 | st_name = stat_result._st_name |
---|
584 | names.append(st_name) |
---|
585 | return names |
---|
586 | |
---|
587 | def _real_lstat(self, path, _exception_for_missing_path=True): |
---|
588 | """ |
---|
589 | Return an object similar to that returned by `os.lstat`. |
---|
590 | |
---|
591 | If the directory listing from the server can't be parsed, |
---|
592 | raise a `ParserError`. If the directory can be parsed and the |
---|
593 | `path` is not found, raise a `PermanentError`. That means that |
---|
594 | if the directory containing `path` can't be parsed we get a |
---|
595 | `ParserError`, independent on the presence of `path` on the |
---|
596 | server. |
---|
597 | |
---|
598 | (`_exception_for_missing_path` is an implementation aid and |
---|
599 | _not_ intended for use by ftputil clients.) |
---|
600 | """ |
---|
601 | path = self._path.abspath(path) |
---|
602 | # If the path is in the cache, return the lstat result. |
---|
603 | if path in self._lstat_cache: |
---|
604 | return self._lstat_cache[path] |
---|
605 | # Note: (l)stat works by going one directory up and parsing |
---|
606 | # the output of an FTP `LIST` command. Unfortunately, it is |
---|
607 | # not possible to do this for the root directory `/`. |
---|
608 | if path == "/": |
---|
609 | raise ftputil.error.RootDirError( |
---|
610 | "can't stat remote root directory") |
---|
611 | dirname, basename = self._path.split(path) |
---|
612 | # If even the directory doesn't exist and we don't want the |
---|
613 | # exception, treat it the same as if the path wasn't found in the |
---|
614 | # directory's contents (compare below). The use of `isdir` here |
---|
615 | # causes a recursion but that should be ok because that will at |
---|
616 | # the latest stop when we've gotten to the root directory. |
---|
617 | if not self._path.isdir(dirname) and not _exception_for_missing_path: |
---|
618 | return None |
---|
619 | # Loop through all lines of the directory listing. We |
---|
620 | # probably won't need all lines for the particular path but |
---|
621 | # we want to collect as many stat results in the cache as |
---|
622 | # possible. |
---|
623 | lstat_result_for_path = None |
---|
624 | for stat_result in self._stat_results_from_dir(dirname): |
---|
625 | # Needed to work without cache or with disabled cache. |
---|
626 | if stat_result._st_name == basename: |
---|
627 | lstat_result_for_path = stat_result |
---|
628 | if lstat_result_for_path is not None: |
---|
629 | return lstat_result_for_path |
---|
630 | # Path was not found during the loop. |
---|
631 | if _exception_for_missing_path: |
---|
632 | #TODO Use FTP `LIST` command on the file to implicitly use |
---|
633 | # the usual status code of the server for missing files |
---|
634 | # (450 vs. 550). |
---|
635 | raise ftputil.error.PermanentError( |
---|
636 | "550 {0}: no such file or directory".format(path)) |
---|
637 | else: |
---|
638 | # Be explicit. Returning `None` is a signal for |
---|
639 | # `_Path.exists/isfile/isdir/islink` that the path was |
---|
640 | # not found. If we would raise an exception, there would |
---|
641 | # be no distinction between a missing path or a more |
---|
642 | # severe error in the code above. |
---|
643 | return None |
---|
644 | |
---|
645 | def _real_stat(self, path, _exception_for_missing_path=True): |
---|
646 | """ |
---|
647 | Return info from a "stat" call on `path`. |
---|
648 | |
---|
649 | If the directory containing `path` can't be parsed, raise |
---|
650 | a `ParserError`. If the listing can be parsed but the |
---|
651 | `path` can't be found, raise a `PermanentError`. Also raise |
---|
652 | a `PermanentError` if there's an endless (cyclic) chain of |
---|
653 | symbolic links "behind" the `path`. |
---|
654 | |
---|
655 | (`_exception_for_missing_path` is an implementation aid and |
---|
656 | _not_ intended for use by ftputil clients.) |
---|
657 | """ |
---|
658 | # Save for error message. |
---|
659 | original_path = path |
---|
660 | # Most code in this method is used to detect recursive |
---|
661 | # link structures. |
---|
662 | visited_paths = set() |
---|
663 | while True: |
---|
664 | # Stat the link if it is one, else the file/directory. |
---|
665 | lstat_result = self._real_lstat(path, _exception_for_missing_path) |
---|
666 | if lstat_result is None: |
---|
667 | return None |
---|
668 | # If the file is not a link, the `stat` result is the |
---|
669 | # same as the `lstat` result. |
---|
670 | if not stat.S_ISLNK(lstat_result.st_mode): |
---|
671 | return lstat_result |
---|
672 | # If we stat'ed a link, calculate a normalized path for |
---|
673 | # the file the link points to. |
---|
674 | dirname, _ = self._path.split(path) |
---|
675 | path = self._path.join(dirname, lstat_result._st_target) |
---|
676 | path = self._path.abspath(self._path.normpath(path)) |
---|
677 | # Check for cyclic structure. |
---|
678 | if path in visited_paths: |
---|
679 | # We had seen this path already. |
---|
680 | raise ftputil.error.RecursiveLinksError( |
---|
681 | "recursive link structure detected for remote path '{0}'". |
---|
682 | format(original_path)) |
---|
683 | # Remember the path we have encountered. |
---|
684 | visited_paths.add(path) |
---|
685 | |
---|
686 | def __call_with_parser_retry(self, method, *args, **kwargs): |
---|
687 | """ |
---|
688 | Call `method` with the `args` and `kwargs` once. If that |
---|
689 | results in a `ParserError` and only one parser has been |
---|
690 | used yet, try the other parser. If that still fails, |
---|
691 | propagate the `ParserError`. |
---|
692 | """ |
---|
693 | # Do _not_ set `_allow_parser_switching` in a `finally` clause! |
---|
694 | # This would cause a `PermanentError` due to a not-found |
---|
695 | # file in an empty directory to finally establish the |
---|
696 | # parser - which is wrong. |
---|
697 | try: |
---|
698 | result = method(*args, **kwargs) |
---|
699 | # If a `listdir` call didn't find anything, we can't |
---|
700 | # say anything about the usefulness of the parser. |
---|
701 | if (method is not self._real_listdir) and result: |
---|
702 | self._allow_parser_switching = False |
---|
703 | return result |
---|
704 | except ftputil.error.ParserError: |
---|
705 | if self._allow_parser_switching: |
---|
706 | self._allow_parser_switching = False |
---|
707 | self._parser = MSParser() |
---|
708 | return method(*args, **kwargs) |
---|
709 | else: |
---|
710 | raise |
---|
711 | |
---|
712 | # Don't use these methods, but instead the corresponding methods |
---|
713 | # in the `FTPHost` class. |
---|
714 | def _listdir(self, path): |
---|
715 | """ |
---|
716 | Return a list of items in `path`. |
---|
717 | |
---|
718 | Raise a `PermanentError` if the path doesn't exist, but |
---|
719 | maybe raise other exceptions depending on the state of |
---|
720 | the server (e. g. timeout). |
---|
721 | """ |
---|
722 | return self.__call_with_parser_retry(self._real_listdir, path) |
---|
723 | |
---|
724 | def _lstat(self, path, _exception_for_missing_path=True): |
---|
725 | """ |
---|
726 | Return a `StatResult` without following links. |
---|
727 | |
---|
728 | Raise a `PermanentError` if the path doesn't exist, but |
---|
729 | maybe raise other exceptions depending on the state of |
---|
730 | the server (e. g. timeout). |
---|
731 | """ |
---|
732 | return self.__call_with_parser_retry(self._real_lstat, path, |
---|
733 | _exception_for_missing_path) |
---|
734 | |
---|
735 | def _stat(self, path, _exception_for_missing_path=True): |
---|
736 | """ |
---|
737 | Return a `StatResult` with following links. |
---|
738 | |
---|
739 | Raise a `PermanentError` if the path doesn't exist, but |
---|
740 | maybe raise other exceptions depending on the state of |
---|
741 | the server (e. g. timeout). |
---|
742 | """ |
---|
743 | return self.__call_with_parser_retry(self._real_stat, path, |
---|
744 | _exception_for_missing_path) |
---|