1 | # Copyright (C) 2013-2018, Stefan Schwarzer |
---|
2 | # and ftputil contributors (see `doc/contributors.txt`) |
---|
3 | # See the file LICENSE for licensing terms. |
---|
4 | |
---|
5 | """ |
---|
6 | tool.py - helper code |
---|
7 | """ |
---|
8 | |
---|
9 | import ftputil.compat as compat |
---|
10 | |
---|
11 | |
---|
# Names made available by a star-import of this module.
__all__ = [
    "same_string_type_as",
    "as_bytes",
    "as_unicode",
    "as_default_string",
]
---|
14 | |
---|
15 | |
---|
# Encoding used to convert between byte strings and unicode strings.
#
# "latin1" maps each of the 256 possible byte values to exactly one
# code point (U+0000 to U+00FF). Any byte string can therefore be
# decoded to a unicode string and encoded back again without
# information loss and without encoding-related errors. Note that the
# guarantee only holds in this direction: a unicode string containing
# code points above U+00FF can't be encoded with "latin1".
#
# The `ftplib` module under Python 3 also uses the "latin1" encoding
# internally. It's important to use the same encoding here, so that
# users who used `ftplib` to create FTP items with non-ASCII characters
# can access them in the same way with ftputil.
LOSSLESS_ENCODING = "latin1"
---|
24 | |
---|
25 | |
---|
def same_string_type_as(type_source, content_source):
    """
    Return the content of `content_source` as a string that has the
    same type (byte string or unicode string) as `type_source`.

    `type_source` is only inspected for its type; its value is
    ignored. If the two arguments are of different string types,
    `content_source` is encoded or decoded with `LOSSLESS_ENCODING`,
    whichever is needed. In every other case, `content_source` is
    returned unchanged.
    """
    content_is_unicode = isinstance(content_source, compat.unicode_type)
    content_is_bytes = isinstance(content_source, compat.bytes_type)
    # Byte string wanted, unicode string given.
    if isinstance(type_source, compat.bytes_type) and content_is_unicode:
        return content_source.encode(LOSSLESS_ENCODING)
    # Unicode string wanted, byte string given.
    if isinstance(type_source, compat.unicode_type) and content_is_bytes:
        return content_source.decode(LOSSLESS_ENCODING)
    # Types already agree, or at least one argument isn't a string.
    return content_source
---|
45 | |
---|
46 | |
---|
def as_bytes(string):
    """
    Return `string` as a byte string.

    A unicode string is converted by `same_string_type_as`; any other
    argument is returned as it is.
    """
    # Only the type of the exemplar matters, not its (empty) content.
    bytes_exemplar = b""
    return same_string_type_as(
        type_source=bytes_exemplar, content_source=string
    )
---|
53 | |
---|
54 | |
---|
def as_unicode(string):
    """
    Return `string` as a unicode string.

    A byte string is converted by `same_string_type_as`; any other
    argument is returned as it is.
    """
    # Only the type of the exemplar matters, not its (empty) content.
    unicode_exemplar = ""
    return same_string_type_as(
        type_source=unicode_exemplar, content_source=string
    )
---|
61 | |
---|
62 | |
---|
def as_default_string(string):
    """
    Return `string` converted to the default string type for the
    running Python version.

    The target type is taken from a fresh instance created with
    `compat.default_string_type()`. The conversion itself is delegated
    to `same_string_type_as`, which uses `LOSSLESS_ENCODING` whenever
    it has to encode or decode.
    """
    default_exemplar = compat.default_string_type()
    return same_string_type_as(
        type_source=default_exemplar, content_source=string
    )
---|
70 | |
---|
71 | |
---|
def encode_if_unicode(string, encoding):
    """
    Return `string` encoded with `encoding` if it's a unicode string.

    Any argument that isn't a unicode string is passed through
    unchanged.
    """
    # Nothing to do for byte strings and other non-unicode values.
    if not isinstance(string, compat.unicode_type):
        return string
    return string.encode(encoding)
---|