Changeset 2033:99fb8a5f1ab6


Ignore:
Timestamp:
Jan 27, 2021, 7:22:14 PM (10 months ago)
Author:
Stefan Schwarzer <sschwarzer@…>
Branch:
default
Message:
Make path encoding configurable

Some changes:

- Add `default_session_factory` in `host.py` and use the new session
  factory as the default factory instead of `ftplib.FTP`. This makes
  sure that we keep the behavior ftputil had under Python 3.8 and
  below, that is, latin-1 path encoding, under Python 3.9 and up as
  well.

- When creating a session from the session factory, store the
  session's encoding in the `_encoding` attribute of the `FTPHost`
  instance. If the created session has no `encoding` attribute, fall
  back to the default, i.e. "latin-1".

- Add an `encoding` argument to the string conversion tools in
  `tool.py`.

- When calling into `tool.py`, pass the `_encoding` from the `FTPHost`
  instance as the `encoding` argument. A few places call into
  `tool.py` without the session encoding being available; this
  applies to `error.py`. In these places, use the default encoding,
  latin-1.

- Adapt tests. For example, make sure that some mock FTP sessions have
  an `encoding` attribute set.

With these changes, the tests run successfully under Python 3.6 to
3.8. There are still a few tests failing under Python 3.9.

ticket: 143
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • ftputil/error.py

    r1983 r2033  
    1 # Copyright (C) 2003-2020, Stefan Schwarzer <sschwarzer@sschwarzer.net>
     1# Copyright (C) 2003-2021, Stefan Schwarzer <sschwarzer@sschwarzer.net>
    22# and ftputil contributors (see `doc/contributors.txt`)
    33# See the file LICENSE for licensing terms.
     
    5454            # though.
    5555            try:
    56                 self.strerror = ftputil.tool.as_str(args[0])
     56                self.strerror = ftputil.tool.as_str(
     57                    args[0], ftputil.tool.DEFAULT_ENCODING
     58                )
    5759            except TypeError:
    5860                # `args[0]` isn't `str` or `bytes`.
     
    176178            # If `exc_value.args[0]` is present, assume it's a byte or unicode
    177179            # string.
    178             if exc_value.args and ftputil.tool.as_str(exc_value.args[0]).startswith(
    179                 "502"
    180             ):
     180            if exc_value.args and ftputil.tool.as_str(
     181                exc_value.args[0], ftputil.tool.DEFAULT_ENCODING
     182            ).startswith("502"):
    181183                raise CommandNotImplementedError(
    182184                    *exc_value.args, original_error=exc_value
  • ftputil/host.py

    r1939 r2033  
    1 # Copyright (C) 2002-2020, Stefan Schwarzer <sschwarzer@sschwarzer.net>
     1# Copyright (C) 2002-2021, Stefan Schwarzer <sschwarzer@sschwarzer.net>
    22# and ftputil contributors (see `doc/contributors.txt`)
    33# See the file LICENSE for licensing terms.
     
    2020import ftputil.file_transfer
    2121import ftputil.path
     22import ftputil.session
    2223import ftputil.stat
    2324import ftputil.tool
     
    3132# attributes though.
    3233# pylint: disable=protected-access
     34
     35
     36# For Python versions 3.8 and below, ftputil has implicitly defaulted to
     37# latin-1 encoding. Prefer that behavior for Python 3.9 and up as well instead
     38# of using the encoding that is the default for `ftplib.FTP` in the Python
     39# version.
     40default_session_factory = ftputil.session.session_factory(encoding="latin-1")
    3341
    3442
     
    7987        self.stat_cache.enable()
    8088        with ftputil.error.ftplib_error_to_ftp_os_error:
     89            current_dir = self._session.pwd()
    8190            self._cached_current_dir = self.path.normpath(
    82                 ftputil.tool.as_str_path(self._session.pwd())
     91                ftputil.tool.as_str_path(current_dir, encoding=self._encoding)
    8392            )
    8493        # Associated `FTPHost` objects for data transfer.
     
    130139        # `FTPHost` object, use the same factory for this `FTPHost` object's
    131140        # child sessions.
    132         factory = kwargs.pop("session_factory", ftplib.FTP)
     141        factory = kwargs.pop("session_factory", default_session_factory)
    133142        with ftputil.error.ftplib_error_to_ftp_os_error:
    134143            session = factory(*args, **kwargs)
     144            self._encoding = getattr(session, "encoding", ftputil.tool.DEFAULT_ENCODING)
    135145        return session
    136146
     
    217227        # Support the same arguments as `open`.
    218228        # pylint: disable=too-many-arguments
    219         path = ftputil.tool.as_str_path(path)
     229        path = ftputil.tool.as_str_path(path, encoding=self._encoding)
    220230        host = self._available_child()
    221231        if host is None:
     
    490500        the data chunk that was transferred before the callback was called.
    491501        """
    492         target = ftputil.tool.as_str_path(target)
     502        target = ftputil.tool.as_str_path(target, encoding=self._encoding)
    493503        source_file, target_file = self._upload_files(source, target)
    494504        ftputil.file_transfer.copy_file(
     
    509519        the data chunk that was transferred before the callback was called.
    510520        """
    511         target = ftputil.tool.as_str_path(target)
     521        target = ftputil.tool.as_str_path(target, encoding=self._encoding)
    512522        source_file, target_file = self._upload_files(source, target)
    513523        return ftputil.file_transfer.copy_file(
     
    537547        the data chunk that was transferred before the callback was called.
    538548        """
    539         source = ftputil.tool.as_str_path(source)
     549        source = ftputil.tool.as_str_path(source, encoding=self._encoding)
    540550        source_file, target_file = self._download_files(source, target)
    541551        ftputil.file_transfer.copy_file(
     
    556566        the data chunk that was transferred before the callback was called.
    557567        """
    558         source = ftputil.tool.as_str_path(source)
     568        source = ftputil.tool.as_str_path(source, encoding=self._encoding)
    559569        source_file, target_file = self._download_files(source, target)
    560570        return ftputil.file_transfer.copy_file(
     
    630640        Change the directory on the host.
    631641        """
    632         path = ftputil.tool.as_str_path(path)
     642        path = ftputil.tool.as_str_path(path, encoding=self._encoding)
    633643        with ftputil.error.ftplib_error_to_ftp_os_error:
    634644            self._session.cwd(path)
     
    646656        ignored and only "supported" for similarity with `os.mkdir`.
    647657        """
    648         path = ftputil.tool.as_str_path(path)
     658        path = ftputil.tool.as_str_path(path, encoding=self._encoding)
    649659
    650660        def command(self, path):
     
    673683        raise a `PermanentError` with `errno` 17.
    674684        """
    675         path = ftputil.tool.as_str_path(path)
     685        path = ftputil.tool.as_str_path(path, encoding=self._encoding)
    676686        path = self.path.abspath(path)
    677687        directories = path.split(self.sep)
     
    725735        supported.
    726736        """
    727         path = ftputil.tool.as_str_path(path)
     737        path = ftputil.tool.as_str_path(path, encoding=self._encoding)
    728738        path = self.path.abspath(path)
    729739        if self.listdir(path):
     
    745755        other exceptions depending on the state of the server (e. g. timeout).
    746756        """
    747         path = ftputil.tool.as_str_path(path)
     757        path = ftputil.tool.as_str_path(path, encoding=self._encoding)
    748758        path = self.path.abspath(path)
    749759        # Though `isfile` includes also links to files, `islink` is needed to
     
    791801        Python 2.4 and adapted to ftputil.
    792802        """
    793         path = ftputil.tool.as_str_path(path)
     803        path = ftputil.tool.as_str_path(path, encoding=self._encoding)
    794804        # The following code is an adapted version of Python 2.4's
    795805        # `shutil.rmtree` function.
     
    837847        Rename the source on the FTP host to target.
    838848        """
    839         source = ftputil.tool.as_str_path(source)
    840         target = ftputil.tool.as_str_path(target)
     849        source = ftputil.tool.as_str_path(source, encoding=self._encoding)
     850        target = ftputil.tool.as_str_path(target, encoding=self._encoding)
    841851        # The following code is in spirit similar to the code in the method
    842852        # `_robust_ftp_command`, though we do _not_ do _everything_ imaginable.
     
    877887            def callback(line):
    878888                """Callback function."""
    879                 lines.append(ftputil.tool.as_str(line))
     889                lines.append(ftputil.tool.as_str(line, encoding=self._encoding))
    880890
    881891            with ftputil.error.ftplib_error_to_ftp_os_error:
     
    903913        """
    904914        original_path = path
    905         path = ftputil.tool.as_str_path(path)
     915        path = ftputil.tool.as_str_path(path, encoding=self._encoding)
    906916        items = self._stat._listdir(path)
    907         return [ftputil.tool.same_string_type_as(original_path, item) for item in items]
     917        return [
     918            ftputil.tool.same_string_type_as(original_path, item, self._encoding)
     919            for item in items
     920        ]
    908921
    909922    def lstat(self, path, _exception_for_missing_path=True):
     
    918931        intended for use by ftputil clients.)
    919932        """
    920         path = ftputil.tool.as_str_path(path)
     933        path = ftputil.tool.as_str_path(path, encoding=self._encoding)
    921934        return self._stat._lstat(path, _exception_for_missing_path)
    922935
     
    934947        intended for use by ftputil clients.)
    935948        """
    936         path = ftputil.tool.as_str_path(path)
     949        path = ftputil.tool.as_str_path(path, encoding=self._encoding)
    937950        return self._stat._stat(path, _exception_for_missing_path)
    938951
     
    943956        https://docs.python.org/library/os.html#os.walk ).
    944957        """
    945         top = ftputil.tool.as_str_path(top)
     958        top = ftputil.tool.as_str_path(top, encoding=self._encoding)
    946959        # The following code is copied from `os.walk` in Python 2.4 and adapted
    947960        # to ftputil.
     
    977990        In particular, a non-existent path usually causes a `PermanentError`.
    978991        """
    979         path = ftputil.tool.as_str_path(path)
     992        path = ftputil.tool.as_str_path(path, encoding=self._encoding)
    980993        path = self.path.abspath(path)
    981994
  • ftputil/path.py

    r1935 r2033  
    1 # Copyright (C) 2003-2020, Stefan Schwarzer <sschwarzer@sschwarzer.net>
     1# Copyright (C) 2003-2021, Stefan Schwarzer <sschwarzer@sschwarzer.net>
    22# and ftputil contributors (see `doc/contributors.txt`)
    33# See the file LICENSE for licensing terms.
     
    3333    def __init__(self, host):
    3434        self._host = host
     35        self._encoding = host._encoding
    3536        # Delegate these methods to the `posixpath` module because they don't
    3637        # need file system access but work on the path strings (possibly
     
    5455        """
    5556        original_path = path
    56         path = ftputil.tool.as_str_path(path)
     57        path = ftputil.tool.as_str_path(path, encoding=self._encoding)
    5758        if not self.isabs(path):
    5859            path = self.join(self._host.getcwd(), path)
    5960        return ftputil.tool.same_string_type_as(
    60             os.fspath(original_path), self.normpath(path)
     61            os.fspath(original_path), self.normpath(path), self._encoding
    6162        )
    6263
     
    117118            stat_function = stat.S_ISREG
    118119        #
    119         path = ftputil.tool.as_str_path(path)
     120        path = ftputil.tool.as_str_path(path, encoding=self._encoding)
    120121        #  Workaround if we can't go up from the current directory. The result
    121122        #  from `getcwd` should already be normalized.
     
    162163        `False`.
    163164        """
    164         path = ftputil.tool.as_str_path(path)
     165        path = ftputil.tool.as_str_path(path, encoding=self._encoding)
    165166        try:
    166167            lstat_result = self._host.lstat(path, _exception_for_missing_path=False)
     
    192193        to accumulate statistics.  Passing None for arg is common.
    193194        """
    194         top = ftputil.tool.as_str_path(top)
     195        top = ftputil.tool.as_str_path(top, encoding=self._encoding)
    195196        # This code (and the above documentation) is taken from `posixpath.py`,
    196197        # with slight modifications.
  • ftputil/tool.py

    r2028 r2033  
    2424
    2525
    26 def same_string_type_as(type_source, string):
     26def same_string_type_as(type_source, string, encoding):
    2727    """
    2828    Return a string of the same type as `type_source` with the content from
    2929    `string`.
    3030
    31     If the `type_source` and `string` don't have the same type, use
    32     `LOSSLESS_ENCODING` above to encode or decode, whatever operation is
    33     needed.
     31    If the `type_source` and `string` don't have the same type, use `encoding`
     32    to encode or decode, whatever operation is needed.
    3433    """
    3534    if isinstance(type_source, bytes) and isinstance(string, str):
    36         return string.encode(LOSSLESS_ENCODING)
     35        return string.encode(encoding)
    3736    elif isinstance(type_source, str) and isinstance(string, bytes):
    38         return string.decode(LOSSLESS_ENCODING)
     37        return string.decode(encoding)
    3938    else:
    4039        return string
    4140
    4241
    43 def as_str(string):
     42def as_str(string, encoding):
    4443    """
    4544    Return the argument `string` converted to a unicode string if it's a
    4645    `bytes` object. Otherwise just return the string.
    4746
     47    If a conversion is necessary, use `encoding`.
     48
    4849    If `string` is neither `str` nor `bytes`, raise a `TypeError`.
    4950    """
    5051    if isinstance(string, bytes):
    51         return string.decode(LOSSLESS_ENCODING)
     52        return string.decode(encoding)
    5253    elif isinstance(string, str):
    5354        return string
     
    5657
    5758
    58 def as_str_path(path):
     59def as_str_path(path, encoding):
    5960    """
    6061    Return the argument `path` converted to a unicode string if it's a `bytes`
    6162    object. Otherwise just return the string.
     63
     64    If a conversion is necessary, use `encoding`.
    6265
    6366    Instead of passing a `bytes` or `str` object for `path`, you can pass a
     
    6871    """
    6972    path = os.fspath(path)
    70     return as_str(path)
     73    return as_str(path, encoding)
  • test/test_base.py

    r1935 r2033  
    7373                      datetime=datetime.datetime.now(),
    7474                      name="file_name")
    75  
     75
    7676      # Result:
    7777      # "drwxr-xr-x  1  dummy_user dummy_group  512  Apr 22 2019  dir_name -> dir_target"
  • test/test_host.py

    r1938 r2033  
    1 # Copyright (C) 2002-2020, Stefan Schwarzer <sschwarzer@sschwarzer.net>
     1# Copyright (C) 2002-2021, Stefan Schwarzer <sschwarzer@sschwarzer.net>
    22# and ftputil contributors (see `doc/contributors.txt`)
    33# See the file LICENSE for licensing terms.
     
    4646
    4747def as_bytes(string):
    48     return string.encode(ftputil.tool.LOSSLESS_ENCODING)
     48    return string.encode(ftputil.tool.DEFAULT_ENCODING)
    4949
    5050
  • test/test_path.py

    r1935 r2033  
    1 # Copyright (C) 2003-2020, Stefan Schwarzer <sschwarzer@sschwarzer.net>
     1# Copyright (C) 2003-2021, Stefan Schwarzer <sschwarzer@sschwarzer.net>
    22# and ftputil contributors (see `doc/contributors.txt`)
    33# See the file LICENSE for licensing terms.
     
    2121
    2222def as_bytes(string):
    23     return string.encode(ftputil.tool.LOSSLESS_ENCODING)
     23    return string.encode(ftputil.tool.DEFAULT_ENCODING)
    2424
    2525
  • test/test_real_ftp.py

    r2008 r2033  
    1 # Copyright (C) 2003-2020, Stefan Schwarzer <sschwarzer@sschwarzer.net>
     1# Copyright (C) 2003-2021, Stefan Schwarzer <sschwarzer@sschwarzer.net>
    22# and ftputil contributors (see `doc/contributors.txt`)
    33# See the file LICENSE for licensing terms.
     
    886886            host.synchronize_times()
    887887
     888    def test_encoding(self):
     889        """
     890        Test setting the path encoding.
     891        """
     892        for encoding in ["latin-1", "UTF-8"]:
     893            factory = ftputil.session.session_factory(port=PORT, encoding=encoding)
     894            session = factory(*self.login_data)
     895            assert session.encoding == encoding
     896
    888897    def test_listdir_with_non_ascii_byte_string(self):
    889898        """
  • test/test_session.py

    r2029 r2033  
    1818    """
    1919
    20     encoding = ftputil.tool.DEFAULT_ENCODING
     20    encoding = (
     21        "latin-1"
     22        if (sys.version_info.major, sys.version_info.minor) <= (3, 8)
     23        else "utf-8"
     24    )
    2125
    22     def __init__(self):
     26    def __init__(self, encoding=None):
    2327        self.calls = []
     28        if encoding is not None:
     29            self.encoding = encoding
    2430
    2531    def add_call(self, *args):
  • test/test_sync.py

    r1935 r2033  
    1 # Copyright (C) 2007-2018, Stefan Schwarzer <sschwarzer@sschwarzer.net>
     1# Copyright (C) 2007-2021, Stefan Schwarzer <sschwarzer@sschwarzer.net>
    22# and ftputil contributors (see `doc/contributors.txt`)
    33# See the file LICENSE for licensing terms.
     
    1010import ftputil
    1111import ftputil.sync
     12import ftputil.tool
    1213
    1314
     
    9697
    9798    def _make_session(self, *args, **kwargs):
     99        self._encoding = ftputil.tool.DEFAULT_ENCODING
    98100        return DummyFTPSession()
    99101
  • test/test_tool.py

    r1935 r2033  
    1 # Copyright (C) 2013-2020, Stefan Schwarzer
     1# Copyright (C) 2013-2021, Stefan Schwarzer
    22# and ftputil contributors (see `doc/contributors.txt`)
    33# See the file LICENSE for licensing terms.
     
    88
    99import ftputil.tool
     10
     11
     12DEFAULT_ENCODING = ftputil.tool.DEFAULT_ENCODING
    1013
    1114
     
    3841
    3942    def test_to_bytes(self):
    40         assert same_string_type_as(b"abc", "def") == b"def"
     43        assert same_string_type_as(b"abc", "def", encoding=DEFAULT_ENCODING) == b"def"
    4144
    4245    def test_to_str(self):
    43         assert same_string_type_as("abc", b"def") == "def"
     46        assert same_string_type_as("abc", b"def", encoding=DEFAULT_ENCODING) == "def"
    4447
    4548    def test_both_bytes_type(self):
    46         assert same_string_type_as(b"abc", b"def") == b"def"
     49        assert same_string_type_as(b"abc", b"def", encoding=DEFAULT_ENCODING) == b"def"
    4750
    4851    def test_both_str_type(self):
    49         assert same_string_type_as("abc", "def") == "def"
     52        assert same_string_type_as("abc", "def", encoding=DEFAULT_ENCODING) == "def"
    5053
    5154
     
    5659class TestAsStr:
    5760    def test_from_bytes(self):
    58         assert as_str(b"abc") == "abc"
    59         assert as_str_path(b"abc") == "abc"
     61        assert as_str(b"abc", encoding=DEFAULT_ENCODING) == "abc"
     62        assert as_str_path(b"abc", encoding=DEFAULT_ENCODING) == "abc"
    6063
    6164    def test_from_str(self):
    62         assert as_str("abc") == "abc"
    63         assert as_str_path("abc") == "abc"
     65        assert as_str("abc", encoding=DEFAULT_ENCODING) == "abc"
     66        assert as_str_path("abc", encoding=DEFAULT_ENCODING) == "abc"
    6467
    6568    def test_from_bytes_path(self):
    66         assert as_str_path(Path(b"abc")) == "abc"
     69        assert as_str_path(Path(b"abc"), encoding=DEFAULT_ENCODING) == "abc"
    6770
    6871    def test_from_str_path(self):
    69         assert as_str_path(Path("abc")) == "abc"
     72        assert as_str_path(Path("abc"), encoding=DEFAULT_ENCODING) == "abc"
    7073
    7174    def test_type_error(self):
    7275        with pytest.raises(TypeError):
    73             as_str(1)
     76            as_str(1, encoding=DEFAULT_ENCODING)
Note: See TracChangeset for help on using the changeset viewer.