# Source repository: ~sschwarzer/ftputil
#
# File: ftputil/ftputil/sync.py (-rw-r--r--, 5.7 KiB)
# Last commit: 77f2ca24, Stefan Schwarzer, 'Move item "Push to repository"', a month ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# Copyright (C) 2007-2018, Stefan Schwarzer <sschwarzer@sschwarzer.net>
# and ftputil contributors (see `doc/contributors.txt`)
# See the file LICENSE for licensing terms.

"""
Tools for syncing combinations of local and remote directories.

*** WARNING: This is an unfinished in-development version!
"""

# Sync combinations:
# - remote -> local (download)
# - local -> remote (upload)
# - remote -> remote
# - local -> local (maybe implicitly possible due to design, but not targeted)

import os
import shutil

from ftputil import FTPHost
import ftputil.error

__all__ = ["FTPHost", "LocalHost", "Syncer"]


# Buffer length in bytes used when copying between file objects (64 KiB).
CHUNK_SIZE = 1024 * 64


class LocalHost:
    """
    Stand-in for an `FTPHost` that operates on the local file system, so
    that syncing code can treat local and remote sides alike.

    Apart from `open` and `time_shift`, which are defined explicitly, every
    attribute is looked up in the `os` module (e. g. `path`, `sep`, `walk`,
    `mkdir`).
    """

    def __getattr__(self, attr):
        # Only reached for names that normal attribute lookup didn't find,
        # i. e. everything except the methods defined in this class.
        return getattr(os, attr)

    def time_shift(self):
        """
        Return the time shift of this host. See the methods `set_time_shift`
        and `time_shift` of class `FTPHost` for the definition of the term.

        For a local file system the time shift is zero by definition.
        """
        local_time_shift = 0.0
        return local_time_shift

    def open(self, path, mode):
        """
        Open the local file named `path` in mode `mode` and return the
        resulting Python file object.
        """
        # Note that this calls the built-in `open`, _not_ `os.open`.
        file_object = open(path, mode)
        return file_object


class Syncer:
    """
    Control synchronization between combinations of local and remote
    directories and files.

    Both sides are accessed only through their host objects, using the
    attributes `open`, `mkdir`, `walk`, `sep` and `path` (with `abspath`,
    `isdir`, `isfile` and `join`).
    """

    def __init__(self, source, target):
        """
        Init the `Syncer` instance.

        Each of `source` and `target` is either an `FTPHost` or a `LocalHost`
        object. The source and target directories, resp. have to be set with
        the `chdir` command before passing them in. The semantics is so that
        the items under the source directory will show up under the target
        directory after the synchronization (unless there's an error).
        """
        self._source = source
        self._target = target

    def _mkdir(self, target_dir):
        """
        Try to create the target directory `target_dir`. If it already exists,
        don't do anything. If the directory is present but it's actually a
        file, raise a `SyncError`.
        """
        # TODO: Handle setting of target mtime according to source mtime
        # (beware of rootdir anomalies; try to handle them as well).
        if self._target.path.isfile(target_dir):
            raise ftputil.error.SyncError(
                "target dir '{}' is actually a file".format(target_dir)
            )
        # Deliberately use an `isdir` test instead of `try/except`. The latter
        # approach might mask other errors we want to see, e. g. insufficient
        # permissions.
        if not self._target.path.isdir(target_dir):
            self._target.mkdir(target_dir)

    def _sync_file(self, source_file, target_file):
        """
        Copy the contents of `source_file` on the source host to
        `target_file` on the target host.

        The source is opened in mode "rb" and the target in mode "wb"; the
        data is transferred in pieces of `CHUNK_SIZE` bytes. Both file
        objects are closed afterwards, even if the copying fails.
        """
        # XXX: This duplicates code from `FTPHost._copyfileobj`. Maybe
        # implement the upload and download methods in terms of `_sync_file`,
        # or maybe not?
        # TODO: Handle `IOError`s
        # TODO: Handle conditional copy
        # TODO: Handle setting of target mtime according to source mtime
        # (beware of rootdir anomalies; try to handle them as well).
        #
        # The source is opened first. If opening the target fails, the
        # `with` statement still closes the source.
        with self._source.open(source_file, "rb") as source, self._target.open(
            target_file, "wb"
        ) as target:
            shutil.copyfileobj(source, target, length=CHUNK_SIZE)

    def _fix_sep_for_target(self, path):
        """
        Return the string `path` with appropriate path separators for the
        target file system.
        """
        return path.replace(self._source.sep, self._target.sep)

    def _target_path(self, source_path, source_dir, target_dir):
        """
        Return the path on the target host which corresponds to
        `source_path` on the source host.

        `source_path` is expected to start with `source_dir`, as the paths
        derived from `self._source.walk(source_dir)` do. The part after that
        prefix is converted to the target's path separators and appended to
        `target_dir`. If nothing is left after the prefix, `target_dir`
        itself is returned.
        """
        # Cut off the prefix by length instead of using `str.replace`. A root
        # directory like "/" already ends with a separator, so replacing it
        # would glue the first path component directly onto `target_dir`
        # ("/a" -> "/backupa" instead of "/backup/a").
        relative_path = source_path[len(source_dir):].lstrip(self._source.sep)
        if not relative_path:
            return target_dir
        relative_path = self._fix_sep_for_target(relative_path)
        # Let the target host join the parts, so a `target_dir` that is a
        # root directory doesn't end up with a doubled separator.
        return self._target.path.join(target_dir, relative_path)

    def _sync_tree(self, source_dir, target_dir):
        """
        Synchronize the source and the target directory tree by updating the
        target to match the source as far as possible.

        Current limitations:
        - _don't_ delete items which are on the target path but not on the
          source path
        - files are always copied, the modification timestamps are not compared
        - all files are copied in binary mode, never in ASCII/text mode
        - incomplete error handling
        """
        self._mkdir(target_dir)
        source_join = self._source.path.join
        for dirpath, dir_names, file_names in self._source.walk(source_dir):
            # Create the subdirectories before copying files.
            for dir_name in dir_names:
                inner_source_dir = source_join(dirpath, dir_name)
                inner_target_dir = self._target_path(
                    inner_source_dir, source_dir, target_dir
                )
                self._mkdir(inner_target_dir)
            for file_name in file_names:
                source_file = source_join(dirpath, file_name)
                target_file = self._target_path(
                    source_file, source_dir, target_dir
                )
                self._sync_file(source_file, target_file)

    def sync(self, source_path, target_path):
        """
        Synchronize `source_path` and `target_path` (both are strings, each
        denoting a directory or file path), i. e. update the target path so
        that it's a copy of the source path.

        This method handles both directory trees and single files. If
        `source_path` is a file on the source host, it's copied to
        `target_path`; otherwise `source_path` is treated as a directory
        tree.
        """
        # TODO: Handle making of missing intermediate directories.
        source_path = self._source.path.abspath(source_path)
        target_path = self._target.path.abspath(target_path)
        if self._source.path.isfile(source_path):
            self._sync_file(source_path, target_path)
        else:
            self._sync_tree(source_path, target_path)