diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index 2a8592f8bd69c1..06800c4588b663 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -512,7 +512,9 @@ the use of userspace buffers in Python as in "``outfd.write(infd.read())``". On macOS `fcopyfile`_ is used to copy the file content (not metadata). -On Linux and Solaris :func:`os.sendfile` is used. +On Linux :func:`os.copy_file_range` or :func:`os.sendfile` is used. + +On Solaris :func:`os.sendfile` is used. On Windows :func:`shutil.copyfile` uses a bigger default buffer size (1 MiB instead of 64 KiB) and a :func:`memoryview`-based variant of @@ -527,6 +529,10 @@ file then shutil will silently fallback on using less efficient .. versionchanged:: 3.14 Solaris now uses :func:`os.sendfile`. +.. versionchanged:: next + Copy-on-write or server-side copy may be used internally via + :func:`os.copy_file_range` on supported Linux filesystems. + .. _shutil-copytree-example: copytree example diff --git a/Lib/shutil.py b/Lib/shutil.py index 171489ca41f2a7..510ae8c6f22d59 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -49,6 +49,7 @@ # https://bugs.python.org/issue43743#msg393429 _USE_CP_SENDFILE = (hasattr(os, "sendfile") and sys.platform.startswith(("linux", "android", "sunos"))) +_USE_CP_COPY_FILE_RANGE = hasattr(os, "copy_file_range") _HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile") # macOS # CMD defaults in Windows 10 @@ -107,6 +108,66 @@ def _fastcopy_fcopyfile(fsrc, fdst, flags): else: raise err from None +def _determine_linux_fastcopy_blocksize(infd): + """Determine blocksize for fastcopying on Linux. + + Hopefully the whole file will be copied in a single call. + The copying itself should be performed in a loop 'till EOF is + reached (0 return) so a blocksize smaller or bigger than the actual + file size should not make any difference, also in case the file + content changes while being copied. 
+ """ + try: + blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8 MiB + except OSError: + blocksize = 2 ** 27 # 128 MiB + # On 32-bit architectures truncate to 1 GiB to avoid OverflowError, + # see gh-82500. + if sys.maxsize < 2 ** 32: + blocksize = min(blocksize, 2 ** 30) + return blocksize + +def _fastcopy_copy_file_range(fsrc, fdst): + """Copy data from one regular mmap-like fd to another by using + a high-performance copy_file_range(2) syscall that gives filesystems + an opportunity to implement the use of reflinks or server-side copy. + + This should work on Linux >= 4.5 only. + """ + try: + infd = fsrc.fileno() + outfd = fdst.fileno() + except Exception as err: + raise _GiveupOnFastCopy(err) # not a regular file + + blocksize = _determine_linux_fastcopy_blocksize(infd) + offset = 0 + while True: + try: + n_copied = os.copy_file_range(infd, outfd, blocksize, offset_dst=offset) + except OSError as err: + # ...in order to have a more informative exception. + err.filename = fsrc.name + err.filename2 = fdst.name + + if err.errno == errno.ENOSPC: # filesystem is full + raise err from None + + # Give up on first call and if no data was copied. + if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0: + raise _GiveupOnFastCopy(err) + + raise err + else: + if n_copied == 0: + # If no bytes have been copied yet, copy_file_range + # might silently fail. + # https://lore.kernel.org/linux-fsdevel/20210126233840.GG4626@dread.disaster.area/T/#m05753578c7f7882f6e9ffe01f981bc223edef2b0 + if offset == 0: + raise _GiveupOnFastCopy() + break + offset += n_copied + def _fastcopy_sendfile(fsrc, fdst): """Copy data from one regular mmap-like fd to another by using high-performance sendfile(2) syscall. @@ -128,20 +189,7 @@ def _fastcopy_sendfile(fsrc, fdst): except Exception as err: raise _GiveupOnFastCopy(err) # not a regular file - # Hopefully the whole file will be copied in a single call. 
- # sendfile() is called in a loop 'till EOF is reached (0 return) - # so a bufsize smaller or bigger than the actual file size - # should not make any difference, also in case the file content - # changes while being copied. - try: - blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8MiB - except OSError: - blocksize = 2 ** 27 # 128MiB - # On 32-bit architectures truncate to 1GiB to avoid OverflowError, - # see bpo-38319. - if sys.maxsize < 2 ** 32: - blocksize = min(blocksize, 2 ** 30) - + blocksize = _determine_linux_fastcopy_blocksize(infd) offset = 0 while True: try: @@ -266,12 +314,20 @@ def copyfile(src, dst, *, follow_symlinks=True): except _GiveupOnFastCopy: pass # Linux / Android / Solaris - elif _USE_CP_SENDFILE: - try: - _fastcopy_sendfile(fsrc, fdst) - return dst - except _GiveupOnFastCopy: - pass + elif _USE_CP_SENDFILE or _USE_CP_COPY_FILE_RANGE: + # reflink may be implicit in copy_file_range. + if _USE_CP_COPY_FILE_RANGE: + try: + _fastcopy_copy_file_range(fsrc, fdst) + return dst + except _GiveupOnFastCopy: + pass + if _USE_CP_SENDFILE: + try: + _fastcopy_sendfile(fsrc, fdst) + return dst + except _GiveupOnFastCopy: + pass # Windows, see: # https://github.com/python/cpython/pull/7160#discussion_r195405230 elif _WINDOWS and file_size > 0: diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 1f18b1f09b5858..078ddd6c431b37 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -3239,12 +3239,8 @@ def test_filesystem_full(self): self.assertRaises(OSError, self.zerocopy_fun, src, dst) -@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported') -class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase): - PATCHPOINT = "os.sendfile" - - def zerocopy_fun(self, fsrc, fdst): - return shutil._fastcopy_sendfile(fsrc, fdst) +class _ZeroCopyFileLinuxTest(_ZeroCopyFileTest): + BLOCKSIZE_INDEX = None def test_non_regular_file_src(self): with io.BytesIO(self.FILEDATA) as src: @@ -3265,65 +3261,65 @@ def 
test_non_regular_file_dst(self): self.assertEqual(dst.read(), self.FILEDATA) def test_exception_on_second_call(self): - def sendfile(*args, **kwargs): + def syscall(*args, **kwargs): if not flag: flag.append(None) - return orig_sendfile(*args, **kwargs) + return orig_syscall(*args, **kwargs) else: raise OSError(errno.EBADF, "yo") flag = [] - orig_sendfile = os.sendfile - with unittest.mock.patch('os.sendfile', create=True, - side_effect=sendfile): + orig_syscall = eval(self.PATCHPOINT) + with unittest.mock.patch(self.PATCHPOINT, create=True, + side_effect=syscall): with self.get_files() as (src, dst): with self.assertRaises(OSError) as cm: - shutil._fastcopy_sendfile(src, dst) + self.zerocopy_fun(src, dst) assert flag self.assertEqual(cm.exception.errno, errno.EBADF) def test_cant_get_size(self): # Emulate a case where src file size cannot be determined. # Internally bufsize will be set to a small value and - # sendfile() will be called repeatedly. + # the system call will be invoked repeatedly. with unittest.mock.patch('os.fstat', side_effect=OSError) as m: with self.get_files() as (src, dst): - shutil._fastcopy_sendfile(src, dst) + self.zerocopy_fun(src, dst) assert m.called self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) def test_small_chunks(self): # Force internal file size detection to be smaller than the - # actual file size. We want to force sendfile() to be called + # actual file size. We want to force the system call to be invoked # multiple times, also in order to emulate a src fd which gets # bigger while it is being copied. mock = unittest.mock.Mock() mock.st_size = 65536 + 1 with unittest.mock.patch('os.fstat', return_value=mock) as m: with self.get_files() as (src, dst): - shutil._fastcopy_sendfile(src, dst) + self.zerocopy_fun(src, dst) assert m.called self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) def test_big_chunk(self): # Force internal file size detection to be +100MB bigger than - # the actual file size. 
Make sure sendfile() does not rely on + # the actual file size. Make sure a system call does not rely on # file size value except for (maybe) a better throughput / # performance. mock = unittest.mock.Mock() mock.st_size = self.FILESIZE + (100 * 1024 * 1024) with unittest.mock.patch('os.fstat', return_value=mock) as m: with self.get_files() as (src, dst): - shutil._fastcopy_sendfile(src, dst) + self.zerocopy_fun(src, dst) assert m.called self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) def test_blocksize_arg(self): - with unittest.mock.patch('os.sendfile', + with unittest.mock.patch(self.PATCHPOINT, side_effect=ZeroDivisionError) as m: self.assertRaises(ZeroDivisionError, shutil.copyfile, TESTFN, TESTFN2) - blocksize = m.call_args[0][3] + blocksize = m.call_args[0][self.BLOCKSIZE_INDEX] # Make sure file size and the block size arg passed to # sendfile() are the same. self.assertEqual(blocksize, os.path.getsize(TESTFN)) @@ -3333,9 +3329,19 @@ def test_blocksize_arg(self): self.addCleanup(os_helper.unlink, TESTFN2 + '3') self.assertRaises(ZeroDivisionError, shutil.copyfile, TESTFN2, TESTFN2 + '3') - blocksize = m.call_args[0][3] + blocksize = m.call_args[0][self.BLOCKSIZE_INDEX] self.assertEqual(blocksize, 2 ** 23) + +@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported') +@unittest.mock.patch.object(shutil, "_USE_CP_COPY_FILE_RANGE", False) +class TestZeroCopySendfile(_ZeroCopyFileLinuxTest, unittest.TestCase): + PATCHPOINT = "os.sendfile" + BLOCKSIZE_INDEX = 3 + + def zerocopy_fun(self, fsrc, fdst): + return shutil._fastcopy_sendfile(fsrc, fdst) + def test_file2file_not_supported(self): # Emulate a case where sendfile() only support file->socket # fds. 
In such a case copyfile() is supposed to skip the @@ -3358,6 +3364,29 @@ def test_file2file_not_supported(self): shutil._USE_CP_SENDFILE = True +@unittest.skipUnless(shutil._USE_CP_COPY_FILE_RANGE, "os.copy_file_range() not supported") +class TestZeroCopyCopyFileRange(_ZeroCopyFileLinuxTest, unittest.TestCase): + PATCHPOINT = "os.copy_file_range" + BLOCKSIZE_INDEX = 2 + + def zerocopy_fun(self, fsrc, fdst): + return shutil._fastcopy_copy_file_range(fsrc, fdst) + + def test_empty_file(self): + srcname = f"{TESTFN}src" + dstname = f"{TESTFN}dst" + self.addCleanup(lambda: os_helper.unlink(srcname)) + self.addCleanup(lambda: os_helper.unlink(dstname)) + with open(srcname, "wb"): + pass + + with open(srcname, "rb") as src, open(dstname, "wb") as dst: + # _fastcopy_copy_file_range gives up copying empty files due + # to a bug in older Linux. + with self.assertRaises(shutil._GiveupOnFastCopy): + self.zerocopy_fun(src, dst) + + @unittest.skipIf(not MACOS, 'macOS only') class TestZeroCopyMACOS(_ZeroCopyFileTest, unittest.TestCase): PATCHPOINT = "posix._fcopyfile" diff --git a/Misc/ACKS b/Misc/ACKS index 4901609a178bc3..d2cd97c4ff10c6 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1971,6 +1971,7 @@ Johannes Vogel Michael Vogt Radu Voicilas Alex Volkov +Illia Volochii Ruben Vorderman Guido Vranken Martijn Vries diff --git a/Misc/NEWS.d/next/Library/2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst b/Misc/NEWS.d/next/Library/2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst new file mode 100644 index 00000000000000..49e6305bf83138 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-05-23-21-23-29.gh-issue-81340.D11RkZ.rst @@ -0,0 +1,5 @@ +Use :func:`os.copy_file_range` in :func:`shutil.copy`, :func:`shutil.copy2`, +and :func:`shutil.copyfile` functions by default. The underlying Linux system +call gives filesystems an opportunity to implement the use of copy-on-write +(in the case of btrfs and XFS) or server-side copy (in the case of NFS). +Patch by Illia Volochii.