Apply Sourcery suggestions and fix typos #1863

Draft · wants to merge 1 commit into master
2 changes: 1 addition & 1 deletion docs/source/changelog.rst
@@ -103,7 +103,7 @@ Enhancements
- "tree" text display of filesystem contents (#1750)
- async wrapper for sync FSs (#1745)
- new known implementation: tosfs (#1739)
- consilidate block fetch requests (#1733)
- consolidate block fetch requests (#1733)

Fixes

7 changes: 3 additions & 4 deletions fsspec/archive.py
@@ -43,8 +43,8 @@ def info(self, path, **kwargs):
return {"name": "", "type": "directory", "size": 0}
if path in self.dir_cache:
return self.dir_cache[path]
elif path + "/" in self.dir_cache:
return self.dir_cache[path + "/"]
elif f"{path}/" in self.dir_cache:
return self.dir_cache[f"{path}/"]
else:
raise FileNotFoundError(path)

@@ -69,7 +69,6 @@ def ls(self, path, detail=True, **kwargs):
out = {"name": ppath, "size": 0, "type": "directory"}
paths[ppath] = out
if detail:
out = sorted(paths.values(), key=operator.itemgetter("name"))
return out
return sorted(paths.values(), key=operator.itemgetter("name"))
else:
return sorted(paths)
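Aside from the f-string conversion, the `info` lookup above tolerates directory entries cached either with or without a trailing slash. A minimal standalone sketch of that lookup pattern, with an invented `dir_cache`:

```python
dir_cache = {"data/": {"name": "data/", "type": "directory", "size": 0}}

def info(path):
    # Try the exact key first, then the trailing-slash variant.
    if path in dir_cache:
        return dir_cache[path]
    if f"{path}/" in dir_cache:
        return dir_cache[f"{path}/"]
    raise FileNotFoundError(path)

print(info("data"))  # falls through to the "data/" key
```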
21 changes: 10 additions & 11 deletions fsspec/asyn.py
@@ -85,10 +85,8 @@ def sync(loop, func, *args, timeout=None, **kwargs):
result = [None]
event = threading.Event()
asyncio.run_coroutine_threadsafe(_runner(event, coro, result, timeout), loop)
while True:
# this loops allows thread to get interrupted
if event.wait(1):
break
while not event.wait(1):
# this loop allows thread to get interrupted
if timeout is not None:
timeout -= 1
if timeout < 0:
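The rewritten loop leans on `threading.Event.wait(timeout)` returning `True` once the event is set and `False` on timeout: `while not event.wait(1)` therefore wakes once per second (keeping the waiting thread interruptible) and exits as soon as the result is ready. A self-contained sketch of the same pattern, with a timer standing in for the coroutine:

```python
import threading

event = threading.Event()
# A worker sets the event after ~3 seconds, standing in for the coroutine result.
threading.Timer(3, event.set).start()

elapsed = 0
while not event.wait(1):  # False on each 1s timeout, True once the event is set
    elapsed += 1
    print(f"still waiting after {elapsed}s")
print("done")
```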
@@ -356,10 +354,11 @@ async def _copy(
batch_size=None,
**kwargs,
):
if on_error is None and recursive:
on_error = "ignore"
elif on_error is None:
on_error = "raise"
if on_error is None:
if recursive:
on_error = "ignore"
else:
on_error = "raise"

if isinstance(path1, list) and isinstance(path2, list):
# No need to expand paths when both source and destination
@@ -714,7 +713,7 @@ async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs):
detail = kwargs.pop("detail", False)
try:
listing = await self._ls(path, detail=True, **kwargs)
except (FileNotFoundError, OSError) as e:
except OSError as e:
if on_error == "raise":
raise
elif callable(on_error):
@@ -766,7 +765,7 @@ async def _glob(self, path, maxdepth=None, **kwargs):
ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash
path = self._strip_protocol(path)
append_slash_to_dirname = ends_with_sep or path.endswith(
tuple(sep + "**" for sep in seps)
tuple(f"{sep}**" for sep in seps)
)
idx_star = path.find("*") if path.find("*") >= 0 else len(path)
idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
@@ -814,7 +813,7 @@ async def _glob(self, path, maxdepth=None, **kwargs):
p: info
for p, info in sorted(allpaths.items())
if pattern.match(
p + "/"
f"{p}/"
if append_slash_to_dirname and info["type"] == "directory"
else p
)
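Several hunks in this PR (here, and in fsspec/fuse.py and fsspec/implementations/http_sync.py below) drop redundant members from `except` tuples: `FileNotFoundError` subclasses `OSError`, and `ModuleNotFoundError` subclasses `ImportError`, so the broader class alone already catches both. A quick check:

```python
# FileNotFoundError (Python 3.3+) subclasses OSError;
# ModuleNotFoundError (Python 3.6+) subclasses ImportError.
assert issubclass(FileNotFoundError, OSError)
assert issubclass(ModuleNotFoundError, ImportError)

try:
    open("/no/such/file")
except OSError as e:  # also catches FileNotFoundError
    print(type(e).__name__)  # FileNotFoundError
```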
10 changes: 4 additions & 6 deletions fsspec/caching.py
@@ -430,8 +430,7 @@ def _fetch_block(self, block_number: int) -> bytes:
self.total_requested_bytes += end - start
self.miss_count += 1
logger.info("BlockCache fetching block %d", block_number)
block_contents = super()._fetch(start, end)
return block_contents
return super()._fetch(start, end)

def _read_cache(
self, start: int, end: int, start_block_number: int, end_block_number: int
@@ -704,7 +703,7 @@ class UpdatableLRU(Generic[P, T]):
"""
Custom implementation of LRU cache that allows updating keys

Used by BackgroudBlockCache
Used by BackgroundBlockCache
"""

class CacheInfo(NamedTuple):
@@ -857,7 +856,7 @@ def _fetch(self, start: int | None, end: int | None) -> bytes:
self._fetch_future = None
else:
# Must join if we need the block for the current fetch
must_join = bool(
must_join = (
start_block_number
<= self._fetch_future_block_number
<= end_block_number
@@ -920,8 +919,7 @@ def _fetch_block(self, block_number: int, log_info: str = "sync") -> bytes:
logger.info("BlockCache fetching block (%s) %d", log_info, block_number)
self.total_requested_bytes += end - start
self.miss_count += 1
block_contents = super()._fetch(start, end)
return block_contents
return super()._fetch(start, end)

def _read_cache(
self, start: int, end: int, start_block_number: int, end_block_number: int
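The `must_join` hunk works because a chained comparison such as `a <= b <= c` already evaluates to a `bool`, so the `bool(...)` wrapper was a no-op:

```python
start_block_number, fetch_block_number, end_block_number = 2, 3, 5

# A chained comparison is itself a bool; no bool() call needed.
must_join = start_block_number <= fetch_block_number <= end_block_number
print(type(must_join), must_join)  # <class 'bool'> True
```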
2 changes: 1 addition & 1 deletion fsspec/core.py
@@ -339,7 +339,7 @@ def _un_chain(path, kwargs):
if "://" in p or x.match(p):
bits.append(p)
else:
bits.append(p + "://")
bits.append(f"{p}://")
else:
bits = [path]
# [[url, protocol, kwargs], ...]
5 changes: 2 additions & 3 deletions fsspec/fuse.py
@@ -76,8 +76,7 @@ def read(self, path, size, offset, fh):

f = self.cache[fh]
f.seek(offset)
out = f.read(size)
return out
return f.read(size)

def write(self, path, data, offset, fh):
logger.debug("write %s", (path, offset))
@@ -119,7 +118,7 @@ def unlink(self, path):
fn = "".join([self.root, path.lstrip("/")])
try:
self.fs.rm(fn, False)
except (OSError, FileNotFoundError) as exc:
except OSError as exc:
raise FuseOSError(EIO) from exc

def release(self, path, fh):
2 changes: 1 addition & 1 deletion fsspec/implementations/cache_metadata.py
@@ -166,7 +166,7 @@ def on_close_cached_file(self, f: Any, path: str) -> None:

The actual closing of the file is the responsibility of the caller.
"""
# File must be writeble, so in self.cached_files[-1]
# File must be writeable, so in self.cached_files[-1]
c = self.cached_files[-1][path]
if c["blocks"] is not True and len(c["blocks"]) * f.blocksize >= f.size:
c["blocks"] = True
4 changes: 2 additions & 2 deletions fsspec/implementations/cached.py
@@ -338,7 +338,7 @@ def _open(
# explicitly submitting the size to the open call will avoid extra
# operations when opening. This is particularly relevant
# for any file that is read over a network, e.g. S3.
size = detail.get("size", None)
size = detail.get("size")

# call target filesystems open
self._mkcache()
@@ -821,7 +821,7 @@ def info(self, path, **kwargs):
if f:
size = os.path.getsize(f[0].fn) if f[0].closed else f[0].tell()
return {"name": path, "size": size, "type": "file"}
f = any(_.path.startswith(path + "/") for _ in self.transaction.files)
f = any(_.path.startswith(f"{path}/") for _ in self.transaction.files)
if f:
return {"name": path, "size": 0, "type": "directory"}
return self.fs.info(path, **kwargs)
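`dict.get(key)` already returns `None` for a missing key, so the explicit default in `detail.get("size", None)` was redundant:

```python
detail = {"name": "file.bin", "type": "file"}  # no "size" entry

assert detail.get("size") is None        # default is already None
assert detail.get("size", None) is None  # identical behavior, noisier
```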
2 changes: 1 addition & 1 deletion fsspec/implementations/github.py
@@ -153,7 +153,7 @@ def ls(self, path, detail=False, sha=None, _sha=None, **kwargs):
_sha = sha or self.root
for part in parts:
out = self.ls(so_far, True, sha=sha, _sha=_sha)
so_far += "/" + part if so_far else part
so_far += f"/{part}" if so_far else part
out = [o for o in out if o["name"] == so_far]
if not out:
raise FileNotFoundError(path)
2 changes: 1 addition & 1 deletion fsspec/implementations/http.py
@@ -446,7 +446,7 @@ async def _glob(self, path, maxdepth=None, **kwargs):
"""
Find files by glob-matching.

This implementation is idntical to the one in AbstractFileSystem,
This implementation is identical to the one in AbstractFileSystem,
but "?" is not considered as a character for globbing, because it is
so common in URLs, often identifying the "query" part.
"""
7 changes: 3 additions & 4 deletions fsspec/implementations/http_sync.py
@@ -11,7 +11,7 @@

try:
import yarl
except (ImportError, ModuleNotFoundError, OSError):
except (ImportError, OSError):
yarl = False

from fsspec.callbacks import _DEFAULT_CALLBACK
@@ -278,10 +278,9 @@ def encode_url(self, url):
@classmethod
def _strip_protocol(cls, path: str) -> str:
"""For HTTP, we always want to keep the full URL"""
path = path.replace("sync-http://", "http://").replace(
return path.replace("sync-http://", "http://").replace(
"sync-https://", "https://"
)
return path

@classmethod
def _parent(cls, path):
@@ -310,7 +309,7 @@ def _ls_real(self, url, detail=True, **kwargs):
l = l[1]
if l.startswith("/") and len(l) > 1:
# absolute URL on this server
l = parts.scheme + "://" + parts.netloc + l
l = f"{parts.scheme}://{parts.netloc}{l}"
if l.startswith("http"):
if self.same_schema and l.startswith(url.rstrip("/") + "/"):
out.add(l)
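The `_ls_real` hunk rebuilds server-absolute links with an f-string. A standalone sketch of that re-anchoring step using `urllib.parse` (the page URL and href below are invented):

```python
from urllib.parse import urlparse

url = "https://example.com/data/index.html"  # hypothetical page URL
link = "/files/archive.zip"                  # server-absolute href found on it

parts = urlparse(url)
if link.startswith("/") and len(link) > 1:
    # Re-anchor the absolute path on the page's scheme and host.
    link = f"{parts.scheme}://{parts.netloc}{link}"
print(link)  # https://example.com/files/archive.zip
```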
9 changes: 4 additions & 5 deletions fsspec/implementations/local.py
@@ -235,10 +235,9 @@ def _parent(cls, path):
else:
# NT
path_ = path.rsplit("/", 1)[0]
if len(path_) <= 3:
if path_[1:2] == ":":
# nt root (something like c:/)
return path_[0] + ":/"
if len(path_) <= 3 and path_[1:2] == ":":
# nt root (something like c:/)
return path_[0] + ":/"
# More cases may be required here
return path_

@@ -322,7 +321,7 @@ def make_path_posix(path):
# windows full path like "C:\\local\\path"
if len(path) <= 3:
# nt root (something like c:/)
return path[0] + ":/"
return f"{path[0]}:/"
path = path.replace("\\", "/")
return path
elif path[0:1] == "~":
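The `_parent` hunk merges the nested `if`s into one condition. That is safe because `and` short-circuits and because slicing (unlike indexing) never raises on a short string. A sketch, using a hypothetical helper name:

```python
def nt_root_or_parent(path_):
    # "c:/x".rsplit("/", 1)[0] == "c:" is detected as a drive root.
    if len(path_) <= 3 and path_[1:2] == ":":
        return path_[0] + ":/"
    return path_

print(nt_root_or_parent("c:"))        # c:/
print(nt_root_or_parent("tmp/data"))  # tmp/data
print(repr(""[1:2]))                  # '': slicing a short string, no IndexError
```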
6 changes: 3 additions & 3 deletions fsspec/implementations/memory.py
@@ -39,7 +39,7 @@ def _strip_protocol(cls, path):
if "::" in path or "://" in path:
return path.rstrip("/")
path = path.lstrip("/").rstrip("/")
return "/" + path if path else ""
return f"/{path}" if path else ""

def ls(self, path, detail=True, **kwargs):
path = self._strip_protocol(path)
@@ -56,7 +56,7 @@ def ls(self, path, detail=True, **kwargs):
}
]
paths = set()
starter = path + "/"
starter = f"{path}/"
out = []
for p2 in tuple(self.store):
if p2.startswith(starter):
@@ -151,7 +151,7 @@ def info(self, path, **kwargs):
logger.debug("info: %s", path)
path = self._strip_protocol(path)
if path in self.pseudo_dirs or any(
p.startswith(path + "/") for p in list(self.store) + self.pseudo_dirs
p.startswith(f"{path}/") for p in list(self.store) + self.pseudo_dirs
):
return {
"name": path,
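For reference, a runnable sketch of the `_strip_protocol` normalization shown above (inputs invented): chained or URL-style paths keep their form, while bare memory paths reduce to a single leading slash:

```python
def strip_protocol(path):
    if "::" in path or "://" in path:
        return path.rstrip("/")
    path = path.lstrip("/").rstrip("/")
    return f"/{path}" if path else ""

print(strip_protocol("memory://a/b/"))  # memory://a/b
print(strip_protocol("//a/b/"))         # /a/b
print(repr(strip_protocol("/")))        # '' (the root)
```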
2 changes: 1 addition & 1 deletion fsspec/implementations/smb.py
@@ -172,7 +172,7 @@ def _connect(self):
# will be equal to `wait`. For any number of retries the last wait time will be
# equal to `wait` and for retries>2 the first wait time will be equal to `wait / factor`.
wait_times = iter(
factor ** (n / n_waits - 1) * wait_time for n in range(0, n_waits + 1)
factor ** (n / n_waits - 1) * wait_time for n in range(n_waits + 1)
)

for attempt in range(self.register_session_retries + 1):
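The comment above fixes the sequence's endpoints: the last wait equals `wait_time`, and for retries>2 the first equals `wait_time / factor`, with intermediate waits spaced geometrically between them. A worked example with assumed values `factor=2`, `wait_time=4`, `n_waits=2`:

```python
factor, wait_time, n_waits = 2, 4.0, 2  # assumed example values

wait_times = [factor ** (n / n_waits - 1) * wait_time for n in range(n_waits + 1)]
# n=0 -> 2**-1 * 4 = 2.0, n=1 -> 2**-0.5 * 4 ~= 2.83, n=2 -> 2**0 * 4 = 4.0
print(wait_times)  # starts at wait_time/factor, ends at wait_time
```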
5 changes: 2 additions & 3 deletions fsspec/implementations/tests/conftest.py
@@ -27,13 +27,12 @@ def fs(request):
pyarrow_fs = pytest.importorskip("pyarrow.fs")
FileSystem = pyarrow_fs.FileSystem
if request.param == "arrow":
fs = ArrowFSWrapper(FileSystem.from_uri("file:///")[0])
return fs
return ArrowFSWrapper(FileSystem.from_uri("file:///")[0])
cls = FILESYSTEMS[request.param]
return cls()


@pytest.fixture(scope="function")
def temp_file():
with tempfile.TemporaryDirectory() as temp_dir:
return temp_dir + "test-file"
return f"{temp_dir}test-file"
2 changes: 1 addition & 1 deletion fsspec/implementations/tests/test_archive.py
@@ -191,7 +191,7 @@ def pytest_generate_tests(metafunc):
scenario: ArchiveTestScenario = scenario
label = scenario.protocol
if scenario.variant:
label += "-" + scenario.variant
label = f"{label}-{scenario.variant}"
idlist.append(label)
argvalues.append([scenario])
metafunc.parametrize(argnames, argvalues, ids=idlist, scope="class")
2 changes: 1 addition & 1 deletion fsspec/implementations/tests/test_dbfs.py
@@ -9,7 +9,7 @@
itself changes, which is very unlikely to occur as it is versioned),
you need to re-record the answers. This can be done as follows:

1. Delete all casettes files in the "./cassettes/test_dbfs" folder
1. Delete all cassettes files in the "./cassettes/test_dbfs" folder
2. Spin up a databricks cluster. For example,
you can use an Azure Databricks instance for this.
3. Take note of the instance details (the instance URL. For example for an Azure
2 changes: 1 addition & 1 deletion fsspec/implementations/tests/test_github.py
@@ -42,7 +42,7 @@ def test_github_cat():


def test_github_ls():
# test using ls to list the files in a resository
# test using ls to list the files in a repository
fs = fsspec.filesystem("github", org="mwaskom", repo="seaborn-data")
ls_result = set(fs.ls(""))
expected = {"brain_networks.csv", "mpg.csv", "penguins.csv", "README.md", "raw"}
4 changes: 2 additions & 2 deletions fsspec/implementations/tests/test_memory.py
@@ -172,7 +172,7 @@ def test_moves(m):
assert m.find("") == ["/target.txt", "/target2.txt"]


def test_rm_reursive_empty_subdir(m):
def test_rm_recursive_empty_subdir(m):
# https://github.com/fsspec/filesystem_spec/issues/500
m.mkdir("recdir")
m.mkdir("recdir/subdir2")
@@ -211,7 +211,7 @@ def test_cp_directory_recursive(m):
# https://github.com/fsspec/filesystem_spec/issues/1062
# Recursive cp/get/put of source directory into non-existent target directory.
src = "/src"
src_file = src + "/file"
src_file = f"{src}/file"
m.mkdir(src)
m.touch(src_file)

5 changes: 2 additions & 3 deletions fsspec/implementations/zip.py
@@ -161,9 +161,8 @@ def _matching_starts(file_path):
continue

if file_info["type"] == "directory":
if withdirs:
if file_path not in result:
result[file_path.strip("/")] = file_info
if withdirs and file_path not in result:
result[file_path.strip("/")] = file_info
continue

if file_path not in result:
4 changes: 2 additions & 2 deletions fsspec/mapping.py
@@ -60,8 +60,8 @@ def __init__(self, root, fs, check=False, create=False, missing_exceptions=None)
f"Path {root} does not exist. Create "
f" with the ``create=True`` keyword"
)
self.fs.touch(root + "/a")
self.fs.rm(root + "/a")
self.fs.touch(f"{root}/a")
self.fs.rm(f"{root}/a")

@cached_property
def dirfs(self):
8 changes: 2 additions & 6 deletions fsspec/parquet.py
@@ -335,11 +335,7 @@ def _transfer_ranges(fs, blocks, paths, starts, ends):
def _add_header_magic(data):
# Add b"PAR1" to file headers
for path in list(data.keys()):
add_magic = True
for k in data[path]:
if k[0] == 0 and k[1] >= 4:
add_magic = False
break
add_magic = not any(k[0] == 0 and k[1] >= 4 for k in data[path])
if add_magic:
data[path][(0, 4)] = b"PAR1"

@@ -446,8 +442,8 @@ def _parquet_byte_ranges(
file_offset0 = column.meta_data.dictionary_page_offset
if file_offset0 is None:
file_offset0 = column.meta_data.data_page_offset
num_bytes = column.meta_data.total_compressed_size
if footer_start is None or file_offset0 < footer_start:
num_bytes = column.meta_data.total_compressed_size
data_paths.append(fn)
data_starts.append(file_offset0)
data_ends.append(
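The `_add_header_magic` hunk collapses a flag-and-break loop into a single `not any(...)`: the `b"PAR1"` magic is prepended only when no already-transferred range covers the file's first four bytes. An equivalence sketch over an invented `data` mapping:

```python
data = {"a.parquet": {(0, 4): b"PAR1"}, "b.parquet": {(100, 200): b"..."}}

for path in list(data.keys()):
    # True unless some cached (start, end) range already spans bytes 0-4.
    add_magic = not any(k[0] == 0 and k[1] >= 4 for k in data[path])
    if add_magic:
        data[path][(0, 4)] = b"PAR1"

print(sorted(data["b.parquet"]))  # [(0, 4), (100, 200)]: magic added
```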