Spaces:

SyamNaren
/

medicalGpt

Runtime error

App Files Files Community

medicalGpt / myenv /Lib /site-packages /fsspec /implementations /tests /test_archive.py

SyamNaren

upload env

63deadc verified over 1 year ago

raw

history blame contribute delete

13.1 kB

	import bz2
	import gzip
	import lzma
	import os
	import pickle
	import tarfile
	import tempfile
	import zipfile
	from contextlib import contextmanager
	from io import BytesIO

	import pytest

	import fsspec

	# Blueprint for the synthesized archive files: maps each archive member
	# path to the bytes stored under that path.
	archive_data = {
	    "a": b"",
	    "b": b"hello",
	    "deeply/nested/path": b"stuff",
	}


	@contextmanager
	def tempzip(data=None):
	    """
	    Provide test cases with temporary synthesized Zip archives.

	    :param data: Mapping of member name to member content. ``None`` (or an
	        empty mapping) produces an archive without members.
	    :yields: Path of the temporary ``.zip`` file. The file is removed again
	        when the context exits, also when writing the archive fails.
	    """
	    data = data or {}
	    fd, f = tempfile.mkstemp(suffix=".zip")
	    # mkstemp() hands back an already-open OS-level handle. Only the path is
	    # needed here, so close the handle right away instead of leaking it.
	    os.close(fd)
	    try:
	        with zipfile.ZipFile(f, mode="w") as z:
	            for k, v in data.items():
	                z.writestr(k, v)
	        yield f
	    finally:
	        # Best-effort cleanup: the consumer may already have removed the file.
	        try:
	            os.remove(f)
	        except OSError:
	            pass


	@contextmanager
	def temparchive(data=None):
	    """
	    Provide test cases with temporary synthesized 7-Zip archives.

	    The calling test is skipped when the ``libarchive`` module cannot be
	    imported.

	    :param data: Mapping of member name to member content. ``None`` (or an
	        empty mapping) produces an archive without members.
	    :yields: Path of the temporary ``.7z`` file. The file is removed again
	        when the context exits, also when writing the archive fails.
	    """
	    data = data or {}
	    # Resolve the optional dependency before touching the filesystem, so a
	    # skipped test leaves nothing behind.
	    libarchive = pytest.importorskip("libarchive")
	    fd, f = tempfile.mkstemp(suffix=".7z")
	    # mkstemp() hands back an already-open OS-level handle. Only the path is
	    # needed here, so close the handle right away instead of leaking it.
	    os.close(fd)
	    try:
	        with libarchive.file_writer(f, "7zip") as archive:
	            for k, v in data.items():
	                archive.add_file_from_memory(
	                    entry_path=k, entry_size=len(v), entry_data=v
	                )
	        yield f
	    finally:
	        # Best-effort cleanup: the consumer may already have removed the file.
	        try:
	            os.remove(f)
	        except OSError:
	            pass


	@contextmanager
	def temptar(data=None, mode="w", suffix=".tar"):
	    """
	    Provide test cases with temporary synthesized .tar archives.

	    For every member whose name contains ``/``, directory entries for all of
	    its parent directories are written ahead of the member itself. Each
	    directory entry is written once, even when several members share it.

	    :param data: Mapping of member name to member content (bytes). ``None``
	        (or an empty mapping) produces an archive without members.
	    :param mode: Mode passed to ``tarfile.TarFile.open``.
	    :param suffix: File name suffix of the temporary file.
	    :yields: Path of the temporary archive. The file is removed again when
	        the context exits, also when writing the archive fails.
	    """
	    data = data or {}
	    fd, fn = tempfile.mkstemp(suffix=suffix)
	    # mkstemp() hands back an already-open OS-level handle. Only the path is
	    # needed here, so close the handle right away instead of leaking it.
	    os.close(fd)
	    try:
	        with tarfile.TarFile.open(fn, mode=mode) as t:
	            # Directory paths that already have an entry in the archive.
	            created_dirs = set()
	            for name, content in data.items():
	                # Create directory hierarchy.
	                # https://bugs.python.org/issue22208#msg225558
	                if "/" in name:
	                    parts = os.path.dirname(name).split("/")
	                    for index in range(1, len(parts) + 1):
	                        dirname = "/".join(parts[:index])
	                        if dirname in created_dirs:
	                            continue
	                        info = tarfile.TarInfo(dirname)
	                        info.type = tarfile.DIRTYPE
	                        t.addfile(info)
	                        created_dirs.add(dirname)

	                # Add file content.
	                info = tarfile.TarInfo(name=name)
	                info.size = len(content)
	                t.addfile(info, BytesIO(content))
	        yield fn
	    finally:
	        # Best-effort cleanup: the consumer may already have removed the file.
	        try:
	            os.remove(fn)
	        except OSError:
	            pass


	@contextmanager
	def temptargz(data=None, mode="w", suffix=".tar.gz"):
	    """
	    Provide test cases with temporary synthesized .tar.gz archives.

	    An uncompressed archive is produced by ``temptar`` and then written
	    through ``gzip.GzipFile`` into a second temporary file.

	    :param data: Mapping of member name to member content, forwarded to
	        ``temptar``.
	    :param mode: Mode used both for ``temptar`` and for the gzip writer.
	    :param suffix: File name suffix of the compressed temporary file.
	    :yields: Path of the compressed archive. The file is removed again when
	        the context exits, also when compressing fails.
	    """
	    fd, fn = tempfile.mkstemp(suffix=suffix)
	    # mkstemp() hands back an already-open OS-level handle. Only the path is
	    # needed here, so close the handle right away instead of leaking it.
	    os.close(fd)
	    try:
	        with temptar(data=data, mode=mode) as tarname:
	            # Both handles are context-managed so they get closed even if
	            # reading or compressing raises.
	            with open(tarname, "rb") as tar, gzip.GzipFile(
	                filename=fn, mode=mode
	            ) as cf:
	                cf.write(tar.read())
	        yield fn
	    finally:
	        # Best-effort cleanup: the consumer may already have removed the file.
	        try:
	            os.remove(fn)
	        except OSError:
	            pass


	@contextmanager
	def temptarbz2(data=None, mode="w", suffix=".tar.bz2"):
	    """
	    Provide test cases with temporary synthesized .tar.bz2 archives.

	    An uncompressed archive is produced by ``temptar`` and then written
	    through ``bz2.BZ2File`` into a second temporary file.

	    :param data: Mapping of member name to member content, forwarded to
	        ``temptar``.
	    :param mode: Mode used both for ``temptar`` and for the bz2 writer.
	    :param suffix: File name suffix of the compressed temporary file.
	    :yields: Path of the compressed archive. The file is removed again when
	        the context exits, also when compressing fails.
	    """
	    fd, fn = tempfile.mkstemp(suffix=suffix)
	    # mkstemp() hands back an already-open OS-level handle. Only the path is
	    # needed here, so close the handle right away instead of leaking it.
	    os.close(fd)
	    try:
	        with temptar(data=data, mode=mode) as tarname:
	            # Both handles are context-managed so they get closed even if
	            # reading or compressing raises.
	            with open(tarname, "rb") as tar, bz2.BZ2File(
	                filename=fn, mode=mode
	            ) as cf:
	                cf.write(tar.read())
	        yield fn
	    finally:
	        # Best-effort cleanup: the consumer may already have removed the file.
	        try:
	            os.remove(fn)
	        except OSError:
	            pass


	@contextmanager
	def temptarxz(data=None, mode="w", suffix=".tar.xz"):
	    """
	    Provide test cases with temporary synthesized .tar.xz archives.

	    An uncompressed archive is produced by ``temptar`` and then written
	    through ``lzma.open`` (XZ container format) into a second temporary file.

	    :param data: Mapping of member name to member content, forwarded to
	        ``temptar``.
	    :param mode: Mode used both for ``temptar`` and for the lzma writer.
	    :param suffix: File name suffix of the compressed temporary file.
	    :yields: Path of the compressed archive. The file is removed again when
	        the context exits, also when compressing fails.
	    """
	    fd, fn = tempfile.mkstemp(suffix=suffix)
	    # mkstemp() hands back an already-open OS-level handle. Only the path is
	    # needed here, so close the handle right away instead of leaking it.
	    os.close(fd)
	    try:
	        with temptar(data=data, mode=mode) as tarname:
	            # Both handles are context-managed so they get closed even if
	            # reading or compressing raises.
	            with open(tarname, "rb") as tar, lzma.open(
	                filename=fn, mode=mode, format=lzma.FORMAT_XZ
	            ) as cf:
	                cf.write(tar.read())
	        yield fn
	    finally:
	        # Best-effort cleanup: the consumer may already have removed the file.
	        try:
	            os.remove(fn)
	        except OSError:
	            pass


	class ArchiveTestScenario:
	    """
	    Bundle the settings that describe one archive test scenario.

	    Attributes:

	    - ``protocol``: the filesystem protocol identifier, any of "zip", "tar"
	      or "libarchive".
	    - ``provider``: a contextmanager function providing temporary
	      synthesized archives.
	    - ``variant``: the filesystem protocol variant identifier, any of "gz",
	      "bz2" or "xz".
	    """

	    def __init__(self, protocol=None, provider=None, variant=None):
	        # Store all three settings verbatim; every one defaults to None.
	        self.protocol, self.provider, self.variant = protocol, provider, variant


	def pytest_generate_tests(metafunc):
	    """
	    Generate test scenario parametrization arguments with appropriate labels (idlist).

	    On the one hand, this yields an appropriate output like::

	        fsspec/implementations/tests/test_archive.py::TestArchive::test_empty[zip] PASSED  # noqa

	    On the other hand, it will support perfect test discovery, like::

	        pytest fsspec -vvv -k "zip or tar or libarchive"

	    https://docs.pytest.org/en/latest/example/parametrize.html#a-quick-port-of-testscenarios

	    Each entry of the test class' ``scenarios`` attribute becomes one value
	    of the ``scenario`` argument. Its label is the scenario's protocol, with
	    ``-<variant>`` appended when a variant is set. Tests without a class, or
	    whose class defines no ``scenarios``, are left unparametrized.

	    :param metafunc: Object exposing ``cls`` and ``parametrize``, as handed
	        to this hook by pytest.
	    """
	    # ``metafunc.cls`` is None for module-level test functions; getattr() on
	    # None simply falls through to the default as well.
	    scenarios = getattr(metafunc.cls, "scenarios", None)
	    if scenarios is None:
	        return

	    idlist = []
	    argnames = ["scenario"]
	    argvalues = []
	    for scenario in scenarios:
	        label = scenario.protocol
	        if scenario.variant:
	            label += "-" + scenario.variant
	        idlist.append(label)
	        argvalues.append([scenario])
	    metafunc.parametrize(argnames, argvalues, ids=idlist, scope="class")


	# Define test scenarios: each one pairs a filesystem protocol with the
	# contextmanager that synthesizes a matching archive, plus an optional
	# compression variant.
	scenario_zip = ArchiveTestScenario("zip", tempzip)
	scenario_tar = ArchiveTestScenario("tar", temptar)
	scenario_targz = ArchiveTestScenario("tar", temptargz, variant="gz")
	scenario_tarbz2 = ArchiveTestScenario("tar", temptarbz2, variant="bz2")
	scenario_tarxz = ArchiveTestScenario("tar", temptarxz, variant="xz")
	scenario_libarchive = ArchiveTestScenario("libarchive", temparchive)


	class TestAnyArchive:
	    """
	    Validate that all filesystem adapter implementations for archive files
	    will adhere to the same specification.

	    Every test receives a ``scenario`` argument and runs once per entry of
	    ``scenarios``.
	    """

	    scenarios = [
	        scenario_zip,
	        scenario_tar,
	        scenario_targz,
	        scenario_tarbz2,
	        scenario_tarxz,
	        scenario_libarchive,
	    ]

	    def test_repr(self, scenario: ArchiveTestScenario):
	        """The filesystem's repr starts with the common archive prefix."""
	        with scenario.provider() as archive:
	            fs = fsspec.filesystem(scenario.protocol, fo=archive)
	            assert repr(fs).startswith("<Archive-like object")

	    def test_empty(self, scenario: ArchiveTestScenario):
	        """An archive without members lists nothing and has no root info."""
	        with scenario.provider() as archive:
	            fs = fsspec.filesystem(scenario.protocol, fo=archive)
	            assert fs.find("") == []
	            assert fs.find("", withdirs=True) == []
	            with pytest.raises(FileNotFoundError):
	                fs.info("")
	            assert fs.ls("") == []

	    def test_glob(self, scenario: ArchiveTestScenario):
	        """A three-level glob pattern finds the deeply nested member."""
	        with scenario.provider(archive_data) as archive:
	            fs = fsspec.filesystem(scenario.protocol, fo=archive)
	            # One wildcard per path level; only "deeply/nested/path" has
	            # three levels and ends in "th".
	            assert fs.glob("*/*/*th") == ["deeply/nested/path"]

	    def test_mapping(self, scenario: ArchiveTestScenario):
	        """The mapper view exposes all members and their content."""
	        with scenario.provider(archive_data) as archive:
	            fs = fsspec.filesystem(scenario.protocol, fo=archive)
	            m = fs.get_mapper()
	            assert list(m) == ["a", "b", "deeply/nested/path"]
	            assert m["b"] == archive_data["b"]

	    def test_pickle(self, scenario: ArchiveTestScenario):
	        """A filesystem instance stays usable after a pickle round trip."""
	        with scenario.provider(archive_data) as archive:
	            fs = fsspec.filesystem(scenario.protocol, fo=archive)
	            fs2 = pickle.loads(pickle.dumps(fs))
	            assert fs2.cat("b") == b"hello"

	    def test_all_dirnames(self, scenario: ArchiveTestScenario):
	        """``_all_dirnames`` returns every parent directory of the given paths."""
	        with scenario.provider(archive_data) as archive:
	            fs = fsspec.filesystem(scenario.protocol, fo=archive)

	            # fx are files, dx are directories
	            assert fs._all_dirnames([]) == set()
	            assert fs._all_dirnames(["f1"]) == set()
	            assert fs._all_dirnames(["f1", "f2"]) == set()
	            assert fs._all_dirnames(["f1", "f2", "d1/f1"]) == {"d1"}
	            assert fs._all_dirnames(["f1", "d1/f1", "d1/f2"]) == {"d1"}
	            assert fs._all_dirnames(["f1", "d1/f1", "d2/f1"]) == {"d1", "d2"}
	            assert fs._all_dirnames(["d1/d1/d1/f1"]) == {"d1", "d1/d1", "d1/d1/d1"}

	    def test_ls(self, scenario: ArchiveTestScenario):
	        """Listings work per level and ignore a trailing slash."""
	        with scenario.provider(archive_data) as archive:
	            fs = fsspec.filesystem(scenario.protocol, fo=archive)

	            assert fs.ls("", detail=False) == ["a", "b", "deeply"]
	            assert fs.ls("/") == fs.ls("")

	            assert fs.ls("deeply", detail=False) == ["deeply/nested"]
	            assert fs.ls("deeply/") == fs.ls("deeply")

	            assert fs.ls("deeply/nested", detail=False) == ["deeply/nested/path"]
	            assert fs.ls("deeply/nested/") == fs.ls("deeply/nested")

	    def test_find(self, scenario: ArchiveTestScenario):
	        """``find`` returns files recursively, optionally with directories."""
	        with scenario.provider(archive_data) as archive:
	            fs = fsspec.filesystem(scenario.protocol, fo=archive)

	            assert fs.find("") == ["a", "b", "deeply/nested/path"]
	            assert fs.find("", withdirs=True) == [
	                "a",
	                "b",
	                "deeply",
	                "deeply/nested",
	                "deeply/nested/path",
	            ]

	            assert fs.find("deeply") == ["deeply/nested/path"]
	            assert fs.find("deeply/") == fs.find("deeply")

	    @pytest.mark.parametrize("topdown", [True, False])
	    @pytest.mark.parametrize("prune_nested", [True, False])
	    def test_walk(self, scenario: ArchiveTestScenario, topdown, prune_nested):
	        """``walk`` visits the expected levels, honouring in-place pruning."""
	        with scenario.provider(archive_data) as archive:
	            fs = fsspec.filesystem(scenario.protocol, fo=archive)
	            expected = [
	                # (dirname, list of subdirs, list of files)
	                ("", ["deeply"], ["a", "b"]),
	                ("deeply", ["nested"], []),
	            ]
	            # The innermost level is only expected to be skipped when
	            # walking top-down with pruning enabled.
	            if not topdown or not prune_nested:
	                expected.append(("deeply/nested", [], ["path"]))
	            if not topdown:
	                expected.reverse()

	            result = []
	            for path, dirs, files in fs.walk("", topdown=topdown):
	                # Record a copy, because ``dirs`` is mutated right below.
	                result.append((path, dirs.copy(), files))
	                # Bypass the "nested" dir
	                if prune_nested and "nested" in dirs:
	                    dirs.remove("nested")

	            # prior py3.10 zip() does not support strict=True, we need
	            # a manual len check here
	            assert len(result) == len(expected)
	            for lhs, rhs in zip(result, expected):
	                assert lhs[0] == rhs[0]
	                assert sorted(lhs[1]) == sorted(rhs[1])
	                assert sorted(lhs[2]) == sorted(rhs[2])

	    def test_info(self, scenario: ArchiveTestScenario):
	        """``info`` reports name, size and type for directories and files."""

	        # https://github.com/Suor/funcy/blob/1.15/funcy/colls.py#L243-L245
	        def project(mapping, keys):
	            """Leaves only given keys in mapping."""
	            return {k: mapping[k] for k in keys if k in mapping}

	        with scenario.provider(archive_data) as archive:
	            fs = fsspec.filesystem(scenario.protocol, fo=archive)

	            with pytest.raises(FileNotFoundError):
	                fs.info("i-do-not-exist")

	            # Iterate over all directories.
	            for d in fs._all_dirnames(archive_data.keys()):
	                lhs = project(fs.info(d), ["name", "size", "type"])
	                expected = {"name": f"{d}", "size": 0, "type": "directory"}
	                assert lhs == expected

	            # Iterate over all files.
	            for f, v in archive_data.items():
	                lhs = fs.info(f)
	                assert lhs["name"] == f
	                assert lhs["size"] == len(v)
	                assert lhs["type"] == "file"

	    @pytest.mark.parametrize("scale", [128, 512, 4096])
	    def test_isdir_isfile(self, scenario: ArchiveTestScenario, scale: int):
	        """``isdir``/``isfile`` classify every entry of a larger archive."""

	        def make_nested_dir(i):
	            # Map each decimal digit of ``i`` to a letter and use one
	            # directory level per digit, e.g. 12 -> "B/C".
	            x = f"{i}"
	            table = x.maketrans("0123456789", "ABCDEFGHIJ")
	            return "/".join(x.translate(table))

	        scaled_data = {f"{make_nested_dir(i)}/{i}": b"" for i in range(1, scale + 1)}
	        with scenario.provider(scaled_data) as archive:
	            fs = fsspec.filesystem(scenario.protocol, fo=archive)

	            lhs_dirs, lhs_files = (
	                fs._all_dirnames(scaled_data.keys()),
	                scaled_data.keys(),
	            )

	            # Warm-up the Cache, this is done in both cases anyways...
	            fs._get_dirs()

	            # Union of all file and directory paths.
	            entries = lhs_files | lhs_dirs

	            assert lhs_dirs == {e for e in entries if fs.isdir(e)}
	            assert lhs_files == {e for e in entries if fs.isfile(e)}

	    def test_read_empty_file(self, scenario: ArchiveTestScenario):
	        """Reading a zero-length member yields empty bytes."""
	        with scenario.provider(archive_data) as archive:
	            fs = fsspec.filesystem(scenario.protocol, fo=archive)
	            # Close the member handle deterministically instead of leaving
	            # it to garbage collection.
	            with fs.open("a") as member:
	                assert member.read() == b""