This commit is contained in:
2026-04-10 15:06:59 +02:00
parent 3031b7153b
commit e5a4711004
7806 changed files with 1918528 additions and 335 deletions

View File

@@ -0,0 +1,6 @@
# Copyright (c) 2018 The Pooch Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
#
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
#

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,10 @@
subdir/tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d
tiny-data.zip 0d49e94f07bc1866ec57e7fd1b93a351fba36842ec9b13dd50bf94e8dfa35cbb
large-data.txt 98de171fb320da82982e6bf0f3994189fff4b42b23328769afce12bdd340444a
store.zip 0498d2a001e71051bbd2acd2346f38da7cbd345a633cb7bf0f8a20938714b51a
tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d https://some-site/tiny-data.txt
tiny-data.tar.gz 41503f083814f43a01a8e9a30c28d7a9fe96839a99727a7fdd0acf7cd5bab63b
store.tar.gz 088c7f4e0f1859b1c769bb6065de24376f366374817ede8691a6ac2e49f29511
tiny-data.txt.bz2 753663687a4040c90c8578061867d1df623e6aa8011c870a5dbd88ee3c82e306
tiny-data.txt.gz 2e2da6161291657617c32192dba95635706af80c6e7335750812907b58fd4b52
tiny-data.txt.xz 99dcb5c32a6e916344bacb4badcbc2f2b6ee196977d1d8187610c21e7e607765

View File

@@ -0,0 +1,2 @@
tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d
some-file.txt second_element third_element forth_element

View File

@@ -0,0 +1,2 @@
"file with spaces.txt" baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d
other\ with\ spaces.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d

View File

@@ -0,0 +1,12 @@
subdir/tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d
tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d
large-data.txt 98de171fb320da82982e6bf0f3994189fff4b42b23328769afce12bdd340444a
tiny-data.zip 0d49e94f07bc1866ec57e7fd1b93a351fba36842ec9b13dd50bf94e8dfa35cbb
store.zip 0498D2A001E71051BBD2ACD2346F38DA7CBD345A633CB7BF0F8A20938714B51A
tiny-data.tar.gz 41503f083814f43a01a8e9a30c28d7a9fe96839a99727a7fdd0acf7cd5bab63b
store.tar.gz 088c7f4e0f1859b1c769bb6065de24376f366374817ede8691a6ac2e49f29511
tiny-data.txt.bz2 753663687a4040c90c8578061867d1df623e6aa8011c870a5dbd88ee3c82e306
tiny-data.txt.gz 2e2da6161291657617c32192dba95635706af80c6e7335750812907b58fd4b52
tiny-data.txt.xz 99dcb5c32a6e916344bacb4badcbc2f2b6ee196977d1d8187610c21e7e607765

View File

@@ -0,0 +1,14 @@
# a comment
subdir/tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d
tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d
large-data.txt 98de171fb320da82982e6bf0f3994189fff4b42b23328769afce12bdd340444a
tiny-data.zip 0d49e94f07bc1866ec57e7fd1b93a351fba36842ec9b13dd50bf94e8dfa35cbb
# a comment with a starting space
store.zip 0498d2a001e71051bbd2acd2346f38da7cbd345a633cb7bf0f8a20938714b51a
tiny-data.tar.gz 41503f083814f43a01a8e9a30c28d7a9fe96839a99727a7fdd0acf7cd5bab63b
store.tar.gz 088c7f4e0f1859b1c769bb6065de24376f366374817ede8691a6ac2e49f29511
tiny-data.txt.bz2 753663687a4040c90c8578061867d1df623e6aa8011c870a5dbd88ee3c82e306
tiny-data.txt.gz 2e2da6161291657617c32192dba95635706af80c6e7335750812907b58fd4b52
tiny-data.txt.xz 99dcb5c32a6e916344bacb4badcbc2f2b6ee196977d1d8187610c21e7e607765

View File

@@ -0,0 +1,2 @@
# A tiny data file for test purposes only
1 2 3 4 5 6

View File

@@ -0,0 +1,2 @@
# A tiny data file for test purposes only
1 2 3 4 5 6

View File

@@ -0,0 +1,2 @@
# A tiny data file for test purposes only
1 2 3 4 5 6

View File

@@ -0,0 +1,689 @@
# Copyright (c) 2018 The Pooch Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
#
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
#
# pylint: disable=redefined-outer-name
"""
Test the core class and factory function.
"""
import hashlib
import os
from pathlib import Path
from tempfile import TemporaryDirectory
import pytest
from ..core import create, Pooch, retrieve, download_action, stream_download
from ..utils import get_logger, temporary_file, os_cache
from ..hashes import file_hash, hash_matches
# Import the core module so that we can monkeypatch some functions
from .. import core
from ..downloaders import HTTPDownloader, FTPDownloader
from .utils import (
pooch_test_url,
data_over_ftp,
pooch_test_figshare_url,
pooch_test_zenodo_url,
pooch_test_zenodo_with_slash_url,
pooch_test_dataverse_url,
pooch_test_registry,
check_tiny_data,
check_large_data,
capture_log,
mirror_directory,
)
# Folder with the data files shipped alongside this test module.
DATA_DIR = str(Path(__file__).parent / "data")
# Known-good registry (file name -> SHA256) for the test data files.
REGISTRY = pooch_test_registry()
# Base URLs of the remote storage locations used by the network tests.
BASEURL = pooch_test_url()
FIGSHAREURL = pooch_test_figshare_url()
ZENODOURL = pooch_test_zenodo_url()
ZENODOURL_W_SLASH = pooch_test_zenodo_with_slash_url()
DATAVERSEURL = pooch_test_dataverse_url()
# Registry with a deliberately wrong hash, used to trigger hash-check failures.
REGISTRY_CORRUPTED = {
    # The same data file but I changed the hash manually to a wrong one
    "tiny-data.txt": "098h0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d"
}
@pytest.fixture
def data_dir_mirror(tmp_path):
    """
    Provide a writable copy of the packaged test data folder.

    Copies the data directory into pytest's per-test temporary path so the
    tests never need write access to the installed package location.
    """
    mirrored = mirror_directory(DATA_DIR, tmp_path)
    return mirrored
@pytest.mark.network
def test_retrieve():
    """
    Try downloading some data with retrieve.

    The first call must log a download (including the SHA256 of the file);
    subsequent calls must hit the local cache silently.
    """
    with TemporaryDirectory() as local_store:
        data_file = "tiny-data.txt"
        url = BASEURL + data_file
        # Check that the logs say that the file is being downloaded
        with capture_log() as log_file:
            fname = retrieve(url, known_hash=None, path=local_store)
            logs = log_file.getvalue()
            assert logs.split()[0] == "Downloading"
            assert "SHA256 hash of downloaded file:" in logs
            assert REGISTRY[data_file] in logs
        # Check that the downloaded file has the right content
        assert data_file == fname[-len(data_file) :]
        check_tiny_data(fname)
        assert file_hash(fname) == REGISTRY[data_file]
        # Check that no logging happens when not downloading
        with capture_log() as log_file:
            fname = retrieve(url, known_hash=None, path=local_store)
            assert log_file.getvalue() == ""
        # Passing the correct known hash must also fetch silently
        with capture_log() as log_file:
            fname = retrieve(url, known_hash=REGISTRY[data_file], path=local_store)
            assert log_file.getvalue() == ""
@pytest.mark.network
def test_retrieve_fname():
    "Try downloading some data with retrieve and setting the file name"
    with TemporaryDirectory() as storage:
        target = "tiny-data.txt"
        source_url = BASEURL + target
        # The first fetch must be logged as a download with the file's hash
        with capture_log() as log_file:
            result = retrieve(source_url, known_hash=None, path=storage, fname=target)
            captured = log_file.getvalue()
            assert captured.split()[0] == "Downloading"
            assert "SHA256 hash of downloaded file:" in captured
            assert REGISTRY[target] in captured
        # The file on disk must carry the requested name and correct data
        assert os.path.split(result)[1] == target
        check_tiny_data(result)
        assert file_hash(result) == REGISTRY[target]
@pytest.mark.network
def test_retrieve_default_path():
    """
    Try downloading some data with retrieve to the default cache location.

    Uses the real OS cache folder, so the downloaded file is removed in the
    ``finally`` block to leave no trace for other tests.
    """
    data_file = "tiny-data.txt"
    url = BASEURL + data_file
    expected_location = os_cache("pooch") / data_file
    try:
        # Check that the logs say that the file is being downloaded
        with capture_log() as log_file:
            fname = retrieve(url, known_hash=None, fname=data_file)
            logs = log_file.getvalue()
            assert logs.split()[0] == "Downloading"
            assert str(os_cache("pooch").resolve()) in logs
            assert "SHA256 hash of downloaded file" in logs
            assert REGISTRY[data_file] in logs
        # Check that the downloaded file has the right content
        assert fname == str(expected_location.resolve())
        check_tiny_data(fname)
        assert file_hash(fname) == REGISTRY[data_file]
    finally:
        # Clean up the shared OS cache even if the assertions above fail
        if os.path.exists(str(expected_location)):
            os.remove(str(expected_location))
def test_pooch_local(data_dir_mirror):
    "Setup a pooch that already has the local data and test the fetch."
    expected = str(data_dir_mirror / "tiny-data.txt")
    pup = Pooch(path=data_dir_mirror, base_url="some bogus URL", registry=REGISTRY)
    # With the file already cached, fetch must return it without downloading
    fetched = pup.fetch("tiny-data.txt")
    assert fetched == expected
    check_tiny_data(fetched)
@pytest.mark.network
@pytest.mark.parametrize(
    "url",
    [
        BASEURL,
        pytest.param(FIGSHAREURL, marks=pytest.mark.figshare),
        ZENODOURL,
        DATAVERSEURL,
    ],
    ids=["https", "figshare", "zenodo", "dataverse"],
)
def test_pooch_custom_url(url):
    "Have pooch download the file from URL that is not base_url"
    with TemporaryDirectory() as local_store:
        storage = Path(local_store)
        # Override the URL for this one file instead of using base_url
        custom_urls = {"tiny-data.txt": url + "tiny-data.txt"}
        pup = Pooch(path=storage, base_url="", registry=REGISTRY, urls=custom_urls)
        # The first fetch should be logged as a download into the temp dir
        with capture_log() as log_file:
            fetched = pup.fetch("tiny-data.txt")
            words = log_file.getvalue().split()
            assert words[0] == "Downloading"
            assert words[-1] == f"'{storage}'."
            check_tiny_data(fetched)
        # A second fetch hits the cache and must be silent
        with capture_log() as log_file:
            fetched = pup.fetch("tiny-data.txt")
            assert log_file.getvalue() == ""
@pytest.mark.network
@pytest.mark.parametrize(
    "url",
    [
        BASEURL,
        pytest.param(FIGSHAREURL, marks=pytest.mark.figshare),
        ZENODOURL,
        DATAVERSEURL,
    ],
    ids=["https", "figshare", "zenodo", "dataverse"],
)
def test_pooch_download(url):
    "Setup a pooch that has no local data and needs to download"
    with TemporaryDirectory() as local_store:
        storage = Path(local_store)
        expected_path = str(storage / "tiny-data.txt")
        pup = Pooch(path=storage, base_url=url, registry=REGISTRY)
        # An empty cache means the first fetch is logged as a download
        with capture_log() as log_file:
            fetched = pup.fetch("tiny-data.txt")
            words = log_file.getvalue().split()
            assert words[0] == "Downloading"
            assert words[-1] == f"'{storage}'."
        # The file must land in the right place with the right content
        assert fetched == expected_path
        check_tiny_data(fetched)
        assert file_hash(fetched) == REGISTRY["tiny-data.txt"]
        # Fetching again must be a silent cache hit
        with capture_log() as log_file:
            fetched = pup.fetch("tiny-data.txt")
            assert log_file.getvalue() == ""
class FakeHashMatches:  # pylint: disable=too-few-public-methods
    "Create a fake version of hash_matches that fails n times"

    def __init__(self, nfailures):
        # Number of calls that should fail before the check starts passing
        self.nfailures = nfailures
        # How many failures have been produced so far
        self.failed = 0

    def hash_matches(self, *args, **kwargs):
        "Fail n times before finally passing"
        if self.failed >= self.nfailures:
            # Budget of failures exhausted: behave like the real check
            return hash_matches(*args, **kwargs)
        self.failed += 1
        # Swap in a bogus known hash so the check is guaranteed to fail
        return hash_matches(args[0], "bla", **kwargs)
@pytest.mark.network
def test_pooch_download_retry_off_by_default(monkeypatch):
    """
    Check that retrying the download is off by default.

    ``core.hash_matches`` is monkeypatched with a fake that fails 3 times,
    so without retries the very first fetch must raise immediately.
    """
    with TemporaryDirectory() as local_store:
        monkeypatch.setattr(core, "hash_matches", FakeHashMatches(3).hash_matches)
        # Setup a pooch without download retrying
        path = Path(local_store)
        pup = Pooch(path=path, base_url=BASEURL, registry=REGISTRY)
        # Make sure it fails with no retries
        with pytest.raises(ValueError) as error:
            with capture_log() as log_file:
                pup.fetch("tiny-data.txt")
        assert "does not match the known hash" in str(error)
        # Check that the log doesn't have the download retry message
        logs = log_file.getvalue().strip().split("\n")
        assert len(logs) == 1
        assert logs[0].startswith("Downloading")
        assert logs[0].endswith(f"'{path}'.")
class FakeSleep:  # pylint: disable=too-few-public-methods
    "Create a fake version of sleep that logs the specified times"

    def __init__(self):
        # Record of every requested duration, in call order
        self.times = []

    def sleep(self, seconds):
        "Store the time and doesn't sleep"
        # Return immediately instead of blocking the test
        self.times.append(seconds)
@pytest.mark.network
def test_pooch_download_retry(monkeypatch):
    """
    Check that retrying the download works if the hash is different.

    ``hash_matches`` is faked to fail 11 times and ``time.sleep`` is faked
    so the back-off schedule can be asserted without actually waiting.
    """
    with TemporaryDirectory() as local_store:
        monkeypatch.setattr(core, "hash_matches", FakeHashMatches(11).hash_matches)
        fakesleep = FakeSleep()
        monkeypatch.setattr(core.time, "sleep", fakesleep.sleep)
        # Setup a pooch with download retrying
        path = Path(local_store)
        true_path = str(path / "tiny-data.txt")
        retries = 11
        pup = Pooch(
            path=path, base_url=BASEURL, registry=REGISTRY, retry_if_failed=retries
        )
        # Check that the logs say that the download failed n times
        with capture_log() as log_file:
            fname = pup.fetch("tiny-data.txt")
            logs = log_file.getvalue().strip().split("\n")
            assert len(logs) == 1 + retries
            assert logs[0].startswith("Downloading")
            assert logs[0].endswith(f"'{path}'.")
            # Each retry logs a countdown of the remaining attempts
            for i, line in zip(range(retries, 0, -1), logs[1:]):
                assert "Failed to download" in line
                plural = "s" if i > 1 else ""
                assert f"download again {i} more time{plural}." in line
        # Check that the sleep time increases but stops at 10s
        assert fakesleep.times == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10]
        # Check that the downloaded file has the right content
        assert true_path == fname
        check_tiny_data(fname)
        assert file_hash(fname) == REGISTRY["tiny-data.txt"]
@pytest.mark.network
def test_pooch_download_retry_fails_eventually(monkeypatch):
    """
    Check that retrying the download fails after the set amount of retries.

    The fake hash check fails 3 times but only 1 retry is allowed, so the
    fetch must raise after logging a single retry message.
    """
    with TemporaryDirectory() as local_store:
        monkeypatch.setattr(core, "hash_matches", FakeHashMatches(3).hash_matches)
        # Setup a pooch with insufficient retry attempts
        path = Path(local_store)
        pup = Pooch(path=path, base_url=BASEURL, registry=REGISTRY, retry_if_failed=1)
        # Make sure it fails with no retries
        with pytest.raises(ValueError) as error:
            # Check that the logs say that the download failed n times
            with capture_log() as log_file:
                pup.fetch("tiny-data.txt")
        logs = log_file.getvalue().strip().split("\n")
        assert len(logs) == 2
        assert logs[0].startswith("Downloading")
        assert logs[0].endswith(f"'{path}'.")
        assert "Failed to download" in logs[1]
        assert "download again 1 more time." in logs[1]
        assert "does not match the known hash" in str(error)
@pytest.mark.network
def test_pooch_logging_level():
    "Setup a pooch and check that no logging happens when the level is raised"
    with TemporaryDirectory() as local_store:
        storage = Path(local_store)
        custom_urls = {"tiny-data.txt": BASEURL + "tiny-data.txt"}
        pup = Pooch(path=storage, base_url="", registry=REGISTRY, urls=custom_urls)
        # With the logger restricted to CRITICAL, the download INFO message
        # must not be captured
        with capture_log("CRITICAL") as log_file:
            fetched = pup.fetch("tiny-data.txt")
            assert log_file.getvalue() == ""
        check_tiny_data(fetched)
@pytest.mark.network
def test_pooch_update():
    "Setup a pooch that already has the local data but the file is outdated"
    with TemporaryDirectory() as local_store:
        storage = Path(local_store)
        expected_path = str(storage / "tiny-data.txt")
        # Plant a stale local copy whose hash won't match the registry
        with open(expected_path, "w", encoding="utf-8") as outfile:
            outfile.write("different data")
        pup = Pooch(path=storage, base_url=BASEURL, registry=REGISTRY)
        # The stale copy should trigger an update, not a fresh download
        with capture_log() as log_file:
            fetched = pup.fetch("tiny-data.txt")
            words = log_file.getvalue().split()
            assert words[0] == "Updating"
            assert words[-1] == f"'{storage}'."
        # The refreshed file must now match the registry
        assert fetched == expected_path
        check_tiny_data(fetched)
        assert file_hash(fetched) == REGISTRY["tiny-data.txt"]
        # A follow-up fetch is a silent cache hit
        with capture_log() as log_file:
            fetched = pup.fetch("tiny-data.txt")
            assert log_file.getvalue() == ""
def test_pooch_update_disallowed():
    "Test that disallowing updates works."
    with TemporaryDirectory() as local_store:
        storage = Path(local_store)
        # Plant a stale local copy that would normally trigger an update
        stale_path = str(storage / "tiny-data.txt")
        with open(stale_path, "w", encoding="utf-8") as outfile:
            outfile.write("different data")
        pup = Pooch(
            path=storage,
            base_url=BASEURL,
            registry=REGISTRY,
            allow_updates=False,
        )
        # With updates disabled, the hash mismatch must raise instead
        with pytest.raises(ValueError):
            pup.fetch("tiny-data.txt")
def test_pooch_update_disallowed_environment():
    """
    Test that disallowing updates works through an environment variable.

    The variable name is handed to ``create`` via ``allow_updates`` and its
    value ("False") disables updating. The variable is removed again in the
    ``finally`` block so other tests aren't affected.
    """
    variable_name = "MYPROJECT_DISALLOW_UPDATES"
    try:
        os.environ[variable_name] = "False"
        with TemporaryDirectory() as local_store:
            path = Path(local_store)
            # Create a dummy version of tiny-data.txt that is different from
            # the one in the remote storage
            true_path = str(path / "tiny-data.txt")
            with open(true_path, "w", encoding="utf-8") as fin:
                fin.write("different data")
            # Setup a pooch in a temp dir
            pup = create(
                path=path,
                base_url=BASEURL,
                registry=REGISTRY,
                allow_updates=variable_name,
            )
            with pytest.raises(ValueError):
                pup.fetch("tiny-data.txt")
    finally:
        # Always clean up the environment variable
        os.environ.pop(variable_name)
def test_pooch_create_base_url_no_trailing_slash():
    """
    Test if pooch.create appends a trailing slash to the base url if missing
    """
    url_without_slash = "https://mybase.url"
    pup = create(base_url=url_without_slash, registry=None, path=DATA_DIR)
    # The stored base_url must end with exactly one appended slash
    assert pup.base_url == url_without_slash + "/"
@pytest.mark.network
def test_pooch_corrupted(data_dir_mirror):
    """
    Raise an exception if the file hash doesn't match the registry.

    Covers both the download path (file absent locally) and the update path
    (file present but stale); both must fail against the corrupted registry.
    """
    # Test the case where the file wasn't in the directory
    with TemporaryDirectory() as local_store:
        path = os.path.abspath(local_store)
        pup = Pooch(path=path, base_url=BASEURL, registry=REGISTRY_CORRUPTED)
        with capture_log() as log_file:
            with pytest.raises(ValueError) as error:
                pup.fetch("tiny-data.txt")
            # The offending file name must be part of the error message
            assert "(tiny-data.txt)" in str(error.value)
            logs = log_file.getvalue()
            assert logs.split()[0] == "Downloading"
            assert logs.split()[-1] == f"'{path}'."
    # and the case where the file exists but hash doesn't match
    pup = Pooch(path=data_dir_mirror, base_url=BASEURL, registry=REGISTRY_CORRUPTED)
    with capture_log() as log_file:
        with pytest.raises(ValueError) as error:
            pup.fetch("tiny-data.txt")
        assert "(tiny-data.txt)" in str(error.value)
        logs = log_file.getvalue()
        assert logs.split()[0] == "Updating"
        assert logs.split()[-1] == f"'{data_dir_mirror}'."
def test_pooch_file_not_in_registry():
    "Should raise an exception if the file is not in the registry."
    unknown_file = "this-file-does-not-exit.csv"
    pup = Pooch(
        path="it shouldn't matter", base_url="this shouldn't either", registry=REGISTRY
    )
    # Unknown file names can't be fetched at all
    with pytest.raises(ValueError):
        pup.fetch(unknown_file)
def test_pooch_load_registry():
    """
    Loading the registry from a file should work.

    Checks both the parsed hash mapping and the list of registered files.
    """
    pup = Pooch(path="", base_url="")
    pup.load_registry(os.path.join(DATA_DIR, "registry.txt"))
    assert pup.registry == REGISTRY
    # BUG FIX: the old `a.sort() == b.sort()` compared None == None because
    # list.sort() returns None, so the assertion was vacuous. Use sorted()
    # to actually compare the two file lists.
    assert sorted(pup.registry_files) == sorted(REGISTRY)
def test_pooch_load_registry_comments():
    """
    Loading the registry from a file and strip line comments.

    The registry file contains '#' comment lines that must be ignored.
    """
    pup = Pooch(path="", base_url="")
    pup.load_registry(os.path.join(DATA_DIR, "registry_comments.txt"))
    assert pup.registry == REGISTRY
    # BUG FIX: `a.sort() == b.sort()` was always True (both sides are None
    # since list.sort() sorts in place). Compare sorted copies instead.
    assert sorted(pup.registry_files) == sorted(REGISTRY)
def test_pooch_load_registry_fileobj():
    """
    Loading the registry from a file object.

    Both binary-mode and text-mode file objects must be supported.
    """
    path = os.path.join(DATA_DIR, "registry.txt")
    # Binary mode
    pup = Pooch(path="", base_url="")
    with open(path, "rb") as fin:
        pup.load_registry(fin)
    assert pup.registry == REGISTRY
    # BUG FIX: `a.sort() == b.sort()` compared None == None (list.sort()
    # returns None), making the assertion vacuous. Use sorted() instead.
    assert sorted(pup.registry_files) == sorted(REGISTRY)
    # Text mode
    pup = Pooch(path="", base_url="")
    with open(path, "r", encoding="utf-8") as fin:
        pup.load_registry(fin)
    assert pup.registry == REGISTRY
    assert sorted(pup.registry_files) == sorted(REGISTRY)
def test_pooch_load_registry_custom_url():
    "Load the registry from a file with a custom URL inserted"
    pup = Pooch(path="", base_url="")
    registry_file = os.path.join(DATA_DIR, "registry-custom-url.txt")
    pup.load_registry(registry_file)
    assert pup.registry == REGISTRY
    # The third column of the registry line becomes a per-file URL override
    assert pup.urls == {"tiny-data.txt": "https://some-site/tiny-data.txt"}
def test_pooch_load_registry_invalid_line():
    "Should raise an exception when a line doesn't have two elements"
    pup = Pooch(path="", base_url="", registry={})
    bad_registry = os.path.join(DATA_DIR, "registry-invalid.txt")
    # Lines with too many elements are malformed and must be rejected
    with pytest.raises(IOError):
        pup.load_registry(bad_registry)
def test_pooch_load_registry_with_spaces():
    "Should check that spaces in filenames are allowed in registry files"
    pup = Pooch(path="", base_url="")
    pup.load_registry(os.path.join(DATA_DIR, "registry-spaces.txt"))
    # Both quoted and backslash-escaped spellings must parse to plain names
    for expected_name in ("file with spaces.txt", "other with spaces.txt"):
        assert expected_name in pup.registry
@pytest.mark.network
def test_check_availability():
    "Should correctly check availability of existing and non existing files"
    # A file that exists on the remote server
    pup = Pooch(path=DATA_DIR, base_url=BASEURL, registry=REGISTRY)
    assert pup.is_available("tiny-data.txt")
    # The same file behind a broken base URL is unavailable
    pup = Pooch(path=DATA_DIR, base_url=BASEURL + "wrong-url/", registry=REGISTRY)
    assert not pup.is_available("tiny-data.txt")
    # A registry entry that doesn't exist on the server
    extended_registry = dict(REGISTRY)
    extended_registry["not-a-real-data-file.txt"] = "notarealhash"
    pup = Pooch(path=DATA_DIR, base_url=BASEURL, registry=extended_registry)
    assert not pup.is_available("not-a-real-data-file.txt")
def test_check_availability_on_ftp(ftpserver):
    "Should correctly check availability of existing and non existing files"
    with data_over_ftp(ftpserver, "tiny-data.txt") as url:
        served_base = url.replace("tiny-data.txt", "")
        pup = Pooch(
            path=DATA_DIR,
            base_url=served_base,
            registry={
                "tiny-data.txt": "baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d",
                "doesnot_exist.zip": "jdjdjdjdflld",
            },
        )
        downloader = FTPDownloader(port=ftpserver.server_port)
        # The served file is reachable; the made-up one is not
        assert pup.is_available("tiny-data.txt", downloader=downloader)
        assert not pup.is_available("doesnot_exist.zip", downloader=downloader)
def test_check_availability_invalid_downloader():
    "Should raise an exception if the downloader doesn't support this"

    def no_check_downloader(url, output, pooch):  # pylint: disable=unused-argument
        "A downloader that doesn't support check_only"
        return None

    pup = Pooch(path=DATA_DIR, base_url=BASEURL, registry=REGISTRY)
    expected_msg = "does not support availability checks."
    with pytest.raises(NotImplementedError, match=expected_msg):
        pup.is_available("tiny-data.txt", downloader=no_check_downloader)
@pytest.mark.network
def test_fetch_with_downloader(capsys):
    """
    Setup a downloader function for fetch.

    The custom downloader wraps HTTPDownloader and logs a marker message so
    the test can verify that fetch actually invoked it.
    """

    def download(url, output_file, pup):  # pylint: disable=unused-argument
        "Download through HTTP and warn that we're doing it"
        get_logger().info("downloader executed")
        HTTPDownloader()(url, output_file, pup)

    with TemporaryDirectory() as local_store:
        path = Path(local_store)
        # Setup a pooch in a temp dir
        pup = Pooch(path=path, base_url=BASEURL, registry=REGISTRY)
        # Check that the logs say that the file is being downloaded
        with capture_log() as log_file:
            fname = pup.fetch("large-data.txt", downloader=download)
            logs = log_file.getvalue()
            lines = logs.splitlines()
            assert len(lines) == 2
            assert lines[0].split()[0] == "Downloading"
            # The marker proves our custom downloader was called
            assert lines[1] == "downloader executed"
        # Read stderr and make sure no progress bar was printed by default
        assert not capsys.readouterr().err
        # Check that the downloaded file has the right content
        check_large_data(fname)
        # Check that no logging happens when not downloading
        with capture_log() as log_file:
            fname = pup.fetch("large-data.txt")
            assert log_file.getvalue() == ""
def test_invalid_hash_alg(data_dir_mirror):
    "Test an invalid hashing algorithm"
    bogus_registry = {"tiny-data.txt": "blah:1234"}
    pup = Pooch(path=data_dir_mirror, base_url=BASEURL, registry=bogus_registry)
    # The unknown "blah" algorithm should be named in the error message
    with pytest.raises(ValueError) as exc:
        pup.fetch("tiny-data.txt")
    assert "'blah'" in str(exc.value)
def test_alternative_hashing_algorithms(data_dir_mirror):
    "Test different hashing algorithms using local data"
    fname = str(data_dir_mirror / "tiny-data.txt")
    check_tiny_data(fname)
    with open(fname, "rb") as infile:
        contents = infile.read()
    for algorithm in ("sha512", "md5"):
        # Registry entries may prefix the hash with "algorithm:"
        digest = hashlib.new(algorithm, contents).hexdigest()
        registry = {"tiny-data.txt": f"{algorithm}:{digest}"}
        pup = Pooch(path=data_dir_mirror, base_url="some bogus URL", registry=registry)
        assert fname == pup.fetch("tiny-data.txt")
        check_tiny_data(fname)
def test_download_action():
    "Test that the right action is performed based on file existing"
    # Missing file: must be downloaded
    action, verb = download_action(
        Path("this_file_does_not_exist.txt"), known_hash=None
    )
    assert (action, verb) == ("download", "Downloading")
    # Existing file with a mismatched hash: must be updated
    with temporary_file() as tmp:
        action, verb = download_action(Path(tmp), known_hash="not the correct hash")
        assert (action, verb) == ("update", "Updating")
    # Existing file with a matching hash: fetched from the cache
    with temporary_file() as tmp:
        with open(tmp, "w", encoding="utf-8") as output:
            output.write("some data")
        action, verb = download_action(Path(tmp), known_hash=file_hash(tmp))
        assert (action, verb) == ("fetch", "Fetching")
@pytest.mark.network
@pytest.mark.parametrize("fname", ["tiny-data.txt", "subdir/tiny-data.txt"])
def test_stream_download(fname):
    "Check that downloading a file over HTTP works as expected"
    # Use the data in store/ because the subdir is in there for some reason
    url = BASEURL + "store/" + fname
    with TemporaryDirectory() as local_store:
        target = Path(local_store) / fname
        assert not target.exists()
        stream_download(url, target, REGISTRY[fname], HTTPDownloader(), pooch=None)
        # The file (and any needed subfolder) must now exist with good data
        assert target.exists()
        check_tiny_data(str(target))
@pytest.mark.network
@pytest.mark.parametrize(
    "url",
    [pytest.param(FIGSHAREURL, marks=pytest.mark.figshare), ZENODOURL, DATAVERSEURL],
    ids=["figshare", "zenodo", "dataverse"],
)
def test_load_registry_from_doi(url):
    """Check that the registry is correctly populated from the API"""
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=os.path.abspath(local_store), base_url=url)
        pup.load_registry_from_doi()
        # The archive is known to hold exactly these two files
        assert len(pup.registry) == 2
        assert "tiny-data.txt" in pup.registry
        assert "store.zip" in pup.registry
        # Fetching validates the checksums reported by the API
        for filename in pup.registry:
            pup.fetch(filename)
@pytest.mark.network
def test_load_registry_from_doi_zenodo_with_slash():
    """
    Check that the registry is correctly populated from the Zenodo API when
    the filename contains a slash
    """
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=os.path.abspath(local_store), base_url=ZENODOURL_W_SLASH)
        pup.load_registry_from_doi()
        # The archive holds a single file whose name includes a slash
        assert len(pup.registry) == 1
        assert "santisoler/pooch-test-data-v1.zip" in pup.registry
        # Fetching validates the checksums reported by the API
        for filename in pup.registry:
            pup.fetch(filename)
def test_wrong_load_registry_from_doi():
    """Check that non-DOI URLs produce an error"""
    pup = Pooch(path="", base_url=BASEURL)
    # A plain HTTPS base_url is not a DOI and must be rejected
    with pytest.raises(ValueError) as exc:
        pup.load_registry_from_doi()
    assert "only implemented for DOIs" in str(exc.value)

View File

@@ -0,0 +1,582 @@
# Copyright (c) 2018 The Pooch Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
#
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
#
"""
Test the downloader classes and functions separately from the Pooch core.
"""
import os
import sys
from tempfile import TemporaryDirectory
import pytest
from requests import HTTPError
# Mypy doesn't like assigning None like this.
# Can just use a guard variable
try:
import tqdm
except ImportError:
tqdm = None # type: ignore
try:
import paramiko
except ImportError:
paramiko = None # type: ignore
from .. import Pooch
from ..downloaders import (
HTTPDownloader,
FTPDownloader,
SFTPDownloader,
DOIDownloader,
choose_downloader,
FigshareRepository,
ZenodoRepository,
DataverseRepository,
doi_to_url,
REQUESTS_HEADERS,
)
from ..processors import Unzip
from .utils import (
pooch_test_url,
check_large_data,
check_tiny_data,
data_over_ftp,
pooch_test_figshare_url,
pooch_test_zenodo_url,
pooch_test_zenodo_with_slash_url,
pooch_test_dataverse_url,
)
# Base URLs of the remote test data on the various hosting services.
BASEURL = pooch_test_url()
FIGSHAREURL = pooch_test_figshare_url()
ZENODOURL = pooch_test_zenodo_url()
ZENODOURL_W_SLASH = pooch_test_zenodo_with_slash_url()
DATAVERSEURL = pooch_test_dataverse_url()
@pytest.mark.skipif(tqdm is None, reason="requires tqdm")
@pytest.mark.parametrize(
    "url",
    [
        BASEURL + "tiny-data.txt",  # HTTPDownloader
        ZENODOURL,  # DOIDownloader
    ],
)
def test_progressbar_kwarg_passed(url):
    """The progressbar keyword argument must pass through choose_downloader"""
    chosen = choose_downloader(url, progressbar=True)
    # The selected downloader must carry the flag forward
    assert chosen.progressbar is True
@pytest.mark.skipif(paramiko is None, reason="requires paramiko")
def test_progressbar_kwarg_passed_sftp():
    """The progressbar keyword argument must pass through choose_downloader"""
    sftp_url = "sftp://test.rebex.net/pub/example/pocketftp.png"
    chosen = choose_downloader(sftp_url, progressbar=True)
    assert chosen.progressbar is True
def test_unsupported_protocol():
    "Should raise ValueError when protocol is not supported"
    # A bogus URL scheme and a bogus DOI-like prefix must both be rejected
    for bad_url in ("httpup://some-invalid-url.com", "doii:XXX/XXX/file"):
        with pytest.raises(ValueError):
            choose_downloader(bad_url)
@pytest.mark.network
def test_invalid_doi_repository():
    "Should fail if data repository is not supported"
    # The DOI of the Pooch paper in JOSS resolves to joss.theoj.org, which
    # is not a supported data repository
    joss_doi_url = "doi:10.21105/joss.01943/file_name.txt"
    with pytest.raises(ValueError) as exc:
        DOIDownloader()(url=joss_doi_url, output_file=None, pooch=None)
    assert "Invalid data repository 'joss.theoj.org'" in str(exc.value)
@pytest.mark.network
def test_doi_url_not_found():
    "Should fail if the DOI is not found"
    # Resolving a bogus DOI must surface the HTTP error
    with pytest.raises(HTTPError):
        doi_to_url(doi="NOTAREALDOI")
@pytest.mark.network
@pytest.mark.parametrize(
    "repository,doi",
    [
        pytest.param(
            FigshareRepository,
            "10.6084/m9.figshare.14763051.v1",
            marks=pytest.mark.figshare,
        ),
        (ZenodoRepository, "10.5281/zenodo.4924875"),
        (DataverseRepository, "10.11588/data/TKCFEF"),
    ],
    ids=["figshare", "zenodo", "dataverse"],
)
def test_figshare_url_file_not_found(repository, doi):
    "Should fail if the file is not found in the archive"
    with pytest.raises(ValueError) as exc:
        archive_url = doi_to_url(doi)
        repo = repository.initialize(doi, archive_url)
        # "bla.txt" is not part of any of the test archives
        repo.download_url(file_name="bla.txt")
    assert "File 'bla.txt' not found" in str(exc.value)
@pytest.mark.network
@pytest.mark.parametrize(
    "url",
    [pytest.param(FIGSHAREURL, marks=pytest.mark.figshare), ZENODOURL, DATAVERSEURL],
    ids=["figshare", "zenodo", "dataverse"],
)
def test_doi_downloader(url):
    "Test the DOI downloader"
    # Download the small test file from each supported repository
    with TemporaryDirectory() as local_store:
        destination = os.path.join(local_store, "tiny-data.txt")
        DOIDownloader()(url + "tiny-data.txt", destination, None)
        check_tiny_data(destination)
@pytest.mark.network
def test_zenodo_downloader_with_slash_in_fname():
    """
    Test the Zenodo downloader when the path contains a forward slash

    Related to issue #336
    """
    # Use the test data we have on the repository
    with TemporaryDirectory() as local_store:
        base_url = ZENODOURL_W_SLASH + "santisoler/pooch-test-data-v1.zip"
        downloader = DOIDownloader()
        outfile = os.path.join(local_store, "test-data.zip")
        downloader(base_url, outfile, None)
        # unpack the downloaded zip file so we can check the integrity of
        # tiny-data.txt
        fnames = Unzip()(outfile, action="download", pooch=None)
        # Exactly one archive member should be the tiny data file; the
        # 1-tuple unpacking enforces that
        (fname,) = [f for f in fnames if "tiny-data.txt" in f]
        check_tiny_data(fname)
@pytest.mark.network
@pytest.mark.figshare
def test_figshare_unspecified_version():
    """
    Test if passing a Figshare url without a version warns about it, but still
    downloads it.
    """
    url = FIGSHAREURL
    # Remove the last bits of the doi, where the version is specified, and
    # keep a trailing slash so the file name can be appended below
    url = url[: url.rindex(".")] + "/"
    # Create expected warning message
    # Strip the leading "doi:" prefix and trailing slash to get the bare DOI
    doi = url[4:-1]
    warning_msg = f"The Figshare DOI '{doi}' doesn't specify which version of "
    with TemporaryDirectory() as local_store:
        downloader = DOIDownloader()
        outfile = os.path.join(local_store, "tiny-data.txt")
        # The download must still succeed despite the warning
        with pytest.warns(UserWarning, match=warning_msg):
            downloader(url + "tiny-data.txt", outfile, None)
@pytest.mark.network
@pytest.mark.figshare
@pytest.mark.parametrize(
    "version, missing, present",
    [
        (
            1,
            "LC08_L2SP_218074_20190114_20200829_02_T1-cropped.tar.gz",
            "cropped-before.tar.gz",
        ),
        (
            2,
            "cropped-before.tar.gz",
            "LC08_L2SP_218074_20190114_20200829_02_T1-cropped.tar.gz",
        ),
    ],
)
def test_figshare_data_repository_versions(version, missing, present):
    """
    Test if setting the version in Figshare DOI works as expected

    Each archive version must expose its own file listing: a file renamed
    between versions appears only in the version it belongs to.
    """
    # Use a Figshare repo as example (we won't download files from it since
    # they are too big)
    doi = f"10.6084/m9.figshare.21665630.v{version}"
    url = f"https://doi.org/{doi}/"
    figshare = FigshareRepository(doi, url)
    # Only inspect the file listing returned by the API; no downloads
    filenames = [item["name"] for item in figshare.api_response]
    assert present in filenames
    assert missing not in filenames
@pytest.mark.network
def test_ftp_downloader(ftpserver):
    "Test ftp downloader"
    # Serve the sample file through the local FTP server fixture
    with data_over_ftp(ftpserver, "tiny-data.txt") as url:
        with TemporaryDirectory() as local_store:
            downloader = FTPDownloader(port=ftpserver.server_port)
            outfile = os.path.join(local_store, "tiny-data.txt")
            downloader(url, outfile, None)
            check_tiny_data(outfile)
@pytest.mark.network
@pytest.mark.skipif(paramiko is None, reason="requires paramiko to run SFTP")
def test_sftp_downloader():
    "Test sftp downloader"
    with TemporaryDirectory() as local_store:
        # test.rebex.net is a public, read-only SFTP server for testing
        downloader = SFTPDownloader(username="demo", password="password")
        url = "sftp://test.rebex.net/pub/example/pocketftp.png"
        outfile = os.path.join(local_store, "pocketftp.png")
        downloader(url, outfile, None)
        # Binary content, so only check that the file landed on disk
        assert os.path.exists(outfile)
@pytest.mark.network
@pytest.mark.skipif(paramiko is None, reason="requires paramiko to run SFTP")
def test_sftp_downloader_fail_if_file_object():
    "Downloader should fail when a file object rather than string is passed"
    with TemporaryDirectory() as local_store:
        downloader = SFTPDownloader(username="demo", password="password")
        url = "sftp://test.rebex.net/pub/example/pocketftp.png"
        outfile = os.path.join(local_store, "pocketftp.png")
        # The downloader expects a path string, not an open file handle
        with open(outfile, "wb") as outfile_obj:
            with pytest.raises(TypeError):
                downloader(url, outfile_obj, None)
@pytest.mark.skipif(paramiko is not None, reason="paramiko must be missing")
def test_sftp_downloader_fail_if_paramiko_missing():
    """
    Creating an SFTP downloader without paramiko installed must error out.
    """
    with pytest.raises(ValueError) as error:
        SFTPDownloader()
    assert "'paramiko'" in str(error.value)
@pytest.mark.skipif(tqdm is not None, reason="tqdm must be missing")
@pytest.mark.parametrize("downloader", [HTTPDownloader, FTPDownloader, SFTPDownloader])
def test_downloader_progressbar_fails(downloader):
    """
    Requesting a progress bar when tqdm is not installed must raise an error.
    """
    with pytest.raises(ValueError) as error:
        downloader(progressbar=True)
    assert "'tqdm'" in str(error.value)
@pytest.mark.network
@pytest.mark.skipif(tqdm is None, reason="requires tqdm")
@pytest.mark.parametrize(
    "url,downloader",
    [
        (BASEURL, HTTPDownloader),
        pytest.param(FIGSHAREURL, DOIDownloader, marks=pytest.mark.figshare),
    ],
    ids=["http", "figshare"],
)
def test_downloader_progressbar(url, downloader, capsys):
    "Setup a downloader function that prints a progress bar for fetch"
    download = downloader(progressbar=True)
    with TemporaryDirectory() as local_store:
        fname = "tiny-data.txt"
        url = url + fname
        outfile = os.path.join(local_store, fname)
        download(url, outfile, None)
        # Read stderr and make sure the progress bar is printed only when told
        captured = capsys.readouterr()
        # tqdm redraws the bar with carriage returns; the last chunk after
        # the final "\r" is the completed bar
        printed = captured.err.split("\r")[-1].strip()
        assert len(printed) == 79
        # tqdm uses ASCII '#' on Windows and block characters elsewhere
        if sys.platform == "win32":
            progress = "100%|####################"
        else:
            progress = "100%|████████████████████"
        # Bar size is not always the same so can't reliably test the whole bar.
        assert printed[:25] == progress
        # Check that the downloaded file has the right content
        check_tiny_data(outfile)
@pytest.mark.network
@pytest.mark.skipif(tqdm is None, reason="requires tqdm")
def test_downloader_progressbar_ftp(capsys, ftpserver):
    "Setup an FTP downloader function that prints a progress bar for fetch"
    with data_over_ftp(ftpserver, "tiny-data.txt") as url:
        download = FTPDownloader(progressbar=True, port=ftpserver.server_port)
        with TemporaryDirectory() as local_store:
            outfile = os.path.join(local_store, "tiny-data.txt")
            download(url, outfile, None)
            # Read stderr and make sure the progress bar is printed only when
            # told
            captured = capsys.readouterr()
            # The last chunk after the final "\r" is the completed bar
            printed = captured.err.split("\r")[-1].strip()
            assert len(printed) == 79
            # tqdm uses ASCII '#' on Windows and block characters elsewhere
            if sys.platform == "win32":
                progress = "100%|####################"
            else:
                progress = "100%|████████████████████"
            # Bar size is not always the same so can't reliably test the whole
            # bar.
            assert printed[:25] == progress
            # Check that the file was actually downloaded
            check_tiny_data(outfile)
@pytest.mark.network
@pytest.mark.skipif(tqdm is None, reason="requires tqdm")
@pytest.mark.skipif(paramiko is None, reason="requires paramiko")
def test_downloader_progressbar_sftp(capsys):
    "Setup an SFTP downloader function that prints a progress bar for fetch"
    downloader = SFTPDownloader(progressbar=True, username="demo", password="password")
    with TemporaryDirectory() as local_store:
        # Public read-only SFTP test server
        url = "sftp://test.rebex.net/pub/example/pocketftp.png"
        outfile = os.path.join(local_store, "pocketftp.png")
        downloader(url, outfile, None)
        # Read stderr and make sure the progress bar is printed only when told
        captured = capsys.readouterr()
        # The last chunk after the final "\r" is the completed bar
        printed = captured.err.split("\r")[-1].strip()
        assert len(printed) == 79
        # tqdm uses ASCII '#' on Windows and block characters elsewhere
        if sys.platform == "win32":
            progress = "100%|####################"
        else:
            progress = "100%|████████████████████"
        # Bar size is not always the same so can't reliably test the whole bar.
        assert printed[:25] == progress
        # Check that the file was actually downloaded
        assert os.path.exists(outfile)
@pytest.mark.network
def test_downloader_arbitrary_progressbar(capsys):
    "Setup a downloader function with an arbitrary progress bar class."

    class MinimalProgressDisplay:
        """A minimalist replacement for tqdm.tqdm"""

        def __init__(self, total):
            # Completed units so far and the expected total (may be None
            # until the downloader sets it)
            self.count = 0
            self.total = total

        def __repr__(self):
            """represent current completion"""
            return str(self.count) + "/" + str(self.total)

        def render(self):
            """print self.__repr__ to stderr"""
            # Leading "\r" mimics tqdm's redraw-in-place behavior
            print(f"\r{self}", file=sys.stderr, end="")

        def update(self, i):
            """modify completion and render"""
            self.count = i
            self.render()

        def reset(self):
            """set counter to 0"""
            self.count = 0

        @staticmethod
        def close():
            """print a new empty line"""
            print("", file=sys.stderr)

    pbar = MinimalProgressDisplay(total=None)
    download = HTTPDownloader(progressbar=pbar)
    with TemporaryDirectory() as local_store:
        fname = "large-data.txt"
        url = BASEURL + fname
        outfile = os.path.join(local_store, "large-data.txt")
        download(url, outfile, None)
        # Read stderr and make sure the progress bar is printed only when told
        captured = capsys.readouterr()
        printed = captured.err.split("\r")[-1].strip()
        # The final render must report full completion (336 of 336 units)
        progress = "336/336"
        assert printed == progress
        # Check that the downloaded file has the right content
        check_large_data(outfile)
class TestZenodoAPISupport:
    """
    Test support for different Zenodo APIs
    """

    # Sample article metadata shared by every test in this class
    article_id = 123456
    doi = f"10.0001/zenodo.{article_id}"
    doi_url = f"https://doi.org/{doi}"
    file_name = "my-file.zip"
    file_url = (
        "https://zenodo.org/api/files/513d7033-93a2-4eeb-821c-2fb0bbab0012/my-file.zip"
    )
    file_checksum = "2942bfabb3d05332b66eb128e0842cff"

    # "legacy" API response: file entries use "key" and an "md5:"-prefixed
    # checksum
    legacy_api_response = {
        "created": "2021-20-19T08:00:00.000000+00:00",
        "modified": "2021-20-19T08:00:00.000000+00:00",
        "id": article_id,
        "doi": doi,
        "doi_url": doi_url,
        "files": [
            {
                "id": "513d7033-93a2-4eeb-821c-2fb0bbab0012",
                "key": file_name,
                "checksum": f"md5:{file_checksum}",
                "links": {
                    "self": file_url,
                },
            }
        ],
    }

    # "new" API response: file entries use "filename" and a bare checksum
    new_api_response = {
        "created": "2021-20-19T08:00:00.000000+00:00",
        "modified": "2021-20-19T08:00:00.000000+00:00",
        "id": article_id,
        "doi": doi,
        "doi_url": doi_url,
        "files": [
            {
                "id": "513d7033-93a2-4eeb-821c-2fb0bbab0012",
                "filename": file_name,
                "checksum": file_checksum,
                "links": {
                    "self": file_url,
                },
            }
        ],
    }

    # Mixes entries of both styles, so the API version cannot be determined
    invalid_api_response = {
        "created": "2021-20-19T08:00:00.000000+00:00",
        "modified": "2021-20-19T08:00:00.000000+00:00",
        "id": article_id,
        "doi": doi,
        "doi_url": doi_url,
        "files": [
            {
                "id": "513d7033-93a2-4eeb-821c-2fb0bbab0012",
                "filename": file_name,
                "checksum": file_checksum,
                "links": {
                    "self": file_url,
                },
            },
            {
                "id": "513d7033-93a2-4eeb-821c-2fb0bbab0012",
                "key": file_name,
                "checksum": f"md5:{file_checksum}",
                "links": {
                    "self": file_url,
                },
            },
        ],
    }

    @pytest.mark.parametrize(
        "api_version, api_response",
        [
            ("legacy", legacy_api_response),
            ("new", new_api_response),
            ("invalid", invalid_api_response),
        ],
    )
    def test_api_version(self, httpserver, api_version, api_response):
        """
        Test if the API version is correctly detected.
        """
        # Create a local http server that plays back the canned API response
        httpserver.expect_request(f"/zenodo.{self.article_id}").respond_with_json(
            api_response
        )
        # Create Zenodo downloader
        downloader = ZenodoRepository(doi=self.doi, archive_url=self.doi_url)
        # Override base url for the API of the downloader
        downloader.base_api_url = httpserver.url_for("")
        # Check if the API version is correctly identified
        if api_version != "invalid":
            assert downloader.api_version == api_version
        else:
            # Mixed responses should make version detection fail loudly
            msg = "Couldn't determine the version of the Zenodo API"
            with pytest.raises(ValueError, match=msg):
                api_version = downloader.api_version

    @pytest.mark.parametrize(
        "api_version, api_response",
        [("legacy", legacy_api_response), ("new", new_api_response)],
    )
    def test_download_url(self, httpserver, api_version, api_response):
        """
        Test if the download url is correct for each API version.
        """
        # Create a local http server that plays back the canned API response
        httpserver.expect_request(f"/zenodo.{self.article_id}").respond_with_json(
            api_response
        )
        # Create Zenodo downloader
        downloader = ZenodoRepository(doi=self.doi, archive_url=self.doi_url)
        # Override base url for the API of the downloader
        downloader.base_api_url = httpserver.url_for("")
        # Check if the download url is correct
        download_url = downloader.download_url(file_name=self.file_name)
        if api_version == "legacy":
            # Legacy API: the file's own "links.self" URL is used directly
            assert download_url == self.file_url
        else:
            # New API: the URL is built from the records endpoint
            expected_url = (
                "https://zenodo.org/records/"
                f"{self.article_id}/files/{self.file_name}?download=1"
            )
            assert download_url == expected_url

    @pytest.mark.parametrize(
        "api_response",
        [legacy_api_response, new_api_response],
    )
    def test_populate_registry(self, httpserver, tmp_path, api_response):
        """
        Test if population of registry is correctly done for each API version.
        """
        # Create a local http server that plays back the canned API response
        httpserver.expect_request(f"/zenodo.{self.article_id}").respond_with_json(
            api_response
        )
        # Create sample pooch object
        puppy = Pooch(base_url="", path=tmp_path)
        # Create Zenodo downloader
        downloader = ZenodoRepository(doi=self.doi, archive_url=self.doi_url)
        # Override base url for the API of the downloader
        downloader.base_api_url = httpserver.url_for("")
        # Populate registry: both API styles must yield "md5:"-prefixed hashes
        downloader.populate_registry(puppy)
        assert puppy.registry == {self.file_name: f"md5:{self.file_checksum}"}
class TestDOIDownloaderHeaders:
    """Test the headers argument in DOIDownloader."""

    def test_default_headers(self):
        """Test the default value for headers."""
        downloader = DOIDownloader()
        assert downloader.headers == REQUESTS_HEADERS
        # Passing None explicitly falls back to the default headers too
        downloader = DOIDownloader(headers=None)
        assert downloader.headers == REQUESTS_HEADERS

    def test_overwrite_headers(self):
        """Test overwriting for headers."""
        # Custom headers replace the defaults entirely (no merging)
        downloader = DOIDownloader(headers={"custom": "field"})
        expected_headers = {
            "custom": "field",
        }
        assert downloader.headers == expected_headers

    def test_headers_empty_dict(self):
        """Test passing an empty dict to headers."""
        # An empty dict is honored as-is, not replaced by the defaults
        downloader = DOIDownloader(headers={})
        assert downloader.headers == {}

View File

@@ -0,0 +1,204 @@
# Copyright (c) 2018 The Pooch Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
#
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
#
# pylint: disable=redefined-outer-name
"""
Test the hash calculation and checking functions.
"""
import os
from pathlib import Path
from tempfile import NamedTemporaryFile
import pytest
try:
import xxhash
XXHASH_MAJOR_VERSION = int(xxhash.VERSION.split(".", maxsplit=1)[0])
except ImportError:
xxhash = None # type: ignore[assignment]
XXHASH_MAJOR_VERSION = 0
from ..core import Pooch
from ..hashes import (
make_registry,
file_hash,
hash_matches,
)
from .utils import check_tiny_data, mirror_directory
# Location of the sample data files shipped with the test suite
DATA_DIR = str(Path(__file__).parent / "data" / "store")
# Expected registry content for a non-recursive scan of DATA_DIR
REGISTRY = (
    "tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d\n"
)
# Expected registry content for a recursive scan (includes subdir entries)
REGISTRY_RECURSIVE = (
    "subdir/tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d\n"
    "tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d\n"
)
# Known hashes of tiny-data.txt for the hashlib-provided algorithms
TINY_DATA_HASHES_HASHLIB = {
    "sha1": "c03148994acd89317915ea2f2d080d6dd127aa09",
    "sha256": "baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d",
    "md5": "70e2afd3fd7e336ae478b1e740a5f08e",
}
# Known hashes of tiny-data.txt for the optional xxhash algorithms
TINY_DATA_HASHES_XXH = {
    "xxh64": "f843815fe57948fa",
    "xxh32": "98d6f1a2",
    # Require xxHash > 2.0
    "xxh128": "0267d220db258fffb0c567c0ecd1b689",
    "xxh3_128": "0267d220db258fffb0c567c0ecd1b689",
    "xxh3_64": "811e3f2a12aec53f",
}
# Union of both hash tables, used to parametrize the generic tests
TINY_DATA_HASHES = TINY_DATA_HASHES_HASHLIB.copy()
TINY_DATA_HASHES.update(TINY_DATA_HASHES_XXH)
@pytest.fixture
def data_dir_mirror(tmp_path):
    """
    Mirror the test data folder on a temporary directory. Needed to avoid
    permission errors when pooch is installed on a non-writable path.

    Returns the path of the mirrored copy inside *tmp_path* (used with the
    ``/`` path-join operator by the tests below).
    """
    return mirror_directory(DATA_DIR, tmp_path)
def test_make_registry(data_dir_mirror):
    "Check that the registry builder creates the right file names and hashes"
    outfile = NamedTemporaryFile(delete=False)  # pylint: disable=consider-using-with
    # Need to close the file before writing to it.
    outfile.close()
    try:
        make_registry(data_dir_mirror, outfile.name, recursive=False)
        with open(outfile.name, encoding="utf-8") as fout:
            registry = fout.read()
        assert registry == REGISTRY
        # Check that the registry can be used.
        pup = Pooch(path=data_dir_mirror, base_url="some bogus URL", registry={})
        pup.load_registry(outfile.name)
        # fetch should resolve to the local mirror without downloading
        true = str(data_dir_mirror / "tiny-data.txt")
        fname = pup.fetch("tiny-data.txt")
        assert true == fname
        check_tiny_data(fname)
    finally:
        # Clean up the delete=False temporary file ourselves
        os.remove(outfile.name)
def test_make_registry_recursive(data_dir_mirror):
    "Check that the registry builder works in recursive mode"
    outfile = NamedTemporaryFile(delete=False)  # pylint: disable=consider-using-with
    # Need to close the file before writing to it.
    outfile.close()
    try:
        make_registry(data_dir_mirror, outfile.name, recursive=True)
        with open(outfile.name, encoding="utf-8") as fout:
            registry = fout.read()
        assert registry == REGISTRY_RECURSIVE
        # Check that the registry can be used.
        pup = Pooch(path=data_dir_mirror, base_url="some bogus URL", registry={})
        pup.load_registry(outfile.name)
        assert str(data_dir_mirror / "tiny-data.txt") == pup.fetch("tiny-data.txt")
        check_tiny_data(pup.fetch("tiny-data.txt"))
        # Recursive mode must also pick up files inside subdirectories
        true = str(data_dir_mirror / "subdir" / "tiny-data.txt")
        assert true == pup.fetch("subdir/tiny-data.txt")
        check_tiny_data(pup.fetch("subdir/tiny-data.txt"))
    finally:
        # Clean up the delete=False temporary file ourselves
        os.remove(outfile.name)
def test_file_hash_invalid_algorithm():
    """
    file_hash must reject algorithm names it does not recognize.
    """
    with pytest.raises(ValueError) as error:
        file_hash(fname="something", alg="blah")
    assert "'blah'" in str(error.value)
@pytest.mark.parametrize(
    "alg,expected_hash",
    list(TINY_DATA_HASHES.items()),
    ids=list(TINY_DATA_HASHES.keys()),
)
def test_file_hash(alg, expected_hash):
    "Test the hash calculation using hashlib and xxhash"
    # Skip xxhash algorithms when the optional dependency is missing or too
    # old (xxh128/xxh3_* need xxhash >= 2.0)
    if alg.startswith("xxh"):
        if xxhash is None:
            pytest.skip("requires xxhash")
        if alg not in ["xxh64", "xxh32"] and XXHASH_MAJOR_VERSION < 2:
            pytest.skip("requires xxhash > 2.0")
    fname = os.path.join(DATA_DIR, "tiny-data.txt")
    # Sanity-check the fixture file before hashing it
    check_tiny_data(fname)
    returned_hash = file_hash(fname, alg)
    assert returned_hash == expected_hash
@pytest.mark.parametrize(
    "alg,expected_hash",
    list(TINY_DATA_HASHES.items()),
    ids=list(TINY_DATA_HASHES.keys()),
)
def test_hash_matches(alg, expected_hash):
    "Make sure the hash checking function works"
    # Skip xxhash algorithms when the optional dependency is missing or too
    # old (xxh128/xxh3_* need xxhash >= 2.0)
    if alg.startswith("xxh"):
        if xxhash is None:
            pytest.skip("requires xxhash")
        if alg not in ["xxh64", "xxh32"] and XXHASH_MAJOR_VERSION < 2:
            pytest.skip("requires xxhash > 2.0")
    fname = os.path.join(DATA_DIR, "tiny-data.txt")
    check_tiny_data(fname)
    # Check if the check passes
    known_hash = f"{alg}:{expected_hash}"
    assert hash_matches(fname, known_hash)
    # And also if it fails
    known_hash = f"{alg}:blablablabla"
    assert not hash_matches(fname, known_hash)
@pytest.mark.parametrize(
    "alg,expected_hash",
    list(TINY_DATA_HASHES_HASHLIB.items()),
    ids=list(TINY_DATA_HASHES_HASHLIB.keys()),
)
def test_hash_matches_strict(alg, expected_hash):
    "Make sure the hash checking function raises an exception if strict"
    fname = os.path.join(DATA_DIR, "tiny-data.txt")
    check_tiny_data(fname)
    # Check if the check passes
    known_hash = f"{alg}:{expected_hash}"
    assert hash_matches(fname, known_hash, strict=True)
    # And also if it fails
    bad_hash = f"{alg}:blablablabla"
    with pytest.raises(ValueError) as error:
        hash_matches(fname, bad_hash, strict=True, source="Neverland")
    # The error message should name the given source
    assert "Neverland" in str(error.value)
    with pytest.raises(ValueError) as error:
        hash_matches(fname, bad_hash, strict=True, source=None)
    # Without a source, the message falls back to the file name
    assert fname in str(error.value)
def test_hash_matches_none():
    """
    hash_matches must always return True when known_hash is None.
    """
    data_file = os.path.join(DATA_DIR, "tiny-data.txt")
    assert hash_matches(data_file, known_hash=None)
    # Even a bogus file name passes, since no hash is ever computed
    assert hash_matches(fname="", known_hash=None)
    # strict mode would raise if the check were actually being performed
    assert hash_matches(data_file, known_hash=None, strict=True)
@pytest.mark.parametrize(
    "alg,expected_hash",
    list(TINY_DATA_HASHES_HASHLIB.items()),
    ids=list(TINY_DATA_HASHES_HASHLIB.keys()),
)
def test_hash_matches_uppercase(alg, expected_hash):
    "Hash matching should be independent of upper or lower case"
    fname = os.path.join(DATA_DIR, "tiny-data.txt")
    check_tiny_data(fname)
    # Check if the check passes
    known_hash = f"{alg}:{expected_hash.upper()}"
    assert hash_matches(fname, known_hash, strict=True)
    # And also if it fails (truncating the hash makes it invalid)
    with pytest.raises(ValueError) as error:
        hash_matches(fname, known_hash[:-5], strict=True, source="Neverland")
    assert "Neverland" in str(error.value)

View File

@@ -0,0 +1,49 @@
# Copyright (c) 2018 The Pooch Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
#
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
#
# pylint: disable=redefined-outer-name
"""
Test the entire process of creating a Pooch and using it.
"""
import os
import shutil
from pathlib import Path
import pytest
from .. import create, os_cache
from .. import __version__ as full_version
from .utils import check_tiny_data, capture_log
@pytest.mark.network
def test_create_and_fetch():
    "Fetch a data file from the local storage"
    path = os_cache("pooch-testing")
    # Start from a clean cache so downloads are actually triggered
    if path.exists():
        shutil.rmtree(str(path))
    pup = create(
        path=path,
        base_url="https://github.com/fatiando/pooch/raw/{version}/data/",
        version=full_version,
        version_dev="main",
        env="POOCH_DATA_DIR",
    )
    # Make sure the storage isn't created until a download is required
    assert not pup.abspath.exists()
    pup.load_registry(Path(os.path.dirname(__file__), "data", "registry.txt"))
    for target in ["tiny-data.txt", "subdir/tiny-data.txt"]:
        # First fetch must download the file
        with capture_log() as log_file:
            fname = pup.fetch(target)
            assert log_file.getvalue().split()[0] == "Downloading"
        check_tiny_data(fname)
        # Now modify the file to trigger an update on the next fetch
        with open(fname, "w", encoding="utf-8") as fin:
            fin.write("The data is now different")
        with capture_log() as log_file:
            fname = pup.fetch(target)
            assert log_file.getvalue().split()[0] == "Updating"
        check_tiny_data(fname)

View File

@@ -0,0 +1,289 @@
# Copyright (c) 2018 The Pooch Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
#
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
#
"""
Test the processor hooks
"""
from pathlib import Path
from tempfile import TemporaryDirectory
import warnings
import pytest
from .. import Pooch
from ..processors import Unzip, Untar, Decompress
from .utils import pooch_test_url, pooch_test_registry, check_tiny_data, capture_log
# Shared registry and base URL for the sample data used by all tests below
REGISTRY = pooch_test_registry()
BASEURL = pooch_test_url()
@pytest.mark.network
@pytest.mark.parametrize(
    "method,ext,name",
    [
        ("auto", "xz", None),
        ("lzma", "xz", None),
        ("xz", "xz", None),
        ("bzip2", "bz2", None),
        ("gzip", "gz", None),
        ("gzip", "gz", "different-name.txt"),
    ],
    ids=["auto", "lzma", "xz", "bz2", "gz", "name"],
)
def test_decompress(method, ext, name):
    "Check that decompression after download works for all formats"
    processor = Decompress(method=method, name=name)
    with TemporaryDirectory() as local_store:
        path = Path(local_store)
        # Default output name appends ".decomp"; otherwise the given name is
        # used as-is
        if name is None:
            true_path = str(path / ".".join(["tiny-data.txt", ext, "decomp"]))
        else:
            true_path = str(path / name)
        # Setup a pooch in a temp dir
        pup = Pooch(path=path, base_url=BASEURL, registry=REGISTRY)
        # Check the logs when downloading and from the processor
        with capture_log() as log_file:
            fname = pup.fetch("tiny-data.txt." + ext, processor=processor)
            logs = log_file.getvalue()
            lines = logs.splitlines()
            # One "Downloading" line plus one "Decompressing" line
            assert len(lines) == 2
            assert lines[0].split()[0] == "Downloading"
            assert lines[-1].startswith("Decompressing")
            assert method in lines[-1]
        assert fname == true_path
        check_tiny_data(fname)
        # Check that processor doesn't execute when not downloading
        with capture_log() as log_file:
            fname = pup.fetch("tiny-data.txt." + ext, processor=processor)
            assert log_file.getvalue() == ""
        assert fname == true_path
        check_tiny_data(fname)
@pytest.mark.network
def test_decompress_fails():
    "Should fail if method='auto' and no extension is given in the file name"
    with TemporaryDirectory() as local_store:
        path = Path(local_store)
        pup = Pooch(path=path, base_url=BASEURL, registry=REGISTRY)
        # Invalid extension
        with pytest.raises(ValueError) as exception:
            with warnings.catch_warnings():
                pup.fetch("tiny-data.txt", processor=Decompress(method="auto"))
        assert exception.value.args[0].startswith("Unrecognized file extension '.txt'")
        assert "pooch.Unzip/Untar" not in exception.value.args[0]
        # Should also fail for a bad method name
        with pytest.raises(ValueError) as exception:
            with warnings.catch_warnings():
                pup.fetch("tiny-data.txt", processor=Decompress(method="bla"))
        assert exception.value.args[0].startswith("Invalid compression method 'bla'")
        assert "pooch.Unzip/Untar" not in exception.value.args[0]
        # Point people to Untar and Unzip
        with pytest.raises(ValueError) as exception:
            with warnings.catch_warnings():
                pup.fetch("tiny-data.txt", processor=Decompress(method="zip"))
        assert exception.value.args[0].startswith("Invalid compression method 'zip'")
        assert "pooch.Unzip/Untar" in exception.value.args[0]
        # Archive extensions should also point people to Untar and Unzip
        with pytest.raises(ValueError) as exception:
            with warnings.catch_warnings():
                pup.fetch("store.zip", processor=Decompress(method="auto"))
        assert exception.value.args[0].startswith("Unrecognized file extension '.zip'")
        assert "pooch.Unzip/Untar" in exception.value.args[0]
@pytest.mark.network
@pytest.mark.parametrize(
    "target_path", [None, "some_custom_path"], ids=["default_path", "custom_path"]
)
@pytest.mark.parametrize(
    "archive,members",
    [
        ("tiny-data", ["tiny-data.txt"]),
        ("store", None),
        ("store", ["store/tiny-data.txt"]),
        ("store", ["store/subdir/tiny-data.txt"]),
        ("store", ["store/subdir"]),
        ("store", ["store/tiny-data.txt", "store/subdir"]),
    ],
    ids=[
        "single_file",
        "archive_all",
        "archive_file",
        "archive_subdir_file",
        "archive_subdir",
        "archive_multiple",
    ],
)
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
    ids=["Unzip", "Untar"],
)
def test_unpacking(processor_class, extension, target_path, archive, members):
    "Tests the behaviour of processors for unpacking archives (Untar, Unzip)"
    processor = processor_class(members=members, extract_dir=target_path)
    # When no extract_dir is given, the processor unpacks next to the archive
    # using its own suffix (e.g. ".unzip")
    if target_path is None:
        target_path = archive + extension + processor.suffix
    with TemporaryDirectory() as path:
        path = Path(path)
        true_paths, expected_log = _unpacking_expected_paths_and_logs(
            archive, members, path / target_path, processor_class.__name__
        )
        # Setup a pooch in a temp dir
        pup = Pooch(path=path, base_url=BASEURL, registry=REGISTRY)
        # Capture logs and check for the right processor message
        with capture_log() as log_file:
            fnames = pup.fetch(archive + extension, processor=processor)
            assert set(fnames) == true_paths
            _check_logs(log_file, expected_log)
        for fname in fnames:
            check_tiny_data(fname)
        # Check that processor doesn't execute when not downloading
        with capture_log() as log_file:
            fnames = pup.fetch(archive + extension, processor=processor)
            assert set(fnames) == true_paths
            _check_logs(log_file, [])
        for fname in fnames:
            check_tiny_data(fname)
@pytest.mark.network
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
)
def test_multiple_unpacking(processor_class, extension):
    "Test that multiple subsequent calls to a processor yield correct results"
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=Path(local_store), base_url=BASEURL, registry=REGISTRY)
        # Do a first fetch with the one member only
        processor1 = processor_class(members=["store/tiny-data.txt"])
        filenames1 = pup.fetch("store" + extension, processor=processor1)
        assert len(filenames1) == 1
        check_tiny_data(filenames1[0])
        # Do a second fetch with the other member
        processor2 = processor_class(
            members=["store/tiny-data.txt", "store/subdir/tiny-data.txt"]
        )
        filenames2 = pup.fetch("store" + extension, processor=processor2)
        assert len(filenames2) == 2
        check_tiny_data(filenames2[0])
        check_tiny_data(filenames2[1])
        # Do a third fetch, again with one member and assert
        # that only this member was returned
        filenames3 = pup.fetch("store" + extension, processor=processor1)
        assert len(filenames3) == 1
        check_tiny_data(filenames3[0])
@pytest.mark.network
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
)
def test_unpack_members_with_leading_dot(processor_class, extension):
    "Test that unpack members can also be specified with a leading ./"
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=Path(local_store), base_url=BASEURL, registry=REGISTRY)
        # The "./" prefix should be equivalent to the bare member path
        processor1 = processor_class(members=["./store/tiny-data.txt"])
        filenames1 = pup.fetch("store" + extension, processor=processor1)
        assert len(filenames1) == 1
        check_tiny_data(filenames1[0])
def _check_logs(log_file, expected_lines):
    """
    Assert that each captured log line starts with its expected prefix.

    The number of lines must match exactly; *log_file* is any object with a
    ``getvalue`` method returning the captured log text.
    """
    observed = log_file.getvalue().splitlines()
    assert len(observed) == len(expected_lines)
    assert all(
        got.startswith(want) for got, want in zip(observed, expected_lines)
    )
def _unpacking_expected_paths_and_logs(archive, members, path, name):
    """
    Build the expected extracted paths and log messages for an unpacking test.

    Returns a tuple ``(expected_paths, expected_logs)`` where the first is a
    set of absolute file path strings under *path* and the second is the list
    of log-line prefixes the processor (*name*, "Unzip" or "Untar") should
    emit.
    """
    expected_logs = ["Downloading"]
    if archive == "tiny-data":
        # Single-file archive: one extracted file, one "Extracting" message
        expected_paths = {str(path / "tiny-data.txt")}
        expected_logs.append("Extracting 'tiny-data.txt'")
    elif archive == "store" and members is None:
        # Full extraction of the archive: both data files come out and the
        # processor logs a single "contents" message ("Unzipping"/"Untarring")
        expected_paths = {
            str(path / "store" / "tiny-data.txt"),
            str(path / "store" / "subdir" / "tiny-data.txt"),
        }
        expected_logs.append(f"{name}{name[-1]}ing contents")
    elif archive == "store" and members is not None:
        # Selective extraction: one "Extracting" message per member; members
        # that are directories resolve to the tiny-data.txt file inside them
        expected_paths = set()
        for member in members:
            member_path = path / Path(*member.split("/"))
            if not str(member_path).endswith("tiny-data.txt"):
                member_path = member_path / "tiny-data.txt"
            expected_paths.add(str(member_path))
            expected_logs.append(f"Extracting '{member}'")
    return expected_paths, expected_logs
@pytest.mark.network
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
)
def test_unpacking_members_then_no_members(processor_class, extension):
    """
    Test that calling with valid members then without them works.
    https://github.com/fatiando/pooch/issues/364
    """
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=Path(local_store), base_url=BASEURL, registry=REGISTRY)
        # Do a first fetch with an existing member
        processor1 = processor_class(members=["store/tiny-data.txt"])
        filenames1 = pup.fetch("store" + extension, processor=processor1)
        assert len(filenames1) == 1
        # Do a second fetch with no members: the full archive must be
        # unpacked even though a partial extraction already happened
        processor2 = processor_class()
        filenames2 = pup.fetch("store" + extension, processor=processor2)
        assert len(filenames2) > 1
@pytest.mark.network
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
)
def test_unpacking_wrong_members_then_no_members(processor_class, extension):
    """
    Test that calling with invalid members then without them works.
    https://github.com/fatiando/pooch/issues/364
    """
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=Path(local_store), base_url=BASEURL, registry=REGISTRY)
        # Do a first fetch with incorrect member: nothing gets extracted
        processor1 = processor_class(members=["not-a-valid-file.csv"])
        filenames1 = pup.fetch("store" + extension, processor=processor1)
        assert len(filenames1) == 0
        # Do a second fetch with no members: the full archive must still be
        # unpacked despite the earlier empty extraction
        processor2 = processor_class()
        filenames2 = pup.fetch("store" + extension, processor=processor2)
        assert len(filenames2) > 0

View File

@@ -0,0 +1,197 @@
# Copyright (c) 2018 The Pooch Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
#
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
#
"""
Test the utility functions.
"""
import os
import shutil
import time
from pathlib import Path
import tempfile
from tempfile import TemporaryDirectory
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
import pytest
from ..utils import (
parse_url,
make_local_storage,
temporary_file,
unique_file_name,
)
def test_unique_name_long():
    """
    Unique file names generated from long URLs are capped at 255 characters.
    """
    long_url = f"https://www.something.com/data{'a' * 500}.txt"
    assert len(long_url) > 255
    result = unique_file_name(long_url)
    assert len(result) == 255
    # The extension and the tail of the original name survive truncation
    assert result.endswith("aaaaaa.txt")
    assert result.split("-")[1].startswith("aaaaaaaaaa")
@pytest.mark.parametrize(
    "pool",
    [ThreadPoolExecutor, ProcessPoolExecutor],
    ids=["threads", "processes"],
)
def test_make_local_storage_parallel(pool, monkeypatch):
    "Try to create the cache folder in parallel"
    # Can cause multiple attempts at creating the folder which leads to an
    # exception. Check that this doesn't happen.
    # See https://github.com/fatiando/pooch/issues/170
    # Monkey path makedirs to make it delay before creating the directory.
    # Otherwise, the dispatch is too fast and the directory will exist before
    # another process tries to create it.
    # Need to keep a reference to the original function to avoid infinite
    # recursions from the monkey patching.
    makedirs = os.makedirs

    def mockmakedirs(path, exist_ok=False):  # pylint: disable=unused-argument
        "Delay before calling makedirs"
        time.sleep(1.5)
        makedirs(path, exist_ok=exist_ok)

    monkeypatch.setattr(os, "makedirs", mockmakedirs)
    data_cache = os.path.join(os.curdir, "test_parallel_cache")
    assert not os.path.exists(data_cache)
    try:
        # Four workers race to create the same directory; all must succeed
        with pool() as executor:
            futures = [
                executor.submit(make_local_storage, data_cache) for i in range(4)
            ]
            for future in futures:
                # result() re-raises any exception from the worker
                future.result()
        assert os.path.exists(data_cache)
    finally:
        # Clean up the cache directory created in the working directory
        if os.path.exists(data_cache):
            shutil.rmtree(data_cache)
def test_local_storage_makedirs_permissionerror(monkeypatch):
    """
    Should raise a helpful error when the local data dir can't be created.

    The error message must mention the cache and the environment variable
    that can be used to override the cache location.
    """

    def mockmakedirs(path, exist_ok=False):  # pylint: disable=unused-argument
        "Raise an exception to mimic permission issues"
        raise PermissionError("Fake error")

    data_cache = os.path.join(os.curdir, "test_permission")
    assert not os.path.exists(data_cache)
    monkeypatch.setattr(os, "makedirs", mockmakedirs)
    with pytest.raises(PermissionError) as error:
        make_local_storage(
            path=data_cache,
            env="SOME_VARIABLE",
        )
    # BUG FIX: these assertions used to sit inside the pytest.raises block
    # after the raising call, so they never executed. Check the raised
    # exception's message instead.
    assert "Pooch could not create data cache" in str(error.value)
    assert "'SOME_VARIABLE'" in str(error.value)
def test_local_storage_newfile_permissionerror(monkeypatch):
    """
    Should raise a helpful error when we can't write to the local data dir.

    This is a separate function because the failure mode differs when the
    data dir already exists but isn't writable.
    """

    def mocktempfile(**kwargs):  # pylint: disable=unused-argument
        "Raise an exception to mimic permission issues"
        raise PermissionError("Fake error")

    with TemporaryDirectory() as data_cache:
        os.makedirs(os.path.join(data_cache, "1.0"))
        assert os.path.exists(data_cache)
        monkeypatch.setattr(tempfile, "NamedTemporaryFile", mocktempfile)
        with pytest.raises(PermissionError) as error:
            make_local_storage(
                path=data_cache,
                env="SOME_VARIABLE",
            )
        # BUG FIX: these assertions used to sit inside the pytest.raises
        # block after the raising call, so they never executed. Check the
        # raised exception's message instead.
        assert "Pooch could not write to data cache" in str(error.value)
        assert "'SOME_VARIABLE'" in str(error.value)
@pytest.mark.parametrize(
    "url,output",
    [
        pytest.param(
            "http://127.0.0.1:8080/test.nc",
            {"protocol": "http", "netloc": "127.0.0.1:8080", "path": "/test.nc"},
            id="http",
        ),
        pytest.param(
            "ftp://127.0.0.1:8080/test.nc",
            {"protocol": "ftp", "netloc": "127.0.0.1:8080", "path": "/test.nc"},
            id="ftp",
        ),
        pytest.param(
            "doi:10.6084/m9.figshare.923450.v1/dike.json",
            {
                "protocol": "doi",
                "netloc": "10.6084/m9.figshare.923450.v1",
                "path": "/dike.json",
            },
            id="doi",
        ),
        pytest.param(
            r"doi:10.5281/zenodo.7632643/santisoler/pooch-test-data-v1.zip",
            {
                "protocol": "doi",
                "netloc": "10.5281/zenodo.7632643",
                "path": "/santisoler/pooch-test-data-v1.zip",
            },
            id="zenodo-doi-with-slash",
        ),
    ],
)
def test_parse_url(url, output):
    "Parse URL into 3 components"
    assert parse_url(url) == output
def test_parse_url_invalid_doi():
    "Should fail if we forget to not include // in the DOI link"
    bad_url = "doi://XXX/XXX/fname.txt"
    with pytest.raises(ValueError):
        parse_url(bad_url)
def test_temporary_file():
    "Make sure the file is writable and cleaned up in the end"
    with temporary_file() as tmp:
        tmp_path = Path(tmp)
        assert tmp_path.exists()
        # Round-trip some content through the file to prove it's writable
        tmp_path.write_text("Meh", encoding="utf-8")
        assert tmp_path.read_text(encoding="utf-8").strip() == "Meh"
    # Exiting the context manager should delete the file
    assert not tmp_path.exists()
def test_temporary_file_path():
    "Make sure the file is writable and cleaned up in the end when given a dir"
    with TemporaryDirectory() as path:
        with temporary_file(path) as tmp:
            tmp_path = Path(tmp)
            assert tmp_path.exists()
            # The temporary file must live inside the requested directory
            assert path in tmp
            tmp_path.write_text("Meh", encoding="utf-8")
            assert tmp_path.read_text(encoding="utf-8").strip() == "Meh"
        assert not tmp_path.exists()
def test_temporary_file_exception():
    "Make sure the file is writable and cleaned up when there is an exception"
    tmp_path = None
    with pytest.raises(ValueError):
        with temporary_file() as tmp:
            tmp_path = Path(tmp)
            assert tmp_path.exists()
            raise ValueError("Nooooooooo!")
    # The cleanup must happen even though the body raised
    assert not tmp_path.exists()

View File

@@ -0,0 +1,19 @@
# Copyright (c) 2018 The Pooch Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
#
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
#
"""
Test the version.
"""
from packaging.version import Version
import pooch
def test_version():
    "Check there's a usable version number in the usual __version__"
    version_string = pooch.__version__
    assert version_string.startswith("v")
    # Raises InvalidVersion if the string is not PEP440 compliant
    Version(version_string)

View File

@@ -0,0 +1,237 @@
# Copyright (c) 2018 The Pooch Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
#
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
#
"""
Utilities for testing code.
"""
import os
import io
import logging
import shutil
import stat
from pathlib import Path
from contextlib import contextmanager
from .. import __version__ as full_version
from ..utils import check_version, get_logger
def check_tiny_data(fname):
    """
    Load the tiny-data.txt file and check that the contents are correct.
    """
    assert os.path.exists(fname)
    expected = "\n".join(
        ["# A tiny data file for test purposes only", "1 2 3 4 5 6"]
    )
    with open(fname, encoding="utf-8") as tinydata:
        assert tinydata.read().strip() == expected
def check_large_data(fname):
    """
    Load the large-data.txt file and check that the contents are correct.
    """
    assert os.path.exists(fname)
    # NOTE: "larer" matches the actual header line of the test data file
    expected_lines = ["# A larer data file for test purposes only"]
    expected_lines += ["1 2 3 4 5 6"] * 6002
    with open(fname, encoding="utf-8") as data:
        assert data.read().strip() == "\n".join(expected_lines)
def pooch_test_url():
    """
    Get the base URL for the test data used in Pooch itself.

    The URL is a GitHub raw link to the ``pooch/tests/data`` directory from
    the `GitHub repository <https://github.com/fatiando/pooch>`__. It matches
    the pooch version specified in ``pooch.version.full_version``.

    Returns
    -------
    url
        The versioned URL for pooch's test data.
    """
    # Fall back to the main branch for unreleased/dev versions
    version = check_version(full_version, fallback="main")
    return f"https://github.com/fatiando/pooch/raw/{version}/pooch/tests/data/"
def pooch_test_figshare_url():
    """
    Get the base URL for the test data stored in figshare.

    The URL contains the DOI for the figshare dataset using the appropriate
    version for this version of Pooch.

    Returns
    -------
    url
        The URL for pooch's test data.
    """
    return "doi:10.6084/m9.figshare.14763051.v1/"
def pooch_test_zenodo_url():
    """
    Get the base URL for the test data stored in Zenodo.

    The URL contains the DOI for the Zenodo dataset using the appropriate
    version for this version of Pooch.

    Returns
    -------
    url
        The URL for pooch's test data.
    """
    return "doi:10.5281/zenodo.4924875/"
def pooch_test_zenodo_with_slash_url():
    """
    Get base URL for test data in Zenodo, where the file name contains a slash

    The URL contains the DOI for the Zenodo dataset that has a slash in the
    filename (created with the GitHub-Zenodo integration service), using the
    appropriate version for this version of Pooch.

    Returns
    -------
    url
        The URL for pooch's test data.
    """
    return "doi:10.5281/zenodo.7632643/"
def pooch_test_dataverse_url():
    """
    Get the base URL for the test data stored on a DataVerse instance.

    Returns
    -------
    url
        The URL for pooch's test data.
    """
    return "doi:10.11588/data/TKCFEF/"
def pooch_test_registry():
    """
    Get a registry for the test data used in Pooch itself.

    Returns
    -------
    registry
        Dictionary with pooch's test data files and their hashes.
    """
    # tiny-data.txt and subdir/tiny-data.txt are the same file, so they
    # share a single SHA256 hash.
    tiny_hash = "baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d"
    return {
        "tiny-data.txt": tiny_hash,
        "large-data.txt": "98de171fb320da82982e6bf0f3994189fff4b42b23328769afce12bdd340444a",
        "subdir/tiny-data.txt": tiny_hash,
        "tiny-data.zip": "0d49e94f07bc1866ec57e7fd1b93a351fba36842ec9b13dd50bf94e8dfa35cbb",
        "store.zip": "0498d2a001e71051bbd2acd2346f38da7cbd345a633cb7bf0f8a20938714b51a",
        "tiny-data.tar.gz": "41503f083814f43a01a8e9a30c28d7a9fe96839a99727a7fdd0acf7cd5bab63b",
        "store.tar.gz": "088c7f4e0f1859b1c769bb6065de24376f366374817ede8691a6ac2e49f29511",
        "tiny-data.txt.bz2": "753663687a4040c90c8578061867d1df623e6aa8011c870a5dbd88ee3c82e306",
        "tiny-data.txt.gz": "2e2da6161291657617c32192dba95635706af80c6e7335750812907b58fd4b52",
        "tiny-data.txt.xz": "99dcb5c32a6e916344bacb4badcbc2f2b6ee196977d1d8187610c21e7e607765",
    }
@contextmanager
def capture_log(level=logging.DEBUG):
    """
    Create a context manager for reading from the logs.

    Parameters
    ----------
    level : int
        Only capture log records at or above this logging level.

    Yields
    ------
    log_file : StringIO
        a file-like object to which the logs were written
    """
    log_file = io.StringIO()
    handler = logging.StreamHandler(log_file)
    handler.setLevel(level)
    get_logger().addHandler(handler)
    try:
        yield log_file
    finally:
        # Remove the handler even if the body raises (e.g. a failing test
        # assertion). Otherwise the logger keeps a handler pointing at a
        # dead StringIO and it leaks into every subsequent test.
        get_logger().removeHandler(handler)
@contextmanager
def data_over_ftp(server, fname):
    """
    Add a test data file to the test FTP server and clean it up afterwards.

    Parameters
    ----------
    server
        The ``ftpserver`` fixture provided by pytest-localftpserver.
    fname : str
        The name of a file *relative* to the test data folder of the package
        (usually just the file name, not the full path).

    Yields
    ------
    url : str
        The download URL of the data file from the test FTP server.
    """
    source = str(Path(__file__).parent / "data" / fname)
    destination = os.path.join(server.anon_root, fname)
    try:
        # Stage the file in the server's anonymous root so it can be fetched
        shutil.copyfile(source, destination)
        yield f"ftp://localhost/{fname}"
    finally:
        # Always remove the staged copy, even if the body raised
        if os.path.exists(destination):
            os.remove(destination)
def _recursive_chmod_directories(root, mode):
    """
    Recursively change the permissions on the child directories using a bitwise
    OR operation.
    """
    for child in root.iterdir():
        if child.is_dir():
            # OR keeps the existing permission bits and adds the new ones
            child.chmod(child.stat().st_mode | mode)
            _recursive_chmod_directories(child, mode)


def mirror_directory(source, destination):
    """
    Copy contents of the source directory into destination and fix permissions.

    Parameters
    ----------
    source : str, :class:`pathlib.Path`
        Source data directory.
    destination : str, :class:`pathlib.Path`
        Destination directory that will contain the copy of source. The actual
        source directory (not just it's contents) is copied.

    Returns
    -------
    mirror : :class:`pathlib.Path`
        The path of the mirrored output directory.
    """
    source = Path(source)
    mirror = Path(destination) / source.name
    shutil.copytree(source, mirror)
    # Make the copied directories user-writable so they can be modified and
    # cleaned up afterwards
    _recursive_chmod_directories(mirror, mode=stat.S_IWUSR)
    return mirror