Videre
This commit is contained in:
@@ -0,0 +1,6 @@
|
||||
# Copyright (c) 2018 The Pooch Developers.
|
||||
# Distributed under the terms of the BSD 3-Clause License.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
|
||||
#
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,10 @@
|
||||
subdir/tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d
|
||||
tiny-data.zip 0d49e94f07bc1866ec57e7fd1b93a351fba36842ec9b13dd50bf94e8dfa35cbb
|
||||
large-data.txt 98de171fb320da82982e6bf0f3994189fff4b42b23328769afce12bdd340444a
|
||||
store.zip 0498d2a001e71051bbd2acd2346f38da7cbd345a633cb7bf0f8a20938714b51a
|
||||
tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d https://some-site/tiny-data.txt
|
||||
tiny-data.tar.gz 41503f083814f43a01a8e9a30c28d7a9fe96839a99727a7fdd0acf7cd5bab63b
|
||||
store.tar.gz 088c7f4e0f1859b1c769bb6065de24376f366374817ede8691a6ac2e49f29511
|
||||
tiny-data.txt.bz2 753663687a4040c90c8578061867d1df623e6aa8011c870a5dbd88ee3c82e306
|
||||
tiny-data.txt.gz 2e2da6161291657617c32192dba95635706af80c6e7335750812907b58fd4b52
|
||||
tiny-data.txt.xz 99dcb5c32a6e916344bacb4badcbc2f2b6ee196977d1d8187610c21e7e607765
|
||||
@@ -0,0 +1,2 @@
|
||||
tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d
|
||||
some-file.txt second_element third_element forth_element
|
||||
@@ -0,0 +1,2 @@
|
||||
"file with spaces.txt" baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d
|
||||
other\ with\ spaces.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d
|
||||
@@ -0,0 +1,12 @@
|
||||
subdir/tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d
|
||||
tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d
|
||||
large-data.txt 98de171fb320da82982e6bf0f3994189fff4b42b23328769afce12bdd340444a
|
||||
tiny-data.zip 0d49e94f07bc1866ec57e7fd1b93a351fba36842ec9b13dd50bf94e8dfa35cbb
|
||||
|
||||
store.zip 0498D2A001E71051BBD2ACD2346F38DA7CBD345A633CB7BF0F8A20938714B51A
|
||||
tiny-data.tar.gz 41503f083814f43a01a8e9a30c28d7a9fe96839a99727a7fdd0acf7cd5bab63b
|
||||
|
||||
store.tar.gz 088c7f4e0f1859b1c769bb6065de24376f366374817ede8691a6ac2e49f29511
|
||||
tiny-data.txt.bz2 753663687a4040c90c8578061867d1df623e6aa8011c870a5dbd88ee3c82e306
|
||||
tiny-data.txt.gz 2e2da6161291657617c32192dba95635706af80c6e7335750812907b58fd4b52
|
||||
tiny-data.txt.xz 99dcb5c32a6e916344bacb4badcbc2f2b6ee196977d1d8187610c21e7e607765
|
||||
@@ -0,0 +1,14 @@
|
||||
# a comment
|
||||
subdir/tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d
|
||||
tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d
|
||||
large-data.txt 98de171fb320da82982e6bf0f3994189fff4b42b23328769afce12bdd340444a
|
||||
tiny-data.zip 0d49e94f07bc1866ec57e7fd1b93a351fba36842ec9b13dd50bf94e8dfa35cbb
|
||||
|
||||
# a comment with a starting space
|
||||
store.zip 0498d2a001e71051bbd2acd2346f38da7cbd345a633cb7bf0f8a20938714b51a
|
||||
tiny-data.tar.gz 41503f083814f43a01a8e9a30c28d7a9fe96839a99727a7fdd0acf7cd5bab63b
|
||||
|
||||
store.tar.gz 088c7f4e0f1859b1c769bb6065de24376f366374817ede8691a6ac2e49f29511
|
||||
tiny-data.txt.bz2 753663687a4040c90c8578061867d1df623e6aa8011c870a5dbd88ee3c82e306
|
||||
tiny-data.txt.gz 2e2da6161291657617c32192dba95635706af80c6e7335750812907b58fd4b52
|
||||
tiny-data.txt.xz 99dcb5c32a6e916344bacb4badcbc2f2b6ee196977d1d8187610c21e7e607765
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,2 @@
|
||||
# A tiny data file for test purposes only
|
||||
1 2 3 4 5 6
|
||||
@@ -0,0 +1,2 @@
|
||||
# A tiny data file for test purposes only
|
||||
1 2 3 4 5 6
|
||||
Binary file not shown.
@@ -0,0 +1,2 @@
|
||||
# A tiny data file for test purposes only
|
||||
1 2 3 4 5 6
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,689 @@
|
||||
# Copyright (c) 2018 The Pooch Developers.
|
||||
# Distributed under the terms of the BSD 3-Clause License.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
|
||||
#
|
||||
# pylint: disable=redefined-outer-name
|
||||
"""
|
||||
Test the core class and factory function.
|
||||
"""
|
||||
import hashlib
|
||||
import os
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
|
||||
import pytest
|
||||
|
||||
from ..core import create, Pooch, retrieve, download_action, stream_download
|
||||
from ..utils import get_logger, temporary_file, os_cache
|
||||
from ..hashes import file_hash, hash_matches
|
||||
|
||||
# Import the core module so that we can monkeypatch some functions
|
||||
from .. import core
|
||||
from ..downloaders import HTTPDownloader, FTPDownloader
|
||||
|
||||
from .utils import (
|
||||
pooch_test_url,
|
||||
data_over_ftp,
|
||||
pooch_test_figshare_url,
|
||||
pooch_test_zenodo_url,
|
||||
pooch_test_zenodo_with_slash_url,
|
||||
pooch_test_dataverse_url,
|
||||
pooch_test_registry,
|
||||
check_tiny_data,
|
||||
check_large_data,
|
||||
capture_log,
|
||||
mirror_directory,
|
||||
)
|
||||
|
||||
# Folder with the baked-in test data files shipped alongside the test suite
DATA_DIR = str(Path(__file__).parent / "data")
# Shared fixtures: known registry and the remote locations the tests hit
REGISTRY = pooch_test_registry()
BASEURL = pooch_test_url()
FIGSHAREURL = pooch_test_figshare_url()
ZENODOURL = pooch_test_zenodo_url()
ZENODOURL_W_SLASH = pooch_test_zenodo_with_slash_url()
DATAVERSEURL = pooch_test_dataverse_url()
# Registry with a deliberately wrong hash, used to exercise failure paths
REGISTRY_CORRUPTED = {
    # The same data file but I changed the hash manually to a wrong one
    "tiny-data.txt": "098h0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d"
}
|
||||
|
||||
|
||||
@pytest.fixture
def data_dir_mirror(tmp_path):
    """
    Mirror the test data folder on a temporary directory. Needed to avoid
    permission errors when pooch is installed on a non-writable path.

    Returns the path of the temporary copy (tests use it with the ``/``
    operator, so presumably a ``pathlib.Path`` — see mirror_directory).
    """
    return mirror_directory(DATA_DIR, tmp_path)
|
||||
|
||||
|
||||
@pytest.mark.network
def test_retrieve():
    "Try downloading some data with retrieve"
    with TemporaryDirectory() as local_store:
        data_file = "tiny-data.txt"
        url = BASEURL + data_file
        # Check that the logs say that the file is being downloaded
        with capture_log() as log_file:
            fname = retrieve(url, known_hash=None, path=local_store)
            logs = log_file.getvalue()
            assert logs.split()[0] == "Downloading"
            # With known_hash=None the computed hash is logged so the user
            # can copy it into a registry
            assert "SHA256 hash of downloaded file:" in logs
            assert REGISTRY[data_file] in logs
        # Check that the downloaded file has the right content
        assert data_file == fname[-len(data_file) :]
        check_tiny_data(fname)
        assert file_hash(fname) == REGISTRY[data_file]
        # Check that no logging happens when not downloading
        with capture_log() as log_file:
            fname = retrieve(url, known_hash=None, path=local_store)
            assert log_file.getvalue() == ""
        with capture_log() as log_file:
            fname = retrieve(url, known_hash=REGISTRY[data_file], path=local_store)
            assert log_file.getvalue() == ""
|
||||
|
||||
|
||||
@pytest.mark.network
def test_retrieve_fname():
    "Try downloading some data with retrieve and setting the file name"
    with TemporaryDirectory() as local_store:
        data_file = "tiny-data.txt"
        url = BASEURL + data_file
        # Check that the logs say that the file is being downloaded
        with capture_log() as log_file:
            fname = retrieve(url, known_hash=None, path=local_store, fname=data_file)
            logs = log_file.getvalue()
            assert logs.split()[0] == "Downloading"
            assert "SHA256 hash of downloaded file:" in logs
            assert REGISTRY[data_file] in logs
        # Check that the downloaded file has the right name and content
        # (fname presumably overrides retrieve's auto-generated unique name
        # — verify against core.retrieve)
        assert data_file == os.path.split(fname)[1]
        check_tiny_data(fname)
        assert file_hash(fname) == REGISTRY[data_file]
|
||||
|
||||
|
||||
@pytest.mark.network
def test_retrieve_default_path():
    "Try downloading some data with retrieve to the default cache location"
    data_file = "tiny-data.txt"
    url = BASEURL + data_file
    expected_location = os_cache("pooch") / data_file
    try:
        # Check that the logs say that the file is being downloaded
        with capture_log() as log_file:
            fname = retrieve(url, known_hash=None, fname=data_file)
            logs = log_file.getvalue()
            assert logs.split()[0] == "Downloading"
            assert str(os_cache("pooch").resolve()) in logs
            assert "SHA256 hash of downloaded file" in logs
            assert REGISTRY[data_file] in logs
        # Check that the downloaded file has the right content
        assert fname == str(expected_location.resolve())
        check_tiny_data(fname)
        assert file_hash(fname) == REGISTRY[data_file]
    finally:
        # This test writes to the real user cache, so always clean up
        if os.path.exists(str(expected_location)):
            os.remove(str(expected_location))
|
||||
|
||||
|
||||
def test_pooch_local(data_dir_mirror):
    "Setup a pooch that already has the local data and test the fetch."
    pup = Pooch(path=data_dir_mirror, base_url="some bogus URL", registry=REGISTRY)
    expected = str(data_dir_mirror / "tiny-data.txt")
    # The file is already present with the right hash, so fetch must return
    # the local path without ever touching the (bogus) remote URL
    fetched = pup.fetch("tiny-data.txt")
    assert fetched == expected
    check_tiny_data(fetched)
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.parametrize(
    "url",
    [
        BASEURL,
        pytest.param(FIGSHAREURL, marks=pytest.mark.figshare),
        ZENODOURL,
        DATAVERSEURL,
    ],
    ids=["https", "figshare", "zenodo", "dataverse"],
)
def test_pooch_custom_url(url):
    "Have pooch download the file from URL that is not base_url"
    with TemporaryDirectory() as local_store:
        path = Path(local_store)
        # Per-file URL override; base_url is intentionally empty so the
        # download can only succeed through the urls mapping
        urls = {"tiny-data.txt": url + "tiny-data.txt"}
        # Setup a pooch in a temp dir
        pup = Pooch(path=path, base_url="", registry=REGISTRY, urls=urls)
        # Check that the logs say that the file is being downloaded
        with capture_log() as log_file:
            fname = pup.fetch("tiny-data.txt")
            logs = log_file.getvalue()
            assert logs.split()[0] == "Downloading"
            assert logs.split()[-1] == f"'{path}'."
        check_tiny_data(fname)
        # Check that no logging happens when there are no events
        with capture_log() as log_file:
            fname = pup.fetch("tiny-data.txt")
            assert log_file.getvalue() == ""
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.parametrize(
    "url",
    [
        BASEURL,
        pytest.param(FIGSHAREURL, marks=pytest.mark.figshare),
        ZENODOURL,
        DATAVERSEURL,
    ],
    ids=["https", "figshare", "zenodo", "dataverse"],
)
def test_pooch_download(url):
    "Setup a pooch that has no local data and needs to download"
    with TemporaryDirectory() as local_store:
        path = Path(local_store)
        true_path = str(path / "tiny-data.txt")
        # Setup a pooch in a temp dir
        pup = Pooch(path=path, base_url=url, registry=REGISTRY)
        # Check that the logs say that the file is being downloaded
        with capture_log() as log_file:
            fname = pup.fetch("tiny-data.txt")
            logs = log_file.getvalue()
            assert logs.split()[0] == "Downloading"
            assert logs.split()[-1] == f"'{path}'."
        # Check that the downloaded file has the right content
        assert true_path == fname
        check_tiny_data(fname)
        assert file_hash(fname) == REGISTRY["tiny-data.txt"]
        # Check that no logging happens when not downloading
        with capture_log() as log_file:
            fname = pup.fetch("tiny-data.txt")
            assert log_file.getvalue() == ""
|
||||
|
||||
|
||||
class FakeHashMatches:  # pylint: disable=too-few-public-methods
    "Create a fake version of hash_matches that fails n times"

    def __init__(self, nfailures):
        # Total number of calls that should report a mismatch
        self.nfailures = nfailures
        # How many mismatches have been reported so far
        self.failed = 0

    def hash_matches(self, *args, **kwargs):
        "Fail n times before finally passing"
        if self.failed >= self.nfailures:
            # Budget of failures exhausted: behave like the real check
            return hash_matches(*args, **kwargs)
        self.failed += 1
        # Swap in a bogus known hash so the comparison is guaranteed to fail
        return hash_matches(args[0], "bla", **kwargs)
|
||||
|
||||
|
||||
@pytest.mark.network
def test_pooch_download_retry_off_by_default(monkeypatch):
    "Check that retrying the download is off by default"
    with TemporaryDirectory() as local_store:
        # Force hash_matches to fail 3 times so a retrying Pooch would retry
        monkeypatch.setattr(core, "hash_matches", FakeHashMatches(3).hash_matches)
        # Setup a pooch without download retrying
        path = Path(local_store)
        pup = Pooch(path=path, base_url=BASEURL, registry=REGISTRY)
        # Make sure it fails with no retries
        with pytest.raises(ValueError) as error:
            with capture_log() as log_file:
                pup.fetch("tiny-data.txt")
        # NOTE(review): str(error) on the ExceptionInfo works here, but
        # str(error.value) is the documented spelling — consider changing
        assert "does not match the known hash" in str(error)
        # Check that the log doesn't have the download retry message
        logs = log_file.getvalue().strip().split("\n")
        assert len(logs) == 1
        assert logs[0].startswith("Downloading")
        assert logs[0].endswith(f"'{path}'.")
|
||||
|
||||
|
||||
class FakeSleep:  # pylint: disable=too-few-public-methods
    "Create a fake version of sleep that logs the specified times"

    def __init__(self):
        # Every requested sleep duration, in call order
        self.times = []

    def sleep(self, secs):
        "Store the time and doesn't sleep"
        # Only record the request — never actually block the test
        self.times.append(secs)
|
||||
|
||||
|
||||
@pytest.mark.network
def test_pooch_download_retry(monkeypatch):
    "Check that retrying the download works if the hash is different"
    with TemporaryDirectory() as local_store:
        # Fail the hash check 11 times, then pass on the final attempt
        monkeypatch.setattr(core, "hash_matches", FakeHashMatches(11).hash_matches)
        fakesleep = FakeSleep()
        # Replace time.sleep so the backoff is recorded instead of waited out
        monkeypatch.setattr(core.time, "sleep", fakesleep.sleep)
        # Setup a pooch with download retrying
        path = Path(local_store)
        true_path = str(path / "tiny-data.txt")
        retries = 11
        pup = Pooch(
            path=path, base_url=BASEURL, registry=REGISTRY, retry_if_failed=retries
        )
        # Check that the logs say that the download failed n times
        with capture_log() as log_file:
            fname = pup.fetch("tiny-data.txt")
            logs = log_file.getvalue().strip().split("\n")
            assert len(logs) == 1 + retries
            assert logs[0].startswith("Downloading")
            assert logs[0].endswith(f"'{path}'.")
            # Remaining-attempt counter counts down from `retries` to 1
            for i, line in zip(range(retries, 0, -1), logs[1:]):
                assert "Failed to download" in line
                plural = "s" if i > 1 else ""
                assert f"download again {i} more time{plural}." in line
        # Check that the sleep time increases but stops at 10s
        assert fakesleep.times == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10]
        # Check that the downloaded file has the right content
        assert true_path == fname
        check_tiny_data(fname)
        assert file_hash(fname) == REGISTRY["tiny-data.txt"]
|
||||
|
||||
|
||||
@pytest.mark.network
def test_pooch_download_retry_fails_eventually(monkeypatch):
    "Check that retrying the download fails after the set amount of retries"
    with TemporaryDirectory() as local_store:
        # 3 forced failures but only 1 allowed retry -> must raise
        monkeypatch.setattr(core, "hash_matches", FakeHashMatches(3).hash_matches)
        # Setup a pooch with insufficient retry attempts
        path = Path(local_store)
        pup = Pooch(path=path, base_url=BASEURL, registry=REGISTRY, retry_if_failed=1)
        # Make sure it fails with no retries
        with pytest.raises(ValueError) as error:
            # Check that the logs say that the download failed n times
            with capture_log() as log_file:
                pup.fetch("tiny-data.txt")
        logs = log_file.getvalue().strip().split("\n")
        assert len(logs) == 2
        assert logs[0].startswith("Downloading")
        assert logs[0].endswith(f"'{path}'.")
        assert "Failed to download" in logs[1]
        assert "download again 1 more time." in logs[1]
        assert "does not match the known hash" in str(error)
|
||||
|
||||
|
||||
@pytest.mark.network
def test_pooch_logging_level():
    "Setup a pooch and check that no logging happens when the level is raised"
    with TemporaryDirectory() as local_store:
        storage = Path(local_store)
        custom_urls = {"tiny-data.txt": BASEURL + "tiny-data.txt"}
        pup = Pooch(path=storage, base_url="", registry=REGISTRY, urls=custom_urls)
        # Raising the capture level to CRITICAL should filter out the
        # "Downloading" message entirely
        with capture_log("CRITICAL") as log_file:
            fetched = pup.fetch("tiny-data.txt")
            assert log_file.getvalue() == ""
        check_tiny_data(fetched)
|
||||
|
||||
|
||||
@pytest.mark.network
def test_pooch_update():
    "Setup a pooch that already has the local data but the file is outdated"
    with TemporaryDirectory() as local_store:
        path = Path(local_store)
        # Create a dummy version of tiny-data.txt that is different from the
        # one in the remote storage
        true_path = str(path / "tiny-data.txt")
        with open(true_path, "w", encoding="utf-8") as fin:
            fin.write("different data")
        # Setup a pooch in a temp dir
        pup = Pooch(path=path, base_url=BASEURL, registry=REGISTRY)
        # Check that the logs say that the file is being updated
        with capture_log() as log_file:
            fname = pup.fetch("tiny-data.txt")
            logs = log_file.getvalue()
            # A hash mismatch against the registry triggers "Updating"
            # rather than "Downloading"
            assert logs.split()[0] == "Updating"
            assert logs.split()[-1] == f"'{path}'."
        # Check that the updated file has the right content
        assert true_path == fname
        check_tiny_data(fname)
        assert file_hash(fname) == REGISTRY["tiny-data.txt"]
        # Check that no logging happens when not downloading
        with capture_log() as log_file:
            fname = pup.fetch("tiny-data.txt")
            assert log_file.getvalue() == ""
|
||||
|
||||
|
||||
def test_pooch_update_disallowed():
    "Test that disallowing updates works."
    with TemporaryDirectory() as local_store:
        storage = Path(local_store)
        # Plant a stale tiny-data.txt whose hash disagrees with the registry
        stale_path = storage / "tiny-data.txt"
        with open(stale_path, "w", encoding="utf-8") as outfile:
            outfile.write("different data")
        pup = Pooch(
            path=storage,
            base_url=BASEURL,
            registry=REGISTRY,
            allow_updates=False,
        )
        # With updates disallowed, the stale file cannot be refreshed and
        # fetch must error out instead
        with pytest.raises(ValueError):
            pup.fetch("tiny-data.txt")
|
||||
|
||||
|
||||
def test_pooch_update_disallowed_environment():
    "Test that disallowing updates works through an environment variable."
    variable_name = "MYPROJECT_DISALLOW_UPDATES"
    try:
        os.environ[variable_name] = "False"
        with TemporaryDirectory() as local_store:
            path = Path(local_store)
            # Create a dummy version of tiny-data.txt that is different from
            # the one in the remote storage
            true_path = str(path / "tiny-data.txt")
            with open(true_path, "w", encoding="utf-8") as fin:
                fin.write("different data")
            # Setup a pooch in a temp dir; allow_updates names the
            # environment variable that controls updating
            pup = create(
                path=path,
                base_url=BASEURL,
                registry=REGISTRY,
                allow_updates=variable_name,
            )
            with pytest.raises(ValueError):
                pup.fetch("tiny-data.txt")
    finally:
        # Pass a default so cleanup never raises KeyError if the variable
        # was somehow never set (e.g. an exception before the assignment)
        os.environ.pop(variable_name, None)
|
||||
|
||||
|
||||
def test_pooch_create_base_url_no_trailing_slash():
    """
    Test if pooch.create appends a trailing slash to the base url if missing
    """
    url_without_slash = "https://mybase.url"
    pup = create(base_url=url_without_slash, registry=None, path=DATA_DIR)
    # create() must normalize the base URL so file names can be appended
    assert pup.base_url == url_without_slash + "/"
|
||||
|
||||
|
||||
@pytest.mark.network
def test_pooch_corrupted(data_dir_mirror):
    "Raise an exception if the file hash doesn't match the registry"
    # Test the case where the file wasn't in the directory
    with TemporaryDirectory() as local_store:
        path = os.path.abspath(local_store)
        pup = Pooch(path=path, base_url=BASEURL, registry=REGISTRY_CORRUPTED)
        with capture_log() as log_file:
            with pytest.raises(ValueError) as error:
                pup.fetch("tiny-data.txt")
            # The offending file name should be part of the error message
            assert "(tiny-data.txt)" in str(error.value)
            logs = log_file.getvalue()
            assert logs.split()[0] == "Downloading"
            assert logs.split()[-1] == f"'{path}'."
    # and the case where the file exists but hash doesn't match
    pup = Pooch(path=data_dir_mirror, base_url=BASEURL, registry=REGISTRY_CORRUPTED)
    with capture_log() as log_file:
        with pytest.raises(ValueError) as error:
            pup.fetch("tiny-data.txt")
        assert "(tiny-data.txt)" in str(error.value)
        logs = log_file.getvalue()
        assert logs.split()[0] == "Updating"
        assert logs.split()[-1] == f"'{data_dir_mirror}'."
|
||||
|
||||
|
||||
def test_pooch_file_not_in_registry():
    "Should raise an exception if the file is not in the registry."
    pup = Pooch(
        path="it shouldn't matter",
        base_url="this shouldn't either",
        registry=REGISTRY,
    )
    # Fetching an unknown file must fail loudly before any path or URL is
    # ever touched — hence the throwaway path/base_url values
    with pytest.raises(ValueError):
        pup.fetch("this-file-does-not-exit.csv")
|
||||
|
||||
|
||||
def test_pooch_load_registry():
    "Loading the registry from a file should work"
    pup = Pooch(path="", base_url="")
    pup.load_registry(os.path.join(DATA_DIR, "registry.txt"))
    assert pup.registry == REGISTRY
    # Bug fix: the old check compared the results of list.sort(), which
    # returns None, so `None == None` always passed. Compare sorted copies.
    assert sorted(pup.registry_files) == sorted(REGISTRY)
|
||||
|
||||
|
||||
def test_pooch_load_registry_comments():
    "Loading the registry from a file and strip line comments"
    pup = Pooch(path="", base_url="")
    pup.load_registry(os.path.join(DATA_DIR, "registry_comments.txt"))
    assert pup.registry == REGISTRY
    # Bug fix: list.sort() returns None, so the old `.sort() == .sort()`
    # comparison always passed. Compare sorted copies instead.
    assert sorted(pup.registry_files) == sorted(REGISTRY)
|
||||
|
||||
|
||||
def test_pooch_load_registry_fileobj():
    "Loading the registry from a file object"
    path = os.path.join(DATA_DIR, "registry.txt")

    # Binary mode
    pup = Pooch(path="", base_url="")
    with open(path, "rb") as fin:
        pup.load_registry(fin)
    assert pup.registry == REGISTRY
    # Bug fix: list.sort() returns None, so the old `.sort() == .sort()`
    # comparison always passed. Compare sorted copies instead.
    assert sorted(pup.registry_files) == sorted(REGISTRY)

    # Text mode
    pup = Pooch(path="", base_url="")
    with open(path, "r", encoding="utf-8") as fin:
        pup.load_registry(fin)
    assert pup.registry == REGISTRY
    assert sorted(pup.registry_files) == sorted(REGISTRY)
|
||||
|
||||
|
||||
def test_pooch_load_registry_custom_url():
    "Load the registry from a file with a custom URL inserted"
    pup = Pooch(path="", base_url="")
    registry_file = os.path.join(DATA_DIR, "registry-custom-url.txt")
    pup.load_registry(registry_file)
    assert pup.registry == REGISTRY
    # The extra column on the registry line becomes a per-file URL override
    assert pup.urls == {"tiny-data.txt": "https://some-site/tiny-data.txt"}
|
||||
|
||||
|
||||
def test_pooch_load_registry_invalid_line():
    "Should raise an exception when a line doesn't have two elements"
    pup = Pooch(path="", base_url="", registry={})
    bad_registry = os.path.join(DATA_DIR, "registry-invalid.txt")
    # registry-invalid.txt has a line with too many fields; loading it must
    # be rejected with an IOError
    with pytest.raises(IOError):
        pup.load_registry(bad_registry)
|
||||
|
||||
|
||||
def test_pooch_load_registry_with_spaces():
    "Should check that spaces in filenames are allowed in registry files"
    pup = Pooch(path="", base_url="")
    pup.load_registry(os.path.join(DATA_DIR, "registry-spaces.txt"))
    # Both quoting styles in the fixture must resolve to plain names
    for expected_name in ("file with spaces.txt", "other with spaces.txt"):
        assert expected_name in pup.registry
|
||||
|
||||
|
||||
@pytest.mark.network
def test_check_availability():
    "Should correctly check availability of existing and non existing files"
    # Check available remote file
    pup = Pooch(path=DATA_DIR, base_url=BASEURL, registry=REGISTRY)
    assert pup.is_available("tiny-data.txt")
    # Check non available remote file
    # (presumably probes the remote without downloading — verify in core)
    pup = Pooch(path=DATA_DIR, base_url=BASEURL + "wrong-url/", registry=REGISTRY)
    assert not pup.is_available("tiny-data.txt")
    # Wrong file name
    registry = {"not-a-real-data-file.txt": "notarealhash"}
    registry.update(REGISTRY)
    pup = Pooch(path=DATA_DIR, base_url=BASEURL, registry=registry)
    assert not pup.is_available("not-a-real-data-file.txt")
|
||||
|
||||
|
||||
def test_check_availability_on_ftp(ftpserver):
    "Should correctly check availability of existing and non existing files"
    # ftpserver is a pytest fixture providing a local FTP server
    # (presumably from pytest-localftpserver — confirm in conftest/deps)
    with data_over_ftp(ftpserver, "tiny-data.txt") as url:
        # Check available remote file on FTP server
        pup = Pooch(
            path=DATA_DIR,
            base_url=url.replace("tiny-data.txt", ""),
            registry={
                "tiny-data.txt": "baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d",
                "doesnot_exist.zip": "jdjdjdjdflld",
            },
        )
        downloader = FTPDownloader(port=ftpserver.server_port)
        assert pup.is_available("tiny-data.txt", downloader=downloader)
        # Check non available remote file
        assert not pup.is_available("doesnot_exist.zip", downloader=downloader)
|
||||
|
||||
|
||||
def test_check_availability_invalid_downloader():
    "Should raise an exception if the downloader doesn't support this"

    def no_check_downloader(url, output, pooch):  # pylint: disable=unused-argument
        "A downloader that doesn't support check_only"
        return None

    pup = Pooch(path=DATA_DIR, base_url=BASEURL, registry=REGISTRY)
    expected_message = "does not support availability checks."
    # A plain callable without the check_only keyword cannot answer
    # availability queries, so is_available must refuse it
    with pytest.raises(NotImplementedError, match=expected_message):
        pup.is_available("tiny-data.txt", downloader=no_check_downloader)
|
||||
|
||||
|
||||
@pytest.mark.network
def test_fetch_with_downloader(capsys):
    "Setup a downloader function for fetch"

    def download(url, output_file, pup):  # pylint: disable=unused-argument
        "Download through HTTP and warn that we're doing it"
        # Log a marker so the test can prove this custom downloader ran
        get_logger().info("downloader executed")
        HTTPDownloader()(url, output_file, pup)

    with TemporaryDirectory() as local_store:
        path = Path(local_store)
        # Setup a pooch in a temp dir
        pup = Pooch(path=path, base_url=BASEURL, registry=REGISTRY)
        # Check that the logs say that the file is being downloaded
        with capture_log() as log_file:
            fname = pup.fetch("large-data.txt", downloader=download)
            logs = log_file.getvalue()
            lines = logs.splitlines()
            assert len(lines) == 2
            assert lines[0].split()[0] == "Downloading"
            assert lines[1] == "downloader executed"
        # Read stderr and make sure no progress bar was printed by default
        assert not capsys.readouterr().err
        # Check that the downloaded file has the right content
        check_large_data(fname)
        # Check that no logging happens when not downloading
        with capture_log() as log_file:
            fname = pup.fetch("large-data.txt")
            assert log_file.getvalue() == ""
|
||||
|
||||
|
||||
def test_invalid_hash_alg(data_dir_mirror):
    "Test an invalid hashing algorithm"
    bad_registry = {"tiny-data.txt": "blah:1234"}
    pup = Pooch(path=data_dir_mirror, base_url=BASEURL, registry=bad_registry)
    with pytest.raises(ValueError) as exc:
        pup.fetch("tiny-data.txt")
    # The unknown algorithm name should be quoted in the error message
    assert "'blah'" in str(exc.value)
|
||||
|
||||
|
||||
def test_alternative_hashing_algorithms(data_dir_mirror):
    "Test different hashing algorithms using local data"
    fname = str(data_dir_mirror / "tiny-data.txt")
    check_tiny_data(fname)
    with open(fname, "rb") as infile:
        contents = infile.read()
    for algorithm in ("sha512", "md5"):
        # Build a registry entry with an explicit "<algorithm>:" prefix
        digest = hashlib.new(algorithm, contents).hexdigest()
        registry = {"tiny-data.txt": f"{algorithm}:{digest}"}
        pup = Pooch(path=data_dir_mirror, base_url="some bogus URL", registry=registry)
        # The local file matches, so fetch resolves locally (bogus URL unused)
        assert pup.fetch("tiny-data.txt") == fname
        check_tiny_data(fname)
|
||||
|
||||
|
||||
def test_download_action():
    "Test that the right action is performed based on file existing"
    # Missing file: must be downloaded from scratch
    action, verb = download_action(
        Path("this_file_does_not_exist.txt"), known_hash=None
    )
    assert (action, verb) == ("download", "Downloading")

    # Existing file whose hash disagrees: must be updated
    with temporary_file() as tmp:
        action, verb = download_action(Path(tmp), known_hash="not the correct hash")
        assert (action, verb) == ("update", "Updating")

    # Existing file whose hash matches: just fetch the local copy
    with temporary_file() as tmp:
        with open(tmp, "w", encoding="utf-8") as output:
            output.write("some data")
        action, verb = download_action(Path(tmp), known_hash=file_hash(tmp))
        assert (action, verb) == ("fetch", "Fetching")
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.parametrize("fname", ["tiny-data.txt", "subdir/tiny-data.txt"])
def test_stream_download(fname):
    "Check that downloading a file over HTTP works as expected"
    # Use the data in store/ because the subdir is in there for some reason
    url = BASEURL + "store/" + fname
    known_hash = REGISTRY[fname]
    downloader = HTTPDownloader()
    with TemporaryDirectory() as local_store:
        destination = Path(local_store) / fname
        assert not destination.exists()
        # The subdir/ case implies stream_download creates missing parent
        # directories — TODO confirm against core.stream_download
        stream_download(url, destination, known_hash, downloader, pooch=None)
        assert destination.exists()
        check_tiny_data(str(destination))
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.parametrize(
    "url",
    [pytest.param(FIGSHAREURL, marks=pytest.mark.figshare), ZENODOURL, DATAVERSEURL],
    ids=["figshare", "zenodo", "dataverse"],
)
def test_load_registry_from_doi(url):
    """Check that the registry is correctly populated from the API"""
    with TemporaryDirectory() as local_store:
        path = os.path.abspath(local_store)
        pup = Pooch(path=path, base_url=url)
        pup.load_registry_from_doi()

        # Check the existence of all files in the registry
        # (the test DOI archives are known to hold exactly these two files)
        assert len(pup.registry) == 2
        assert "tiny-data.txt" in pup.registry
        assert "store.zip" in pup.registry

        # Ensure that all files have correct checksums by fetching them
        for filename in pup.registry:
            pup.fetch(filename)
|
||||
|
||||
|
||||
@pytest.mark.network
def test_load_registry_from_doi_zenodo_with_slash():
    """
    Check that the registry is correctly populated from the Zenodo API when
    the filename contains a slash
    """
    url = ZENODOURL_W_SLASH
    with TemporaryDirectory() as local_store:
        path = os.path.abspath(local_store)
        pup = Pooch(path=path, base_url=url)
        pup.load_registry_from_doi()

        # Check the existence of all files in the registry
        # (this archive holds a single file whose name embeds a slash)
        assert len(pup.registry) == 1
        assert "santisoler/pooch-test-data-v1.zip" in pup.registry

        # Ensure that all files have correct checksums by fetching them
        for filename in pup.registry:
            pup.fetch(filename)
|
||||
|
||||
|
||||
def test_wrong_load_registry_from_doi():
    """Check that non-DOI URLs produce an error"""
    pup = Pooch(path="", base_url=BASEURL)
    # A plain HTTPS base_url is not a DOI and must be rejected
    with pytest.raises(ValueError) as exc:
        pup.load_registry_from_doi()
    assert "only implemented for DOIs" in str(exc.value)
|
||||
@@ -0,0 +1,582 @@
|
||||
# Copyright (c) 2018 The Pooch Developers.
|
||||
# Distributed under the terms of the BSD 3-Clause License.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
|
||||
#
|
||||
"""
|
||||
Test the downloader classes and functions separately from the Pooch core.
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
from tempfile import TemporaryDirectory
|
||||
|
||||
import pytest
|
||||
from requests import HTTPError
|
||||
|
||||
# Mypy doesn't like assigning None like this.
|
||||
# Can just use a guard variable
|
||||
|
||||
try:
|
||||
import tqdm
|
||||
except ImportError:
|
||||
tqdm = None # type: ignore
|
||||
|
||||
try:
|
||||
import paramiko
|
||||
except ImportError:
|
||||
paramiko = None # type: ignore
|
||||
|
||||
from .. import Pooch
|
||||
from ..downloaders import (
|
||||
HTTPDownloader,
|
||||
FTPDownloader,
|
||||
SFTPDownloader,
|
||||
DOIDownloader,
|
||||
choose_downloader,
|
||||
FigshareRepository,
|
||||
ZenodoRepository,
|
||||
DataverseRepository,
|
||||
doi_to_url,
|
||||
REQUESTS_HEADERS,
|
||||
)
|
||||
from ..processors import Unzip
|
||||
from .utils import (
|
||||
pooch_test_url,
|
||||
check_large_data,
|
||||
check_tiny_data,
|
||||
data_over_ftp,
|
||||
pooch_test_figshare_url,
|
||||
pooch_test_zenodo_url,
|
||||
pooch_test_zenodo_with_slash_url,
|
||||
pooch_test_dataverse_url,
|
||||
)
|
||||
|
||||
|
||||
BASEURL = pooch_test_url()
|
||||
FIGSHAREURL = pooch_test_figshare_url()
|
||||
ZENODOURL = pooch_test_zenodo_url()
|
||||
ZENODOURL_W_SLASH = pooch_test_zenodo_with_slash_url()
|
||||
DATAVERSEURL = pooch_test_dataverse_url()
|
||||
|
||||
|
||||
@pytest.mark.skipif(tqdm is None, reason="requires tqdm")
@pytest.mark.parametrize(
    "url",
    [
        BASEURL + "tiny-data.txt",  # HTTPDownloader
        ZENODOURL,  # DOIDownloader
    ],
)
def test_progressbar_kwarg_passed(url):
    """choose_downloader must forward progressbar=True to the downloader."""
    chosen = choose_downloader(url, progressbar=True)
    assert chosen.progressbar is True
|
||||
|
||||
|
||||
@pytest.mark.skipif(paramiko is None, reason="requires paramiko")
def test_progressbar_kwarg_passed_sftp():
    """choose_downloader must forward progressbar=True for SFTP URLs too."""
    chosen = choose_downloader(
        "sftp://test.rebex.net/pub/example/pocketftp.png", progressbar=True
    )
    assert chosen.progressbar is True
|
||||
|
||||
|
||||
def test_unsupported_protocol():
    """choose_downloader should reject URLs with unknown protocols."""
    # A typo-like HTTP scheme
    with pytest.raises(ValueError):
        choose_downloader("httpup://some-invalid-url.com")
    # A typo-like DOI scheme
    with pytest.raises(ValueError):
        choose_downloader("doii:XXX/XXX/file")
|
||||
|
||||
|
||||
@pytest.mark.network
def test_invalid_doi_repository():
    """A DOI pointing at an unsupported repository must raise ValueError."""
    downloader = DOIDownloader()
    # The Pooch JOSS-paper DOI resolves to a journal, not a data repository
    with pytest.raises(ValueError) as exc:
        downloader(
            url="doi:10.21105/joss.01943/file_name.txt", output_file=None, pooch=None
        )
    assert "Invalid data repository 'joss.theoj.org'" in str(exc.value)
|
||||
|
||||
|
||||
@pytest.mark.network
def test_doi_url_not_found():
    """Resolving a bogus DOI should raise an HTTPError."""
    with pytest.raises(HTTPError):
        doi_to_url(doi="NOTAREALDOI")
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.parametrize(
    "repository,doi",
    [
        pytest.param(
            FigshareRepository,
            "10.6084/m9.figshare.14763051.v1",
            marks=pytest.mark.figshare,
        ),
        (ZenodoRepository, "10.5281/zenodo.4924875"),
        (DataverseRepository, "10.11588/data/TKCFEF"),
    ],
    ids=["figshare", "zenodo", "dataverse"],
)
def test_figshare_url_file_not_found(repository, doi):
    """Asking for a file that isn't in the archive must raise ValueError."""
    with pytest.raises(ValueError) as exc:
        archive_url = doi_to_url(doi)
        repo = repository.initialize(doi, archive_url)
        repo.download_url(file_name="bla.txt")
    assert "File 'bla.txt' not found" in str(exc.value)
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.parametrize(
    "url",
    [pytest.param(FIGSHAREURL, marks=pytest.mark.figshare), ZENODOURL, DATAVERSEURL],
    ids=["figshare", "zenodo", "dataverse"],
)
def test_doi_downloader(url):
    """DOIDownloader should fetch a known file given a DOI-based URL."""
    with TemporaryDirectory() as storage:
        target = os.path.join(storage, "tiny-data.txt")
        DOIDownloader()(url + "tiny-data.txt", target, None)
        # The downloaded file must have the expected contents
        check_tiny_data(target)
|
||||
|
||||
|
||||
@pytest.mark.network
def test_zenodo_downloader_with_slash_in_fname():
    """
    Downloading from Zenodo should work when the file name has a slash.

    Regression test for issue #336.
    """
    with TemporaryDirectory() as storage:
        source = ZENODOURL_W_SLASH + "santisoler/pooch-test-data-v1.zip"
        archive = os.path.join(storage, "test-data.zip")
        DOIDownloader()(source, archive, None)
        # Extract the archive so the tiny-data.txt inside can be validated
        extracted = Unzip()(archive, action="download", pooch=None)
        (tiny_data,) = [f for f in extracted if "tiny-data.txt" in f]
        check_tiny_data(tiny_data)
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.figshare
def test_figshare_unspecified_version():
    """
    A Figshare DOI with no version should warn but still download the file.
    """
    # Strip the trailing ".vN" so the DOI carries no version
    url = FIGSHAREURL
    url = url[: url.rindex(".")] + "/"
    # The DOI quoted in the warning is the URL minus the "doi:" prefix and
    # the trailing slash
    doi = url[4:-1]
    warning_msg = f"The Figshare DOI '{doi}' doesn't specify which version of "
    with TemporaryDirectory() as storage:
        target = os.path.join(storage, "tiny-data.txt")
        with pytest.warns(UserWarning, match=warning_msg):
            DOIDownloader()(url + "tiny-data.txt", target, None)
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.figshare
@pytest.mark.parametrize(
    "version, missing, present",
    [
        (
            1,
            "LC08_L2SP_218074_20190114_20200829_02_T1-cropped.tar.gz",
            "cropped-before.tar.gz",
        ),
        (
            2,
            "cropped-before.tar.gz",
            "LC08_L2SP_218074_20190114_20200829_02_T1-cropped.tar.gz",
        ),
    ],
)
def test_figshare_data_repository_versions(version, missing, present):
    """
    Selecting a version through the Figshare DOI should expose exactly the
    files belonging to that version.
    """
    # Only query the archive metadata (the files are too big to download)
    doi = f"10.6084/m9.figshare.21665630.v{version}"
    figshare = FigshareRepository(doi, f"https://doi.org/{doi}/")
    available = {item["name"] for item in figshare.api_response}
    assert present in available
    assert missing not in available
|
||||
|
||||
|
||||
@pytest.mark.network
def test_ftp_downloader(ftpserver):
    """FTPDownloader should fetch a file served by the local FTP fixture."""
    with data_over_ftp(ftpserver, "tiny-data.txt") as url:
        with TemporaryDirectory() as storage:
            target = os.path.join(storage, "tiny-data.txt")
            FTPDownloader(port=ftpserver.server_port)(url, target, None)
            check_tiny_data(target)
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.skipif(paramiko is None, reason="requires paramiko to run SFTP")
def test_sftp_downloader():
    """SFTPDownloader should fetch a file from the public test server."""
    with TemporaryDirectory() as storage:
        download = SFTPDownloader(username="demo", password="password")
        target = os.path.join(storage, "pocketftp.png")
        download("sftp://test.rebex.net/pub/example/pocketftp.png", target, None)
        assert os.path.exists(target)
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.skipif(paramiko is None, reason="requires paramiko to run SFTP")
def test_sftp_downloader_fail_if_file_object():
    """Passing an open file object instead of a path must raise TypeError."""
    with TemporaryDirectory() as storage:
        download = SFTPDownloader(username="demo", password="password")
        url = "sftp://test.rebex.net/pub/example/pocketftp.png"
        target = os.path.join(storage, "pocketftp.png")
        with open(target, "wb") as target_obj:
            with pytest.raises(TypeError):
                download(url, target_obj, None)
|
||||
|
||||
|
||||
@pytest.mark.skipif(paramiko is not None, reason="paramiko must be missing")
def test_sftp_downloader_fail_if_paramiko_missing():
    """Creating an SFTP downloader without paramiko installed must fail."""
    with pytest.raises(ValueError) as exc:
        SFTPDownloader()
    # The error must point at the missing dependency by name
    assert "'paramiko'" in str(exc.value)
|
||||
|
||||
|
||||
@pytest.mark.skipif(tqdm is not None, reason="tqdm must be missing")
@pytest.mark.parametrize("downloader", [HTTPDownloader, FTPDownloader, SFTPDownloader])
def test_downloader_progressbar_fails(downloader):
    """Asking for a progress bar without tqdm installed must raise."""
    with pytest.raises(ValueError) as exc:
        downloader(progressbar=True)
    # The error must point at the missing dependency by name
    assert "'tqdm'" in str(exc.value)
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.skipif(tqdm is None, reason="requires tqdm")
@pytest.mark.parametrize(
    "url,downloader",
    [
        (BASEURL, HTTPDownloader),
        pytest.param(FIGSHAREURL, DOIDownloader, marks=pytest.mark.figshare),
    ],
    ids=["http", "figshare"],
)
def test_downloader_progressbar(url, downloader, capsys):
    """A downloader built with progressbar=True should print one to stderr."""
    download = downloader(progressbar=True)
    with TemporaryDirectory() as storage:
        fname = "tiny-data.txt"
        target = os.path.join(storage, fname)
        download(url + fname, target, None)
        # tqdm writes to stderr, separating updates with carriage returns;
        # the last chunk is the finished bar
        printed = capsys.readouterr().err.split("\r")[-1].strip()
        assert len(printed) == 79
        # Windows terminals get ASCII bars instead of unicode blocks
        if sys.platform == "win32":
            expected = "100%|####################"
        else:
            expected = "100%|████████████████████"
        # The bar's tail varies, so only compare the fixed-width prefix
        assert printed[:25] == expected
        # The downloaded file itself must be intact
        check_tiny_data(target)
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.skipif(tqdm is None, reason="requires tqdm")
def test_downloader_progressbar_ftp(capsys, ftpserver):
    """FTPDownloader with progressbar=True should print a bar to stderr."""
    with data_over_ftp(ftpserver, "tiny-data.txt") as url:
        download = FTPDownloader(progressbar=True, port=ftpserver.server_port)
        with TemporaryDirectory() as storage:
            target = os.path.join(storage, "tiny-data.txt")
            download(url, target, None)
            # The last carriage-return separated chunk is the finished bar
            printed = capsys.readouterr().err.split("\r")[-1].strip()
            assert len(printed) == 79
            # Windows terminals get ASCII bars instead of unicode blocks
            if sys.platform == "win32":
                expected = "100%|####################"
            else:
                expected = "100%|████████████████████"
            # Only the fixed-width prefix of the bar is stable
            assert printed[:25] == expected
            # The file must also have been downloaded correctly
            check_tiny_data(target)
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.skipif(tqdm is None, reason="requires tqdm")
@pytest.mark.skipif(paramiko is None, reason="requires paramiko")
def test_downloader_progressbar_sftp(capsys):
    """SFTPDownloader with progressbar=True should print a bar to stderr."""
    download = SFTPDownloader(progressbar=True, username="demo", password="password")
    with TemporaryDirectory() as storage:
        target = os.path.join(storage, "pocketftp.png")
        download("sftp://test.rebex.net/pub/example/pocketftp.png", target, None)
        # The last carriage-return separated chunk is the finished bar
        printed = capsys.readouterr().err.split("\r")[-1].strip()
        assert len(printed) == 79
        # Windows terminals get ASCII bars instead of unicode blocks
        if sys.platform == "win32":
            expected = "100%|####################"
        else:
            expected = "100%|████████████████████"
        # Only the fixed-width prefix of the bar is stable
        assert printed[:25] == expected
        # The download itself must have produced the file
        assert os.path.exists(target)
|
||||
|
||||
|
||||
@pytest.mark.network
def test_downloader_arbitrary_progressbar(capsys):
    "Setup a downloader function with an arbitrary progress bar class."

    class MinimalProgressDisplay:
        """A minimalist replacement for tqdm.tqdm"""

        def __init__(self, total):
            # Progress so far and the expected final value (may be None
            # until the downloader resets it with the content length)
            self.count = 0
            self.total = total

        def __repr__(self):
            """represent current completion"""
            return str(self.count) + "/" + str(self.total)

        def render(self):
            """print self.__repr__ to stderr"""
            # Carriage return (no newline) so updates overwrite in place,
            # mimicking tqdm's terminal behavior
            print(f"\r{self}", file=sys.stderr, end="")

        def update(self, i):
            """modify completion and render"""
            self.count = i
            self.render()

        def reset(self):
            """set counter to 0"""
            self.count = 0

        @staticmethod
        def close():
            """print a new empty line"""
            print("", file=sys.stderr)

    # Pass an instance (not the tqdm class) as the progress bar
    pbar = MinimalProgressDisplay(total=None)
    download = HTTPDownloader(progressbar=pbar)
    with TemporaryDirectory() as local_store:
        fname = "large-data.txt"
        url = BASEURL + fname
        outfile = os.path.join(local_store, "large-data.txt")
        download(url, outfile, None)
        # Read stderr and make sure the progress bar is printed only when told
        captured = capsys.readouterr()
        printed = captured.err.split("\r")[-1].strip()

        # large-data.txt is 336 bytes, so the final render is "336/336"
        progress = "336/336"
        assert printed == progress

        # Check that the downloaded file has the right content
        check_large_data(outfile)
|
||||
|
||||
|
||||
class TestZenodoAPISupport:
    """
    Test support for different Zenodo APIs.

    Zenodo changed its JSON schema: the legacy API lists files with a
    ``key`` field and an ``md5:``-prefixed checksum, while the new API
    uses ``filename`` and a bare checksum. These fixtures exercise both,
    plus an ambiguous response that mixes them.
    """

    # Shared fixture data used to fabricate API responses
    article_id = 123456
    doi = f"10.0001/zenodo.{article_id}"
    doi_url = f"https://doi.org/{doi}"
    file_name = "my-file.zip"
    file_url = (
        "https://zenodo.org/api/files/513d7033-93a2-4eeb-821c-2fb0bbab0012/my-file.zip"
    )
    file_checksum = "2942bfabb3d05332b66eb128e0842cff"

    # Legacy API: files carry "key" and an "md5:"-prefixed checksum
    legacy_api_response = {
        "created": "2021-20-19T08:00:00.000000+00:00",
        "modified": "2021-20-19T08:00:00.000000+00:00",
        "id": article_id,
        "doi": doi,
        "doi_url": doi_url,
        "files": [
            {
                "id": "513d7033-93a2-4eeb-821c-2fb0bbab0012",
                "key": file_name,
                "checksum": f"md5:{file_checksum}",
                "links": {
                    "self": file_url,
                },
            }
        ],
    }

    # New API: files carry "filename" and a bare checksum
    new_api_response = {
        "created": "2021-20-19T08:00:00.000000+00:00",
        "modified": "2021-20-19T08:00:00.000000+00:00",
        "id": article_id,
        "doi": doi,
        "doi_url": doi_url,
        "files": [
            {
                "id": "513d7033-93a2-4eeb-821c-2fb0bbab0012",
                "filename": file_name,
                "checksum": file_checksum,
                "links": {
                    "self": file_url,
                },
            }
        ],
    }

    # Mixes new-style and legacy-style entries, so the API version
    # cannot be determined unambiguously
    invalid_api_response = {
        "created": "2021-20-19T08:00:00.000000+00:00",
        "modified": "2021-20-19T08:00:00.000000+00:00",
        "id": article_id,
        "doi": doi,
        "doi_url": doi_url,
        "files": [
            {
                "id": "513d7033-93a2-4eeb-821c-2fb0bbab0012",
                "filename": file_name,
                "checksum": file_checksum,
                "links": {
                    "self": file_url,
                },
            },
            {
                "id": "513d7033-93a2-4eeb-821c-2fb0bbab0012",
                "key": file_name,
                "checksum": f"md5:{file_checksum}",
                "links": {
                    "self": file_url,
                },
            },
        ],
    }

    @pytest.mark.parametrize(
        "api_version, api_response",
        [
            ("legacy", legacy_api_response),
            ("new", new_api_response),
            ("invalid", invalid_api_response),
        ],
    )
    def test_api_version(self, httpserver, api_version, api_response):
        """
        Test if the API version is correctly detected.
        """
        # Create a local http server that serves the canned response
        httpserver.expect_request(f"/zenodo.{self.article_id}").respond_with_json(
            api_response
        )
        # Create Zenodo downloader
        downloader = ZenodoRepository(doi=self.doi, archive_url=self.doi_url)
        # Override base url for the API of the downloader so it hits the
        # local server instead of the real Zenodo
        downloader.base_api_url = httpserver.url_for("")
        # Check if the API version is correctly identified
        if api_version != "invalid":
            assert downloader.api_version == api_version
        else:
            # An ambiguous response must raise instead of guessing
            msg = "Couldn't determine the version of the Zenodo API"
            with pytest.raises(ValueError, match=msg):
                api_version = downloader.api_version

    @pytest.mark.parametrize(
        "api_version, api_response",
        [("legacy", legacy_api_response), ("new", new_api_response)],
    )
    def test_download_url(self, httpserver, api_version, api_response):
        """
        Test if the download url is correct for each API version.
        """
        # Create a local http server that serves the canned response
        httpserver.expect_request(f"/zenodo.{self.article_id}").respond_with_json(
            api_response
        )
        # Create Zenodo downloader
        downloader = ZenodoRepository(doi=self.doi, archive_url=self.doi_url)
        # Override base url for the API of the downloader
        downloader.base_api_url = httpserver.url_for("")
        # Check if the download url is correct
        download_url = downloader.download_url(file_name=self.file_name)
        if api_version == "legacy":
            # Legacy responses provide the full file URL directly
            assert download_url == self.file_url
        else:
            # New responses require building the /records/ download URL
            expected_url = (
                "https://zenodo.org/records/"
                f"{self.article_id}/files/{self.file_name}?download=1"
            )
            assert download_url == expected_url

    @pytest.mark.parametrize(
        "api_response",
        [legacy_api_response, new_api_response],
    )
    def test_populate_registry(self, httpserver, tmp_path, api_response):
        """
        Test if population of registry is correctly done for each API version.
        """
        # Create a local http server that serves the canned response
        httpserver.expect_request(f"/zenodo.{self.article_id}").respond_with_json(
            api_response
        )
        # Create sample pooch object
        puppy = Pooch(base_url="", path=tmp_path)
        # Create Zenodo downloader
        downloader = ZenodoRepository(doi=self.doi, archive_url=self.doi_url)
        # Override base url for the API of the downloader
        downloader.base_api_url = httpserver.url_for("")
        # Populate registry: both API styles must yield the same
        # "md5:"-prefixed checksum entry
        downloader.populate_registry(puppy)
        assert puppy.registry == {self.file_name: f"md5:{self.file_checksum}"}
|
||||
|
||||
|
||||
class TestDOIDownloaderHeaders:
    """Test the headers argument in DOIDownloader."""

    def test_default_headers(self):
        """Omitting headers and passing None should both use the defaults."""
        downloader = DOIDownloader()
        assert downloader.headers == REQUESTS_HEADERS
        # Explicit None must behave exactly like the default
        downloader = DOIDownloader(headers=None)
        assert downloader.headers == REQUESTS_HEADERS

    def test_overwrite_headers(self):
        """Custom headers should fully replace (not merge with) the defaults."""
        downloader = DOIDownloader(headers={"custom": "field"})
        expected_headers = {
            "custom": "field",
        }
        assert downloader.headers == expected_headers

    def test_headers_empty_dict(self):
        """Test passing an empty dict to headers."""
        # An empty dict is kept as-is, meaning no headers are sent
        downloader = DOIDownloader(headers={})
        assert downloader.headers == {}
|
||||
@@ -0,0 +1,204 @@
|
||||
# Copyright (c) 2018 The Pooch Developers.
|
||||
# Distributed under the terms of the BSD 3-Clause License.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
|
||||
#
|
||||
# pylint: disable=redefined-outer-name
|
||||
"""
|
||||
Test the hash calculation and checking functions.
|
||||
"""
|
||||
import os
|
||||
from pathlib import Path
|
||||
from tempfile import NamedTemporaryFile
|
||||
|
||||
import pytest
|
||||
|
||||
try:
|
||||
import xxhash
|
||||
|
||||
XXHASH_MAJOR_VERSION = int(xxhash.VERSION.split(".", maxsplit=1)[0])
|
||||
except ImportError:
|
||||
xxhash = None # type: ignore[assignment]
|
||||
XXHASH_MAJOR_VERSION = 0
|
||||
|
||||
from ..core import Pooch
|
||||
from ..hashes import (
|
||||
make_registry,
|
||||
file_hash,
|
||||
hash_matches,
|
||||
)
|
||||
from .utils import check_tiny_data, mirror_directory
|
||||
|
||||
DATA_DIR = str(Path(__file__).parent / "data" / "store")
|
||||
REGISTRY = (
|
||||
"tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d\n"
|
||||
)
|
||||
REGISTRY_RECURSIVE = (
|
||||
"subdir/tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d\n"
|
||||
"tiny-data.txt baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d\n"
|
||||
)
|
||||
TINY_DATA_HASHES_HASHLIB = {
|
||||
"sha1": "c03148994acd89317915ea2f2d080d6dd127aa09",
|
||||
"sha256": "baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d",
|
||||
"md5": "70e2afd3fd7e336ae478b1e740a5f08e",
|
||||
}
|
||||
TINY_DATA_HASHES_XXH = {
|
||||
"xxh64": "f843815fe57948fa",
|
||||
"xxh32": "98d6f1a2",
|
||||
# Require xxHash > 2.0
|
||||
"xxh128": "0267d220db258fffb0c567c0ecd1b689",
|
||||
"xxh3_128": "0267d220db258fffb0c567c0ecd1b689",
|
||||
"xxh3_64": "811e3f2a12aec53f",
|
||||
}
|
||||
TINY_DATA_HASHES = TINY_DATA_HASHES_HASHLIB.copy()
|
||||
TINY_DATA_HASHES.update(TINY_DATA_HASHES_XXH)
|
||||
|
||||
|
||||
@pytest.fixture
def data_dir_mirror(tmp_path):
    """
    Mirror the test data folder on a temporary directory. Needed to avoid
    permission errors when pooch is installed on a non-writable path.

    Returns the path of the writable copy of the data directory.
    """
    return mirror_directory(DATA_DIR, tmp_path)
|
||||
|
||||
|
||||
def test_make_registry(data_dir_mirror):
    """Check that the registry builder creates the right names and hashes."""
    registry_file = NamedTemporaryFile(delete=False)  # pylint: disable=consider-using-with
    # The handle must be closed before make_registry can write to the file
    registry_file.close()
    try:
        make_registry(data_dir_mirror, registry_file.name, recursive=False)
        with open(registry_file.name, encoding="utf-8") as fin:
            assert fin.read() == REGISTRY
        # The generated registry must be loadable and usable by a Pooch
        pup = Pooch(path=data_dir_mirror, base_url="some bogus URL", registry={})
        pup.load_registry(registry_file.name)
        fetched = pup.fetch("tiny-data.txt")
        # The fetch must resolve to the local copy, without downloading
        assert fetched == str(data_dir_mirror / "tiny-data.txt")
        check_tiny_data(fetched)
    finally:
        os.remove(registry_file.name)
|
||||
|
||||
|
||||
def test_make_registry_recursive(data_dir_mirror):
    """Check that the registry builder works in recursive mode."""
    registry_file = NamedTemporaryFile(delete=False)  # pylint: disable=consider-using-with
    # The handle must be closed before make_registry can write to the file
    registry_file.close()
    try:
        make_registry(data_dir_mirror, registry_file.name, recursive=True)
        with open(registry_file.name, encoding="utf-8") as fin:
            assert fin.read() == REGISTRY_RECURSIVE
        # The registry must resolve both the top-level file and the one
        # inside a subdirectory
        pup = Pooch(path=data_dir_mirror, base_url="some bogus URL", registry={})
        pup.load_registry(registry_file.name)
        assert pup.fetch("tiny-data.txt") == str(data_dir_mirror / "tiny-data.txt")
        check_tiny_data(pup.fetch("tiny-data.txt"))
        expected = str(data_dir_mirror / "subdir" / "tiny-data.txt")
        assert pup.fetch("subdir/tiny-data.txt") == expected
        check_tiny_data(pup.fetch("subdir/tiny-data.txt"))
    finally:
        os.remove(registry_file.name)
|
||||
|
||||
|
||||
def test_file_hash_invalid_algorithm():
    """An unknown hashing algorithm name must raise ValueError."""
    with pytest.raises(ValueError) as exc:
        file_hash(fname="something", alg="blah")
    # The error must quote the offending algorithm name
    assert "'blah'" in str(exc.value)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "alg,expected_hash",
    list(TINY_DATA_HASHES.items()),
    ids=list(TINY_DATA_HASHES.keys()),
)
def test_file_hash(alg, expected_hash):
    """Test the hash calculation using hashlib and xxhash."""
    if alg.startswith("xxh"):
        if xxhash is None:
            pytest.skip("requires xxhash")
        # Only xxh32/xxh64 exist in xxhash < 2.0
        if XXHASH_MAJOR_VERSION < 2 and alg not in ["xxh64", "xxh32"]:
            pytest.skip("requires xxhash > 2.0")
    fname = os.path.join(DATA_DIR, "tiny-data.txt")
    check_tiny_data(fname)
    assert file_hash(fname, alg) == expected_hash
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "alg,expected_hash",
    list(TINY_DATA_HASHES.items()),
    ids=list(TINY_DATA_HASHES.keys()),
)
def test_hash_matches(alg, expected_hash):
    """Make sure the hash checking function works."""
    if alg.startswith("xxh"):
        if xxhash is None:
            pytest.skip("requires xxhash")
        # Only xxh32/xxh64 exist in xxhash < 2.0
        if XXHASH_MAJOR_VERSION < 2 and alg not in ["xxh64", "xxh32"]:
            pytest.skip("requires xxhash > 2.0")
    fname = os.path.join(DATA_DIR, "tiny-data.txt")
    check_tiny_data(fname)
    # A matching hash should pass and a bogus one should fail
    assert hash_matches(fname, f"{alg}:{expected_hash}")
    assert not hash_matches(fname, f"{alg}:blablablabla")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "alg,expected_hash",
    list(TINY_DATA_HASHES_HASHLIB.items()),
    ids=list(TINY_DATA_HASHES_HASHLIB.keys()),
)
def test_hash_matches_strict(alg, expected_hash):
    """Make sure the hash checking function raises an exception if strict."""
    fname = os.path.join(DATA_DIR, "tiny-data.txt")
    check_tiny_data(fname)
    # A correct hash passes in strict mode
    assert hash_matches(fname, f"{alg}:{expected_hash}", strict=True)
    # A wrong hash raises, with the error naming the given source
    bad_hash = f"{alg}:blablablabla"
    with pytest.raises(ValueError) as error:
        hash_matches(fname, bad_hash, strict=True, source="Neverland")
    assert "Neverland" in str(error.value)
    # Without a source, the file name is used in the error instead
    with pytest.raises(ValueError) as error:
        hash_matches(fname, bad_hash, strict=True, source=None)
    assert fname in str(error.value)
|
||||
|
||||
|
||||
def test_hash_matches_none():
    """hash_matches must always return True when known_hash is None."""
    fname = os.path.join(DATA_DIR, "tiny-data.txt")
    assert hash_matches(fname, known_hash=None)
    # Even an invalid file name passes when no hash is expected
    assert hash_matches(fname="", known_hash=None)
    # strict mode must not raise either
    assert hash_matches(fname, known_hash=None, strict=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "alg,expected_hash",
    list(TINY_DATA_HASHES_HASHLIB.items()),
    ids=list(TINY_DATA_HASHES_HASHLIB.keys()),
)
def test_hash_matches_uppercase(alg, expected_hash):
    """Hash matching should be independent of upper or lower case."""
    fname = os.path.join(DATA_DIR, "tiny-data.txt")
    check_tiny_data(fname)
    # An upper-case hash must still match the lower-case file hash
    upper_hash = f"{alg}:{expected_hash.upper()}"
    assert hash_matches(fname, upper_hash, strict=True)
    # A truncated hash must fail and report the source in the error
    with pytest.raises(ValueError) as error:
        hash_matches(fname, upper_hash[:-5], strict=True, source="Neverland")
    assert "Neverland" in str(error.value)
|
||||
@@ -0,0 +1,49 @@
|
||||
# Copyright (c) 2018 The Pooch Developers.
|
||||
# Distributed under the terms of the BSD 3-Clause License.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
|
||||
#
|
||||
# pylint: disable=redefined-outer-name
|
||||
"""
|
||||
Test the entire process of creating a Pooch and using it.
|
||||
"""
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from .. import create, os_cache
|
||||
from .. import __version__ as full_version
|
||||
from .utils import check_tiny_data, capture_log
|
||||
|
||||
|
||||
@pytest.mark.network
def test_create_and_fetch():
    """Create a versioned cache, download files, and refresh corrupted ones."""
    path = os_cache("pooch-testing")
    # Start from a clean slate so the storage-creation check is meaningful
    if path.exists():
        shutil.rmtree(str(path))
    pup = create(
        path=path,
        base_url="https://github.com/fatiando/pooch/raw/{version}/data/",
        version=full_version,
        version_dev="main",
        env="POOCH_DATA_DIR",
    )
    # The local storage must only appear once a download actually happens
    assert not pup.abspath.exists()
    pup.load_registry(Path(os.path.dirname(__file__), "data", "registry.txt"))
    for target in ["tiny-data.txt", "subdir/tiny-data.txt"]:
        # The first fetch downloads the file
        with capture_log() as log_file:
            fname = pup.fetch(target)
            assert log_file.getvalue().split()[0] == "Downloading"
        check_tiny_data(fname)
        # Corrupt the local copy so the next fetch triggers an update
        with open(fname, "w", encoding="utf-8") as fout:
            fout.write("The data is now different")
        with capture_log() as log_file:
            fname = pup.fetch(target)
            assert log_file.getvalue().split()[0] == "Updating"
        check_tiny_data(fname)
|
||||
@@ -0,0 +1,289 @@
|
||||
# Copyright (c) 2018 The Pooch Developers.
|
||||
# Distributed under the terms of the BSD 3-Clause License.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
|
||||
#
|
||||
"""
|
||||
Test the processor hooks
|
||||
"""
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
import warnings
|
||||
|
||||
import pytest
|
||||
|
||||
from .. import Pooch
|
||||
from ..processors import Unzip, Untar, Decompress
|
||||
|
||||
from .utils import pooch_test_url, pooch_test_registry, check_tiny_data, capture_log
|
||||
|
||||
|
||||
REGISTRY = pooch_test_registry()
|
||||
BASEURL = pooch_test_url()
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.parametrize(
    "method,ext,name",
    [
        ("auto", "xz", None),
        ("lzma", "xz", None),
        ("xz", "xz", None),
        ("bzip2", "bz2", None),
        ("gzip", "gz", None),
        ("gzip", "gz", "different-name.txt"),
    ],
    ids=["auto", "lzma", "xz", "bz2", "gz", "name"],
)
def test_decompress(method, ext, name):
    """Check that decompression after download works for all formats."""
    processor = Decompress(method=method, name=name)
    with TemporaryDirectory() as local_store:
        path = Path(local_store)
        # By default the output appends ".decomp"; otherwise the given
        # name is used verbatim
        if name is None:
            true_path = str(path / f"tiny-data.txt.{ext}.decomp")
        else:
            true_path = str(path / name)
        pup = Pooch(path=path, base_url=BASEURL, registry=REGISTRY)
        # The first fetch must log both the download and the decompression
        with capture_log() as log_file:
            fname = pup.fetch(f"tiny-data.txt.{ext}", processor=processor)
            lines = log_file.getvalue().splitlines()
            assert len(lines) == 2
            assert lines[0].split()[0] == "Downloading"
            assert lines[-1].startswith("Decompressing")
            assert method in lines[-1]
        assert fname == true_path
        check_tiny_data(fname)
        # A second fetch hits the cache: the processor must stay silent
        with capture_log() as log_file:
            fname = pup.fetch(f"tiny-data.txt.{ext}", processor=processor)
            assert log_file.getvalue() == ""
        assert fname == true_path
        check_tiny_data(fname)
|
||||
|
||||
|
||||
@pytest.mark.network
def test_decompress_fails():
    "Should fail if method='auto' and no extension is given in the file name"
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=Path(local_store), base_url=BASEURL, registry=REGISTRY)
        # ".txt" is not a known compression extension, so "auto" can't guess.
        with pytest.raises(ValueError) as exception:
            with warnings.catch_warnings():
                pup.fetch("tiny-data.txt", processor=Decompress(method="auto"))
        message = exception.value.args[0]
        assert message.startswith("Unrecognized file extension '.txt'")
        assert "pooch.Unzip/Untar" not in message
        # A made-up method name must be rejected outright.
        with pytest.raises(ValueError) as exception:
            with warnings.catch_warnings():
                pup.fetch("tiny-data.txt", processor=Decompress(method="bla"))
        message = exception.value.args[0]
        assert message.startswith("Invalid compression method 'bla'")
        assert "pooch.Unzip/Untar" not in message
        # Archive formats should point the user at the Unzip/Untar processors.
        with pytest.raises(ValueError) as exception:
            with warnings.catch_warnings():
                pup.fetch("tiny-data.txt", processor=Decompress(method="zip"))
        message = exception.value.args[0]
        assert message.startswith("Invalid compression method 'zip'")
        assert "pooch.Unzip/Untar" in message
        with pytest.raises(ValueError) as exception:
            with warnings.catch_warnings():
                pup.fetch("store.zip", processor=Decompress(method="auto"))
        message = exception.value.args[0]
        assert message.startswith("Unrecognized file extension '.zip'")
        assert "pooch.Unzip/Untar" in message
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.parametrize(
    "target_path", [None, "some_custom_path"], ids=["default_path", "custom_path"]
)
@pytest.mark.parametrize(
    "archive,members",
    [
        ("tiny-data", ["tiny-data.txt"]),
        ("store", None),
        ("store", ["store/tiny-data.txt"]),
        ("store", ["store/subdir/tiny-data.txt"]),
        ("store", ["store/subdir"]),
        ("store", ["store/tiny-data.txt", "store/subdir"]),
    ],
    ids=[
        "single_file",
        "archive_all",
        "archive_file",
        "archive_subdir_file",
        "archive_subdir",
        "archive_multiple",
    ],
)
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
    ids=["Unzip", "Untar"],
)
def test_unpacking(processor_class, extension, target_path, archive, members):
    "Tests the behaviour of processors for unpacking archives (Untar, Unzip)"
    processor = processor_class(members=members, extract_dir=target_path)
    # When no extract_dir is given, the processor appends its own suffix to
    # the archive name to build the default output folder.
    if target_path is None:
        target_path = archive + extension + processor.suffix
    with TemporaryDirectory() as tmpdir:
        base = Path(tmpdir)
        true_paths, expected_log = _unpacking_expected_paths_and_logs(
            archive, members, base / target_path, processor_class.__name__
        )
        pup = Pooch(path=base, base_url=BASEURL, registry=REGISTRY)
        # First fetch: download plus one extraction message per member.
        with capture_log() as log_file:
            fnames = pup.fetch(archive + extension, processor=processor)
            assert set(fnames) == true_paths
            _check_logs(log_file, expected_log)
        for fname in fnames:
            check_tiny_data(fname)
        # Cached fetch: the processor stays silent.
        with capture_log() as log_file:
            fnames = pup.fetch(archive + extension, processor=processor)
            assert set(fnames) == true_paths
            _check_logs(log_file, [])
        for fname in fnames:
            check_tiny_data(fname)
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
)
def test_multiple_unpacking(processor_class, extension):
    "Test that multiple subsequent calls to a processor yield correct results"
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=Path(local_store), base_url=BASEURL, registry=REGISTRY)
        archive = "store" + extension

        # Extract a single member first.
        one_member = processor_class(members=["store/tiny-data.txt"])
        extracted = pup.fetch(archive, processor=one_member)
        assert len(extracted) == 1
        check_tiny_data(extracted[0])

        # Then ask for two members of the same (already downloaded) archive.
        two_members = processor_class(
            members=["store/tiny-data.txt", "store/subdir/tiny-data.txt"]
        )
        extracted = pup.fetch(archive, processor=two_members)
        assert len(extracted) == 2
        check_tiny_data(extracted[0])
        check_tiny_data(extracted[1])

        # Asking for one member again must return only that single member.
        extracted = pup.fetch(archive, processor=one_member)
        assert len(extracted) == 1
        check_tiny_data(extracted[0])
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
)
def test_unpack_members_with_leading_dot(processor_class, extension):
    "Unpack members can also be specified with a leading './' prefix"
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=Path(local_store), base_url=BASEURL, registry=REGISTRY)
        # A "./"-prefixed member name should match the same archive entry.
        processor = processor_class(members=["./store/tiny-data.txt"])
        extracted = pup.fetch("store" + extension, processor=processor)
        assert len(extracted) == 1
        check_tiny_data(extracted[0])
|
||||
|
||||
|
||||
def _check_logs(log_file, expected_lines):
    """
    Assert that the captured log has exactly as many lines as expected and
    that each line starts with the corresponding expected prefix.
    """
    captured = log_file.getvalue().splitlines()
    assert len(captured) == len(expected_lines)
    for actual, expected in zip(captured, expected_lines):
        assert actual.startswith(expected)
|
||||
|
||||
|
||||
def _unpacking_expected_paths_and_logs(archive, members, path, name):
    """
    Build the set of extracted file paths and the expected log messages for
    fetching the given archive with the given members.
    """
    log_lines = ["Downloading"]
    if archive == "tiny-data":
        true_paths = {str(path / "tiny-data.txt")}
        log_lines.append("Extracting 'tiny-data.txt'")
    elif archive == "store" and members is None:
        # Doubling the class name's last letter yields the gerund used in
        # the log message: "Unzip" -> "Unzipping", "Untar" -> "Untarring".
        log_lines.append(f"{name}{name[-1]}ing contents")
        true_paths = {
            str(path / "store" / "tiny-data.txt"),
            str(path / "store" / "subdir" / "tiny-data.txt"),
        }
    elif archive == "store" and members is not None:
        true_paths = set()
        for member in members:
            # Members pointing at a directory resolve to the data file
            # inside it.
            member_path = path.joinpath(*member.split("/"))
            if not str(member_path).endswith("tiny-data.txt"):
                member_path = member_path / "tiny-data.txt"
            true_paths.add(str(member_path))
            log_lines.append(f"Extracting '{member}'")
    return true_paths, log_lines
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
)
def test_unpacking_members_then_no_members(processor_class, extension):
    """
    Test that calling with valid members then without them works.
    https://github.com/fatiando/pooch/issues/364
    """
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=Path(local_store), base_url=BASEURL, registry=REGISTRY)
        archive = "store" + extension

        # Extract a single valid member first.
        with_member = processor_class(members=["store/tiny-data.txt"])
        extracted = pup.fetch(archive, processor=with_member)
        assert len(extracted) == 1

        # Extracting everything afterwards must yield more than one file.
        extract_all = processor_class()
        extracted = pup.fetch(archive, processor=extract_all)
        assert len(extracted) > 1
|
||||
|
||||
|
||||
@pytest.mark.network
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
)
def test_unpacking_wrong_members_then_no_members(processor_class, extension):
    """
    Test that calling with invalid members then without them works.
    https://github.com/fatiando/pooch/issues/364
    """
    with TemporaryDirectory() as local_store:
        pup = Pooch(path=Path(local_store), base_url=BASEURL, registry=REGISTRY)
        archive = "store" + extension

        # A member that doesn't exist in the archive extracts nothing.
        bad_member = processor_class(members=["not-a-valid-file.csv"])
        extracted = pup.fetch(archive, processor=bad_member)
        assert len(extracted) == 0

        # A later fetch without members must still extract everything.
        extract_all = processor_class()
        extracted = pup.fetch(archive, processor=extract_all)
        assert len(extracted) > 0
|
||||
@@ -0,0 +1,197 @@
|
||||
# Copyright (c) 2018 The Pooch Developers.
|
||||
# Distributed under the terms of the BSD 3-Clause License.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
|
||||
#
|
||||
"""
|
||||
Test the utility functions.
|
||||
"""
|
||||
import os
|
||||
import shutil
|
||||
import time
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
from tempfile import TemporaryDirectory
|
||||
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
|
||||
|
||||
import pytest
|
||||
|
||||
from ..utils import (
|
||||
parse_url,
|
||||
make_local_storage,
|
||||
temporary_file,
|
||||
unique_file_name,
|
||||
)
|
||||
|
||||
|
||||
def test_unique_name_long():
    "The file name should never be longer than 255 characters"
    url = f"https://www.something.com/data{'a' * 500}.txt"
    assert len(url) > 255
    fname = unique_file_name(url)
    # Truncated to the common filesystem limit while keeping the suffix.
    assert len(fname) == 255
    assert fname.endswith("aaaaaa.txt")
    assert fname.split("-")[1][:10] == "aaaaaaaaaa"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "pool",
    [ThreadPoolExecutor, ProcessPoolExecutor],
    ids=["threads", "processes"],
)
def test_make_local_storage_parallel(pool, monkeypatch):
    "Try to create the cache folder in parallel"
    # Simultaneous attempts at creating the folder used to raise.
    # Regression test for https://github.com/fatiando/pooch/issues/170

    # Slow down makedirs so every worker sees the folder as missing before
    # any of them manages to create it. Keep a handle on the real function
    # so the patched version can delegate without recursing into itself.
    real_makedirs = os.makedirs

    def delayed_makedirs(path, exist_ok=False):  # pylint: disable=unused-argument
        "Delay before calling makedirs"
        time.sleep(1.5)
        real_makedirs(path, exist_ok=exist_ok)

    monkeypatch.setattr(os, "makedirs", delayed_makedirs)

    data_cache = os.path.join(os.curdir, "test_parallel_cache")
    assert not os.path.exists(data_cache)

    try:
        with pool() as executor:
            futures = [
                executor.submit(make_local_storage, data_cache) for _ in range(4)
            ]
            # Propagate any exception raised by the workers.
            for future in futures:
                future.result()
            assert os.path.exists(data_cache)
    finally:
        if os.path.exists(data_cache):
            shutil.rmtree(data_cache)
|
||||
|
||||
|
||||
def test_local_storage_makedirs_permissionerror(monkeypatch):
    "Should warn the user when can't create the local data dir"

    def failing_makedirs(path, exist_ok=False):  # pylint: disable=unused-argument
        "Raise an exception to mimic permission issues"
        raise PermissionError("Fake error")

    data_cache = os.path.join(os.curdir, "test_permission")
    assert not os.path.exists(data_cache)

    monkeypatch.setattr(os, "makedirs", failing_makedirs)

    with pytest.raises(PermissionError) as error:
        make_local_storage(
            path=data_cache,
            env="SOME_VARIABLE",
        )
    # The error message should name the cache and the env variable the user
    # can set to relocate it.
    assert "Pooch could not create data cache" in str(error)
    assert "'SOME_VARIABLE'" in str(error)
|
||||
|
||||
|
||||
def test_local_storage_newfile_permissionerror(monkeypatch):
    "Should warn the user when can't write to the local data dir"
    # Separate from the makedirs test: here the data dir already exists but
    # writing a file into it fails.

    def failing_tempfile(**kwargs):  # pylint: disable=unused-argument
        "Raise an exception to mimic permission issues"
        raise PermissionError("Fake error")

    with TemporaryDirectory() as data_cache:
        os.makedirs(os.path.join(data_cache, "1.0"))
        assert os.path.exists(data_cache)

        monkeypatch.setattr(tempfile, "NamedTemporaryFile", failing_tempfile)

        with pytest.raises(PermissionError) as error:
            make_local_storage(
                path=data_cache,
                env="SOME_VARIABLE",
            )
        assert "Pooch could not write to data cache" in str(error)
        assert "'SOME_VARIABLE'" in str(error)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "url,output",
    [
        (
            "http://127.0.0.1:8080/test.nc",
            {"protocol": "http", "netloc": "127.0.0.1:8080", "path": "/test.nc"},
        ),
        (
            "ftp://127.0.0.1:8080/test.nc",
            {"protocol": "ftp", "netloc": "127.0.0.1:8080", "path": "/test.nc"},
        ),
        (
            "doi:10.6084/m9.figshare.923450.v1/dike.json",
            {
                "protocol": "doi",
                "netloc": "10.6084/m9.figshare.923450.v1",
                "path": "/dike.json",
            },
        ),
        (
            r"doi:10.5281/zenodo.7632643/santisoler/pooch-test-data-v1.zip",
            {
                "protocol": "doi",
                "netloc": "10.5281/zenodo.7632643",
                "path": "/santisoler/pooch-test-data-v1.zip",
            },
        ),
    ],
    ids=["http", "ftp", "doi", "zenodo-doi-with-slash"],
)
def test_parse_url(url, output):
    "Parse URL into 3 components"
    parsed = parse_url(url)
    assert parsed == output
|
||||
|
||||
|
||||
def test_parse_url_invalid_doi():
    "Should fail if we forget to not include // in the DOI link"
    # "doi://" (with slashes) is not a valid DOI scheme for pooch.
    with pytest.raises(ValueError):
        parse_url("doi://XXX/XXX/fname.txt")
|
||||
|
||||
|
||||
def test_temporary_file():
    "Make sure the file is writable and cleaned up in the end"
    with temporary_file() as fname:
        assert Path(fname).exists()
        with open(fname, "w", encoding="utf-8") as writer:
            writer.write("Meh")
        with open(fname, encoding="utf-8") as reader:
            assert reader.read().strip() == "Meh"
    # Leaving the context must delete the file.
    assert not Path(fname).exists()
|
||||
|
||||
|
||||
def test_temporary_file_path():
    "Make sure the file is writable and cleaned up in the end when given a dir"
    with TemporaryDirectory() as directory:
        with temporary_file(directory) as fname:
            assert Path(fname).exists()
            # The temporary file must live inside the requested directory.
            assert directory in fname
            with open(fname, "w", encoding="utf-8") as writer:
                writer.write("Meh")
            with open(fname, encoding="utf-8") as reader:
                assert reader.read().strip() == "Meh"
        assert not Path(fname).exists()
|
||||
|
||||
|
||||
def test_temporary_file_exception():
    "Make sure the file is writable and cleaned up when there is an exception"
    try:
        with temporary_file() as fname:
            assert Path(fname).exists()
            # The context manager must still clean up on error.
            raise ValueError("Nooooooooo!")
    except ValueError:
        assert not Path(fname).exists()
|
||||
@@ -0,0 +1,19 @@
|
||||
# Copyright (c) 2018 The Pooch Developers.
|
||||
# Distributed under the terms of the BSD 3-Clause License.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
|
||||
#
|
||||
"""
|
||||
Test the version.
|
||||
"""
|
||||
from packaging.version import Version
|
||||
|
||||
import pooch
|
||||
|
||||
|
||||
def test_version():
    "Check there's a usable version number in the usual __version__"
    assert pooch.__version__.startswith("v")
    # Parsing raises InvalidVersion if the string is not PEP440 compliant.
    Version(pooch.__version__)
|
||||
@@ -0,0 +1,237 @@
|
||||
# Copyright (c) 2018 The Pooch Developers.
|
||||
# Distributed under the terms of the BSD 3-Clause License.
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
#
|
||||
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
|
||||
#
|
||||
"""
|
||||
Utilities for testing code.
|
||||
"""
|
||||
import os
|
||||
import io
|
||||
import logging
|
||||
import shutil
|
||||
import stat
|
||||
from pathlib import Path
|
||||
from contextlib import contextmanager
|
||||
|
||||
from .. import __version__ as full_version
|
||||
from ..utils import check_version, get_logger
|
||||
|
||||
|
||||
def check_tiny_data(fname):
    """
    Assert that *fname* exists and its contents match tiny-data.txt exactly.
    """
    assert os.path.exists(fname)
    expected = "\n".join(
        ["# A tiny data file for test purposes only", "1 2 3 4 5 6"]
    )
    with open(fname, encoding="utf-8") as tinydata:
        assert tinydata.read().strip() == expected
|
||||
|
||||
|
||||
def check_large_data(fname):
    """
    Assert that *fname* exists and its contents match large-data.txt exactly.
    """
    assert os.path.exists(fname)
    # Header line (the "larer" spelling matches the actual data file) plus
    # 6002 identical data rows.
    expected = ["# A larer data file for test purposes only"]
    expected += ["1 2 3 4 5 6"] * 6002
    with open(fname, encoding="utf-8") as data:
        assert data.read().strip() == "\n".join(expected)
|
||||
|
||||
|
||||
def pooch_test_url():
    """
    Get the base URL for the test data used in Pooch itself.

    The URL is a GitHub raw link to the ``pooch/tests/data`` directory from
    the `GitHub repository <https://github.com/fatiando/pooch>`__. It matches
    the pooch version specified in ``pooch.version.full_version``.

    Returns
    -------
    url
        The versioned URL for pooch's test data.

    """
    # Development versions fall back to the "main" branch.
    tag = check_version(full_version, fallback="main")
    return f"https://github.com/fatiando/pooch/raw/{tag}/pooch/tests/data/"
|
||||
|
||||
|
||||
def pooch_test_figshare_url():
    """
    Get the base URL for the test data stored in figshare.

    The URL contains the DOI for the figshare dataset using the appropriate
    version for this version of Pooch.

    Returns
    -------
    url
        The URL for pooch's test data.

    """
    return "doi:10.6084/m9.figshare.14763051.v1/"
|
||||
|
||||
|
||||
def pooch_test_zenodo_url():
    """
    Get the base URL for the test data stored in Zenodo.

    The URL contains the DOI for the Zenodo dataset using the appropriate
    version for this version of Pooch.

    Returns
    -------
    url
        The URL for pooch's test data.

    """
    return "doi:10.5281/zenodo.4924875/"
|
||||
|
||||
|
||||
def pooch_test_zenodo_with_slash_url():
    """
    Get base URL for test data in Zenodo, where the file name contains a slash

    The URL contains the DOI for the Zenodo dataset that has a slash in the
    filename (created with the GitHub-Zenodo integration service), using the
    appropriate version for this version of Pooch.

    Returns
    -------
    url
        The URL for pooch's test data.

    """
    return "doi:10.5281/zenodo.7632643/"
|
||||
|
||||
|
||||
def pooch_test_dataverse_url():
    """
    Get the base URL for the test data stored on a DataVerse instance.

    Returns
    -------
    url
        The URL for pooch's test data.
    """
    return "doi:10.11588/data/TKCFEF/"
|
||||
|
||||
|
||||
def pooch_test_registry():
    """
    Get a registry for the test data used in Pooch itself.

    Returns
    -------
    registry
        Dictionary mapping pooch's test data files to their SHA256 hashes.

    """
    return {
        "tiny-data.txt": "baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d",
        "large-data.txt": "98de171fb320da82982e6bf0f3994189fff4b42b23328769afce12bdd340444a",
        "subdir/tiny-data.txt": "baee0894dba14b12085eacb204284b97e362f4f3e5a5807693cc90ef415c1b2d",
        "tiny-data.zip": "0d49e94f07bc1866ec57e7fd1b93a351fba36842ec9b13dd50bf94e8dfa35cbb",
        "store.zip": "0498d2a001e71051bbd2acd2346f38da7cbd345a633cb7bf0f8a20938714b51a",
        "tiny-data.tar.gz": "41503f083814f43a01a8e9a30c28d7a9fe96839a99727a7fdd0acf7cd5bab63b",
        "store.tar.gz": "088c7f4e0f1859b1c769bb6065de24376f366374817ede8691a6ac2e49f29511",
        "tiny-data.txt.bz2": "753663687a4040c90c8578061867d1df623e6aa8011c870a5dbd88ee3c82e306",
        "tiny-data.txt.gz": "2e2da6161291657617c32192dba95635706af80c6e7335750812907b58fd4b52",
        "tiny-data.txt.xz": "99dcb5c32a6e916344bacb4badcbc2f2b6ee196977d1d8187610c21e7e607765",
    }
|
||||
|
||||
|
||||
@contextmanager
def capture_log(level=logging.DEBUG):
    """
    Create a context manager for reading from the logs.

    Attaches a temporary :class:`logging.StreamHandler` writing to an
    in-memory buffer and detaches it on exit. The handler is removed even if
    the body of the ``with`` block raises, so a failing test cannot leave a
    stale handler on the shared pooch logger that would pollute later tests.

    Parameters
    ----------
    level : int
        Minimum logging level captured by the handler (default DEBUG).

    Yields
    ------
    log_file : StringIO
        a file-like object to which the logs were written
    """
    log_file = io.StringIO()
    handler = logging.StreamHandler(log_file)
    handler.setLevel(level)
    get_logger().addHandler(handler)
    try:
        yield log_file
    finally:
        # Always detach: the original only removed the handler on clean
        # exit, leaking it when the with-body raised.
        get_logger().removeHandler(handler)
|
||||
|
||||
|
||||
@contextmanager
def data_over_ftp(server, fname):
    """
    Add a test data file to the test FTP server and clean it up afterwards.

    Parameters
    ----------
    server
        The ``ftpserver`` fixture provided by pytest-localftpserver.
    fname : str
        The name of a file *relative* to the test data folder of the package
        (usually just the file name, not the full path).

    Yields
    ------
    url : str
        The download URL of the data file from the test FTP server.

    """
    source = str(Path(__file__).parent / "data" / fname)
    destination = os.path.join(server.anon_root, fname)
    try:
        shutil.copyfile(source, destination)
        yield f"ftp://localhost/{fname}"
    finally:
        # Remove the copy from the server's root even if the body raised.
        if os.path.exists(destination):
            os.remove(destination)
|
||||
|
||||
|
||||
def _recursive_chmod_directories(root, mode):
    """
    Recursively OR the given permission bits into every directory below root.
    """
    for child in root.iterdir():
        if not child.is_dir():
            continue
        child.chmod(child.stat().st_mode | mode)
        _recursive_chmod_directories(child, mode)
|
||||
|
||||
|
||||
def mirror_directory(source, destination):
    """
    Copy contents of the source directory into destination and fix permissions.

    Parameters
    ----------
    source : str, :class:`pathlib.Path`
        Source data directory.
    destination : str, :class:`pathlib.Path`
        Destination directory that will contain the copy of source. The actual
        source directory (not just it's contents) is copied.

    Returns
    -------
    mirror : :class:`pathlib.Path`
        The path of the mirrored output directory.

    """
    src = Path(source)
    mirror = Path(destination) / src.name
    shutil.copytree(src, mirror)
    # Make sure every copied directory is writable by the owner so tests can
    # modify or delete the mirrored data.
    _recursive_chmod_directories(mirror, mode=stat.S_IWUSR)
    return mirror
|
||||
Reference in New Issue
Block a user