Add managed storage

Hide code cell content
!lamin disconnect
!lamin login testuser1
!lamin delete --force test-add-managed-storage
!docker stop pgtest && docker rm pgtest
import laminci
import pytest
from pathlib import Path
import lamindb_setup as ln_setup
from lamindb_setup._set_managed_storage import set_managed_storage
Hide code cell content
# Start a local test Postgres and create an instance that is not yet registered on the hub.
pgurl = laminci.db.setup_local_test_postgres()
ln_setup.init(
    storage="./storage1",
    name="test-add-managed-storage",
    db=pgurl,
)
# Keep both identifiers around: later cells compare against them.
instance_id = ln_setup.settings.instance._id
storage1_uid = ln_setup.settings.storage.uid

# Before registration, adding a further managed location must be rejected.
expected_error = "ValueError: Can't add additional managed storage locations for instances that aren't managed through the hub."
with pytest.raises(ValueError) as excinfo:
    set_managed_storage("./storage2")
assert excinfo.exconly() == expected_error

ln_setup.register()

After registering the instance on the hub, things work out:

# With the instance registered on the hub, a second managed location can be added.
set_managed_storage("./storage2")
Hide code cell content
# Remember the uid of storage2 so later switches can be checked against it.
storage2_uid = ln_setup.settings.storage.uid
expected_root = f"{Path.cwd()}/storage2"
assert ln_setup.settings.storage.root_as_str == expected_root
# The marker file inside the storage root must contain the uid of this location.
init_marker = ln_setup.settings.storage.root / ".lamindb/_is_initialized"
assert init_marker.read_text() == ln_setup.settings.storage.uid
assert ln_setup.settings.storage.is_on_hub

Let’s confirm that setting managed storage is idempotent and that we can switch between storage locations.

# Switch back to the location the instance was initialised with.
set_managed_storage("./storage1")
expected_root = f"{Path.cwd()}/storage1"
assert ln_setup.settings.storage.root_as_str == expected_root
# The marker file inside the storage root must contain the uid of this location.
init_marker = ln_setup.settings.storage.root / ".lamindb/_is_initialized"
assert init_marker.read_text() == ln_setup.settings.storage.uid
assert ln_setup.settings.storage.is_on_hub
# No new uid may be minted for a location that already exists.
assert storage1_uid == ln_setup.settings.storage.uid

Repeat:

# Setting the location that is already active must leave everything unchanged,
# including the uid recorded when the instance was initialised.
set_managed_storage("./storage1")
assert ln_setup.settings.storage.root_as_str == f"{Path.cwd()}/storage1"
assert (
    ln_setup.settings.storage.root / ".lamindb/_is_initialized"
).read_text() == ln_setup.settings.storage.uid
assert ln_setup.settings.storage.is_on_hub
assert ln_setup.settings.storage.uid == storage1_uid

# Switch to storage2 again. `storage2_uid` deliberately keeps the value captured
# when storage2 was first added: re-reading it from the current settings here
# would turn the final uid comparison into a tautology.
set_managed_storage("./storage2")
assert ln_setup.settings.storage.root_as_str == f"{Path.cwd()}/storage2"
assert (
    ln_setup.settings.storage.root / ".lamindb/_is_initialized"
).read_text() == ln_setup.settings.storage.uid
assert ln_setup.settings.storage.is_on_hub
assert ln_setup.settings.storage.uid == storage2_uid

Cloud storage:

# Switch the managed storage location to a path inside an S3 bucket.
set_managed_storage("s3://lamindb-ci/storage3")
Hide code cell content
assert ln_setup.settings.storage.type_is_cloud
assert ln_setup.settings.storage.root_as_str == "s3://lamindb-ci/storage3"
assert ln_setup.settings.storage.region == "us-west-1"
# The marker object under the storage root must contain the uid of this location.
init_marker = ln_setup.settings.storage.root / ".lamindb/_is_initialized"
assert init_marker.read_text() == ln_setup.settings.storage.uid
# root.fs contains the underlying fsspec filesystem;
# cache_regions is set by lamindb to True for s3 by default
s3_filesystem = ln_setup.settings.storage.root.fs
assert s3_filesystem.cache_regions
assert ln_setup.settings.storage._instance_id is not None

You can pass additional fsspec filesystem arguments for cloud storage, such as `profile` or `cache_regions` (S3 only). For example:

# Set the same S3 location again, this time passing an extra filesystem
# argument that switches region caching off.
set_managed_storage("s3://lamindb-ci/storage3", cache_regions=False)
# The filesystem behind the storage root must now report cache_regions as falsy.
assert not ln_setup.settings.storage.root.fs.cache_regions

Cloud storage with read-only access:

# A location without write access must be refused as managed storage.
with pytest.raises(ValueError) as excinfo:
    set_managed_storage("gs://rxrx1-europe-west4/images/test/HEPG2-08")
raised_message = excinfo.exconly()
assert raised_message.startswith("ValueError: Cannot manage storage without write access")

Add testuser2 as a collaborator to the instance:

from laminhub_rest.core.instance.collaborator import InstanceCollaboratorHandler
from laminhub_rest.core.account.user import UserAccountHandler
from lamindb_setup.core._hub_client import connect_hub_with_auth
from lamindb.models import User

# Open an authenticated hub client for the currently logged-in user.
admin_hub = connect_hub_with_auth()

# Look up testuser2's hub account and grant it the "write" role on the instance.
account_handler = UserAccountHandler(admin_hub)
testuser2 = account_handler.get_by_handle("testuser2")
collaborator_handler = InstanceCollaboratorHandler(admin_hub)
collaborator_handler.add(
    instance_id=instance_id,
    account_id=testuser2.id,
    role="write",
    schema_id=None,
    skip_insert_user_table=True,
)

# The hub call was made with skip_insert_user_table=True, so the user record is
# created explicitly here (presumably the hub would otherwise insert it — TODO confirm).
User.objects.create(
    uid=testuser2.lnid,
    handle=testuser2.handle,
    name=testuser2.name,
)

admin_hub.auth.close()

Sign them in and let them add another storage location:

# As testuser2, adding a further managed location must work.
ln_setup.login("testuser2")
set_managed_storage("./storage4")
expected_root = f"{Path.cwd()}/storage4"
assert ln_setup.settings.storage.root_as_str == expected_root

Attempt to delete instance with testuser2:

# testuser2 was added with the "write" role only; deleting the instance must be
# refused. The exception info is not inspected, so it is not bound to a name.
with pytest.raises(PermissionError):
    ln_setup.delete("testuser1/test-add-managed-storage", force=True)

Delete test instance through testuser1:

# Log back in as testuser1, under whose handle the instance lives, and delete it.
ln_setup.login("testuser1")
ln_setup.delete("test-add-managed-storage", force=True)
!docker stop pgtest && docker rm pgtest

Assert everything is deleted:

from lamindb_setup.core._hub_client import call_with_fallback_auth
from lamindb_setup.core._hub_crud import select_instance_by_id
from lamindb_setup.core._hub_core import get_storage_records_for_instance

# After deletion, the hub must return no instance for the id ...
hub_instance = call_with_fallback_auth(
    select_instance_by_id, instance_id=instance_id.hex
)
assert hub_instance is None
# ... and no storage records may remain for it.
remaining_storage_records = get_storage_records_for_instance(instance_id)
assert not remaining_storage_records