From 9c81b483a5db298cc9e8733610aa77c0296e142b Mon Sep 17 00:00:00 2001 From: Jaladh Singhal Date: Wed, 3 Jun 2026 15:13:48 -0700 Subject: [PATCH 1/2] Upgrade lsdb to >=0.8.1 --- tox.ini | 2 +- tutorial_requirements.txt | 3 ++- .../techniques-and-tools/irsa-hats-with-lsdb.md | 13 ++++++++++--- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/tox.ini b/tox.ini index ce996336..f67c3bca 100644 --- a/tox.ini +++ b/tox.ini @@ -36,7 +36,7 @@ deps = # Pandas functionality we use requires lxml, but it's not yet available as optional extras for 1.5.2 oldestdeps: lxml # Oldest lsdb is not compatible with the versions above, we skip lsdb notebooks for oldestdeps job - # oldestdeps: lsdb==0.6.6 + # oldestdeps: lsdb==0.8.1 # Ugly workaround for the custom install_command to ensure the the arguments are properly passed into pip !oldestdeps: pip diff --git a/tutorial_requirements.txt b/tutorial_requirements.txt index 786d5ed4..6b880315 100644 --- a/tutorial_requirements.txt +++ b/tutorial_requirements.txt @@ -12,7 +12,8 @@ pyarrow>=10.0.1 hpgeom pandas[xml]>=1.5.2 dask[distributed] -lsdb>=0.6.6,<0.8 +# lsdb<0.8 returns wrong results for some catalogs, e.g. ZTF DR24 Lightcurves +lsdb>=0.8.1 psutil ray s3fs diff --git a/tutorials/techniques-and-tools/irsa-hats-with-lsdb.md b/tutorials/techniques-and-tools/irsa-hats-with-lsdb.md index 19b0a7c6..d12b4f5a 100644 --- a/tutorials/techniques-and-tools/irsa-hats-with-lsdb.md +++ b/tutorials/techniques-and-tools/irsa-hats-with-lsdb.md @@ -56,7 +56,7 @@ We will use lsdb to leverage HATS partitioning for performing fast spatial queri ```{code-cell} ipython3 # Uncomment the next line to install dependencies if needed. 
-# !pip install s3fs "lsdb>=0.6.6,<0.8" pyarrow pandas numpy astropy dask matplotlib +# !pip install s3fs "lsdb>=0.8.1" pyarrow pandas numpy astropy dask matplotlib ``` ```{code-cell} ipython3 @@ -562,10 +562,17 @@ ztf_lcs ``` As earlier, this creates a lazy catalog object with the partition(s) that contains our IDs. -We can load the light curves data into a DataFrame by using the `compute()` method: +We can load the light curves data into a DataFrame by using the `compute()` method. +Note: You may see a memory warning from lsdb; this is expected given the large size of the light curve data. ```{code-cell} ipython3 -ztf_lcs_df = ztf_lcs.compute() # ID search runs out of memory if we try to parallelize it with Dask client +with Client(n_workers=get_nworkers(ztf_lcs), + threads_per_worker=1, + memory_limit=None # to prevent it from running out of memory + ) as client: + print(f"This may take more than a few minutes to complete. You can monitor progress in Dask dashboard at {client.dashboard_link}") + ztf_lcs_df = ztf_lcs.compute() + ztf_lcs_df ``` From b2060e1bb937f7ad18eaa471fe3ff39881930531 Mon Sep 17 00:00:00 2001 From: Jaladh Singhal Date: Wed, 3 Jun 2026 15:24:00 -0700 Subject: [PATCH 2/2] Add suffix_method kwarg explicitly --- tutorials/techniques-and-tools/irsa-hats-with-lsdb.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tutorials/techniques-and-tools/irsa-hats-with-lsdb.md b/tutorials/techniques-and-tools/irsa-hats-with-lsdb.md index d12b4f5a..da20374a 100644 --- a/tutorials/techniques-and-tools/irsa-hats-with-lsdb.md +++ b/tutorials/techniques-and-tools/irsa-hats-with-lsdb.md @@ -392,9 +392,10 @@ Since ZTF objects are defined per band, setting `n_neighbors=1` means this is on ```{code-cell} ipython3 euclid_x_ztf = euclid_cone.crossmatch( ztf_cone, - suffixes=("_euclid", "_ztf"), # to distinguish columns from the two catalogs n_neighbors=1, # default is 1 too, can be tweaked - radius_arcsec=1 # default is 1 arcsec too, can be tweaked + 
radius_arcsec=1, # default is 1 arcsec too, can be tweaked + suffixes=("_euclid", "_ztf"), # to distinguish columns from the two catalogs + suffix_method="all_columns", ) euclid_x_ztf ```