diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml index c86de84f1..75a0111cb 100644 --- a/.github/workflows/compile.yml +++ b/.github/workflows/compile.yml @@ -47,6 +47,7 @@ jobs: ${{ matrix.cuda == 'ON' && 'CUDA' || '' }} ${{ matrix.hip == 'ON' && 'HIP' || '' }} ${{ matrix.cuquantum == 'ON' && 'CUQ' || '' }} + ${{ matrix.adios2 == 'ON' && 'CKPT' || '' }} runs-on: ${{ matrix.os }} @@ -67,6 +68,7 @@ jobs: cuda: [ON, OFF] hip: [ON, OFF] cuquantum: [ON, OFF] + adios2: [ON, OFF] mpilib: ['', 'mpich', 'ompi', 'impi', 'msmpi'] # disable deprecated API on MSVC, and assign unique compilers, @@ -249,14 +251,16 @@ jobs: -DQUEST_ENABLE_CUDA=${{ matrix.cuda }} -DQUEST_ENABLE_HIP=${{ matrix.hip }} -DQUEST_ENABLE_CUQUANTUM=${{ matrix.cuquantum }} + -DQUEST_ENABLE_CHECKPOINTING=${{ matrix.adios2 }} -DCMAKE_CUDA_ARCHITECTURES=${{ env.cuda_arch }} -DCMAKE_HIP_ARCHITECTURES=${{ env.hip_arch }} -DCMAKE_CXX_COMPILER=${{ matrix.compiler }} -DCMAKE_CXX_FLAGS=${{ matrix.mpi == 'ON' && matrix.cuda == 'ON' && '-fno-lto' || '' }} - # force 'Release' build (needed by MSVC to enable optimisations) + # force 'Release' build (needed by MSVC to enable optimisations), + # temporarily forcing serial compilation to avoid ADIOS2 OOM error - name: Compile - run: cmake --build ${{ env.build_dir }} --config Release --parallel + run: cmake --build ${{ env.build_dir }} --config Release --parallel 1 # run all compiled isolated examples to test for link-time errors, # continuing if any fail (since some deliberately fail) diff --git a/.github/workflows/test_free.yml b/.github/workflows/test_free.yml index 2d332e842..0f12cae6b 100644 --- a/.github/workflows/test_free.yml +++ b/.github/workflows/test_free.yml @@ -68,16 +68,18 @@ jobs: -DQUEST_ENABLE_DEPRECATED_API=${{ matrix.version == 3 && 'ON' || 'OFF' }} -DQUEST_DISABLE_DEPRECATION_WARNINGS=${{ matrix.version == 3 && 'ON' || 'OFF' }} -DQUEST_FLOAT_PRECISION=${{ matrix.precision }} + -DQUEST_ENABLE_CHECKPOINTING=ON - # force 
'Release' build (needed by MSVC to enable optimisations) + # force 'Release' build (needed by MSVC to enable optimisations), and force serial (to avoid ADIOS2 OOM) - name: Compile - run: cmake --build ${{ env.build_dir }} --config Release --parallel + run: cmake --build ${{ env.build_dir }} --config Release --parallel 1 # run v4 unit tests in random order, excluding the integration tests, # using the default environment variables (e.g. test all permutations) # TODO: # ctest currently doesn't know of our Catch2 tags, so we # are manually excluding each integration test by name + - name: Run v4 tests if: ${{ matrix.version == 4 }} run: ctest -j2 --output-on-failure --schedule-random -C Release -E "density evolution" diff --git a/CMakeLists.txt b/CMakeLists.txt index b5a438713..b99161fc4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -543,6 +543,83 @@ endif() +# Checkpointing (ADIOS2) +option(QUEST_ENABLE_CHECKPOINTING "Enable Qureg checkpointing (saveQuregToFile / createQuregFromFile) via ADIOS2. Turned OFF by default." OFF) +message(STATUS "Checkpointing is turned ${QUEST_ENABLE_CHECKPOINTING}. Set QUEST_ENABLE_CHECKPOINTING to modify.") +if (QUEST_ENABLE_CHECKPOINTING) + + find_package(adios2 QUIET) + + # A distributed QuEST needs an MPI-enabled ADIOS2 (which provides the + # adios2::cxx_mpi target). A serial system install lacks it, so in that case we + # ignore the found package and fetch an MPI-enabled build instead of failing. 
+ set(quest_use_found_adios2 ${adios2_FOUND}) + if (adios2_FOUND AND QUEST_ENABLE_MPI AND NOT TARGET adios2::cxx_mpi) + message(STATUS "Found ADIOS2 lacks MPI support (no adios2::cxx_mpi target); fetching an MPI-enabled build instead") + set(quest_use_found_adios2 FALSE) + endif() + + if(NOT quest_use_found_adios2) + message(STATUS "fetching ADIOS2 via FetchContent") + + include(FetchContent) + FetchContent_Declare( + adios2 + GIT_REPOSITORY https://github.com/ornladios/ADIOS2.git + GIT_TAG v2.12.1 + ) + + # Match ADIOS2's MPI to QuEST's so distributed runs write per-rank slices + # into one shared file. ADIOS2's CUDA support is deliberately left OFF: + # checkpointing copies amps to host memory (syncQuregFromGpu/syncQuregToGpu) + # before any I/O, so ADIOS2 never touches device pointers. Building it with + # CUDA is unnecessary and stalls the Windows CUDA CI job. + set(ADIOS2_USE_MPI ${QUEST_ENABLE_MPI} CACHE BOOL "" FORCE) + set(ADIOS2_USE_CUDA OFF CACHE BOOL "" FORCE) + + # Forego unused facilities + set(ADIOS2_BUILD_TESTING OFF CACHE BOOL "" FORCE) + set(ADIOS2_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_SODIUM OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_Fortran OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_HDF5 OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_ZeroMQ OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_SST OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_DataMan OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_SSC OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_MHS OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_DAOS OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_MGARD OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_BZip2 OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_Blosc OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_Blosc2 OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_SZ OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_ZFP OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_PNG OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_Profiling OFF CACHE BOOL "" FORCE) + set(ADIOS2_USE_Python OFF CACHE BOOL "" FORCE) + + 
FetchContent_MakeAvailable(adios2) + + else() + # re-run non-QUIET so configuration fails with a clear error if the package + # somehow became unavailable between the two calls + find_package(adios2 REQUIRED) + endif() + + # In distributed builds link ADIOS2's MPI-enabled C++ interface: it defines + # ADIOS2_USE_MPI, which exposes the adios2::ADIOS(MPI_Comm) constructor used in + # qureg.cpp for collective per-rank I/O. The serial target lacks it. + if (QUEST_ENABLE_MPI) + target_link_libraries(QuEST PRIVATE adios2::cxx_mpi) + else() + target_link_libraries(QuEST PRIVATE adios2::cxx) + endif() + set_target_properties(QuEST PROPERTIES INSTALL_RPATH_USE_LINK_PATH ON) # set on the target itself: QuEST already exists here, and the CMAKE_INSTALL_RPATH_USE_LINK_PATH variable only initialises targets created afterwards +endif() + + + # =============================== # Set options to save in config.h # =============================== @@ -553,6 +630,7 @@ set(QUEST_COMPILE_OMP ${QUEST_ENABLE_OMP}) set(QUEST_COMPILE_MPI ${QUEST_ENABLE_MPI}) set(QUEST_COMPILE_SUBCOMM ${QUEST_ENABLE_SUBCOMM}) set(QUEST_COMPILE_CUQUANTUM ${QUEST_ENABLE_CUQUANTUM}) +set(QUEST_COMPILE_CHECKPOINTING ${QUEST_ENABLE_CHECKPOINTING}) set(QUEST_INCLUDE_DEPRECATED_FUNCTIONS ${QUEST_ENABLE_DEPRECATED_API}) diff --git a/docs/compile.md b/docs/compile.md index ba4306a85..664ac56a0 100644 --- a/docs/compile.md +++ b/docs/compile.md @@ -689,3 +689,33 @@ Note that distributed executables are launched in a distinct way to the other de > - UCX > - launch flags > - checking via reportenv + + + + +------------------ + + + + + +## Checkpointing + +QuEST can optionally _checkpoint_ a `Qureg` to disk; writing its state to a file with `saveQuregToFile()`, to later be restored into a new `Qureg` with `createQuregFromFile()`. This is useful for long-running jobs which risk timeout or failure - an evolving `Qureg` can be periodically saved and resumed in a subsequent process. The file records only the `Qureg` dimension (the number of qubits, and whether it is a density matrix) and its amplitudes; never the incidental deployment configuration. 
A `Qureg` saved by one deployment (say, distributed over `8` nodes) can therefore be restored by any other (say, a single GPU-accelerated node), provided both builds use the same `QUEST_FLOAT_PRECISION`. + +Checkpointing is built upon [ADIOS2](https://github.com/ornladios/ADIOS2) and is _disabled_ by default. To enable it, specify `QUEST_ENABLE_CHECKPOINTING` at configuration; CMake will use an installed ADIOS2 if it finds a suitable one, and will otherwise download and build ADIOS2 automatically: +```bash +# configure +cmake .. -D QUEST_ENABLE_CHECKPOINTING=ON + +# build +cmake --build . --parallel +``` + +> [!IMPORTANT] +> To use an existing ADIOS2 installation, it must be discoverable by CMake. If it was installed to a non-standard location (such as `~/.local`), pass its prefix via `CMAKE_PREFIX_PATH`: +> ```bash +> cmake .. -D QUEST_ENABLE_CHECKPOINTING=ON -D CMAKE_PREFIX_PATH=$HOME/.local +> ``` + +Calling `saveQuregToFile()` or `createQuregFromFile()` in a build _without_ checkpointing enabled throws a validation error. diff --git a/quest/include/config.h.in b/quest/include/config.h.in index 1bb8a0470..ef40e4e91 100644 --- a/quest/include/config.h.in +++ b/quest/include/config.h.in @@ -41,6 +41,7 @@ defined(QUEST_COMPILE_CUDA) || \ defined(QUEST_COMPILE_HIP) || \ defined(QUEST_COMPILE_CUQUANTUM) || \ + defined(QUEST_COMPILE_CHECKPOINTING) || \ defined(QUEST_ENABLE_NUMA) || \ defined(QUEST_INCLUDE_DEPRECATED_FUNCTIONS) || \ defined(QUEST_DISABLE_DEPRECATION_WARNINGS) @@ -84,6 +85,7 @@ #cmakedefine01 QUEST_COMPILE_CUDA #cmakedefine01 QUEST_COMPILE_CUQUANTUM #cmakedefine01 QUEST_COMPILE_HIP +#cmakedefine01 QUEST_COMPILE_CHECKPOINTING // crucial to QuEST source (informs optional NUMA usage) @@ -125,6 +127,7 @@ ! defined(QUEST_COMPILE_CUDA) || \ ! defined(QUEST_COMPILE_HIP) || \ ! defined(QUEST_COMPILE_CUQUANTUM) || \ + ! defined(QUEST_COMPILE_CHECKPOINTING) || \ ! defined(QUEST_ENABLE_NUMA) || \ ! defined(QUEST_INCLUDE_DEPRECATED_FUNCTIONS) || \ ! defined(QUEST_DISABLE_DEPRECATION_WARNINGS) @@ -152,6 +155,7 @@ ! (QUEST_COMPILE_CUDA == 0 || QUEST_COMPILE_CUDA == 1) || \ ! (QUEST_COMPILE_HIP == 0 || QUEST_COMPILE_HIP == 1) || \ ! 
(QUEST_COMPILE_CUQUANTUM == 0 || QUEST_COMPILE_CUQUANTUM == 1) || \ + ! (QUEST_COMPILE_CHECKPOINTING == 0 || QUEST_COMPILE_CHECKPOINTING == 1) || \ ! (QUEST_ENABLE_NUMA == 0 || QUEST_ENABLE_NUMA == 1) || \ ! (QUEST_INCLUDE_DEPRECATED_FUNCTIONS == 0 || QUEST_INCLUDE_DEPRECATED_FUNCTIONS == 1) || \ ! (QUEST_DISABLE_DEPRECATION_WARNINGS == 0 || QUEST_DISABLE_DEPRECATION_WARNINGS == 1) diff --git a/quest/include/qureg.h b/quest/include/qureg.h index 4ff4c5627..b0e33aa1d 100644 --- a/quest/include/qureg.h +++ b/quest/include/qureg.h @@ -488,6 +488,51 @@ void getDensityQuregAmps(qcomp** outAmps, Qureg qureg, qindex startRow, qindex s /** @} */ + +/** + * @defgroup qureg_checkpoint Checkpointing + * @brief Functions for saving a Qureg to file and restoring it later. + * @details These functions are only available when QuEST is compiled with + * checkpointing support (CMake variable @c QUEST_ENABLE_CHECKPOINTING=ON), + * which additionally requires the ADIOS2 library. Calling them in a + * build without checkpointing support throws a validation error. + * @{ + */ + + +/** Writes the contents of @p qureg to the file @p fn, so that it may later be + * restored with createQuregFromFile(). The file records only the @p qureg + * dimension (number of qubits and whether it is a density matrix), the byte + * size of qreal, and its full set of amplitudes; incidental deployment information + * (e.g. multithreading, GPU-acceleration, distribution) is not recorded. + * + * @param[in] qureg the Qureg to write to disk. + * @param[in] fn the output file path. + * @notyetdoced + * @notyettested + * @see + * - createQuregFromFile() to restore a Qureg saved by this function. + */ +void saveQuregToFile(Qureg qureg, const char* fn); + + +/** Creates a new Qureg from a file previously written by saveQuregToFile(), + * with automatically chosen deployments (independent of those used when the + * file was saved), and populates it with the stored amplitudes. + * + * @param[in] fn the input file path. 
+ * @returns A new Qureg instance matching the saved dimension and amplitudes. + * @notyetdoced + * @notyettested + * @see + * - saveQuregToFile() to create a file readable by this function. + */ +Qureg createQuregFromFile(const char* fn); + + +/** @} */ + + // end de-mangler #ifdef __cplusplus } diff --git a/quest/src/api/environment.cpp b/quest/src/api/environment.cpp index c59334b55..700ece439 100644 --- a/quest/src/api/environment.cpp +++ b/quest/src/api/environment.cpp @@ -5,6 +5,7 @@ * @author Tyson Jones */ +#include "quest/include/config.h" #include "quest/include/environment.h" #include "quest/include/precision.h" #include "quest/include/modes.h" @@ -204,16 +205,23 @@ void printPrecisionInfo() { } +// reports whether QuEST was compiled with Qureg checkpointing support (ADIOS2) +static bool isCheckpointingCompiled() { + return (bool) QUEST_COMPILE_CHECKPOINTING; +} + + void printCompilationInfo() { print_table( "compilation", { - {"isOmpCompiled", cpu_isOpenmpCompiled()}, - {"isMpiCompiled", comm_isMpiCompiled()}, - {"isMpiSubCommCompiled", comm_isMpiSubCommCompiled()}, - {"isGpuCompiled", gpu_isGpuCompiled()}, - {"isHipCompiled", gpu_isHipCompiled()}, - {"isCuQuantumCompiled", gpu_isCuQuantumCompiled()}, + {"isOmpCompiled", cpu_isOpenmpCompiled()}, + {"isMpiCompiled", comm_isMpiCompiled()}, + {"isMpiSubCommCompiled", comm_isMpiSubCommCompiled()}, + {"isGpuCompiled", gpu_isGpuCompiled()}, + {"isHipCompiled", gpu_isHipCompiled()}, + {"isCuQuantumCompiled", gpu_isCuQuantumCompiled()}, + {"isCheckpointingCompiled", isCheckpointingCompiled()}, }); } diff --git a/quest/src/api/qureg.cpp b/quest/src/api/qureg.cpp index 84bcd2bd0..70be4fd62 100644 --- a/quest/src/api/qureg.cpp +++ b/quest/src/api/qureg.cpp @@ -5,6 +5,7 @@ * @author Tyson Jones */ +#include "quest/include/config.h" #include "quest/include/qureg.h" #include "quest/include/modes.h" #include "quest/include/environment.h" @@ -25,6 +26,26 @@ #include #include +#if QUEST_COMPILE_CHECKPOINTING +#include 
+#if QUEST_COMPILE_MPI +#include +#endif +#endif + +#if QUEST_COMPILE_CHECKPOINTING +// In distributed builds, ADIOS2 must be given QuEST's communicator so that each +// node's call collectively writes/reads its own slice of the shared file. Without +// it, ADIOS2 runs serially per rank and the per-node slices never form one file. +static adios2::ADIOS makeAdios() { +#if QUEST_COMPILE_MPI + return adios2::ADIOS(MPI_COMM_WORLD); +#else + return adios2::ADIOS(); +#endif +} +#endif + using std::string; using std::vector; @@ -560,3 +581,110 @@ vector> getDensityQuregAmps(Qureg qureg, qindex startRow, qindex s getDensityQuregAmps(ptrs.data(), qureg, startRow, startCol, numRows, numCols); return out; } + + + +/* + * CHECKPOINTING + * + * which is compiled only when QUEST_ENABLE_CHECKPOINTING=ON (requiring ADIOS2). + * The API functions are always defined so that the validation layer can throw + * a clear error in non-checkpointing builds, rather than failing to link. + * + * These are defined with C linkage (matching their extern "C" declarations in + * qureg.h) so they remain callable from C consumers; the signatures pass no + * qcomp by value and so stay C-ABI-safe. 
+ */ + + +extern "C" void saveQuregToFile(Qureg qureg, const char* fn) { + validate_quregCheckpointingIsCompiled(__func__); + +#if QUEST_COMPILE_CHECKPOINTING + validate_quregFields(qureg, __func__); + + // ensure the CPU amplitudes reflect any GPU-resident state before writing + syncQuregFromGpu(qureg); + + adios2::ADIOS adios = makeAdios(); + adios2::IO io = adios.DeclareIO("QuESTQuregSave"); + adios2::Engine engine = io.Open(fn, adios2::Mode::Write); + + // global single-value metadata; we deliberately record only the dimension + // and precision, never incidental deployment fields (the loader chooses its + // own deployment) nor derivable fields (like numAmps) + adios2::Variable vNumQubits = io.DefineVariable("numQubits"); + adios2::Variable vIsDensMatr = io.DefineVariable("isDensityMatrix"); + adios2::Variable vQrealBytes = io.DefineVariable("qrealBytes"); + + // amplitudes are stored as interleaved (real, imag) reals to stay agnostic + // to precision and to ADIOS2's complex-type support; each node writes only + // its local slice into the global array, avoiding excessive memory use + qindex globalReals = 2 * qureg.numAmps; + qindex localReals = 2 * qureg.numAmpsPerNode; + qindex startReal = 2 * ((qindex) qureg.rank) * qureg.numAmpsPerNode; + adios2::Variable vAmps = io.DefineVariable( + "amps", + { (size_t) globalReals }, + { (size_t) startReal }, + { (size_t) localReals }); + + int qrealBytes = (int) sizeof(qreal); + + engine.BeginStep(); + engine.Put(vNumQubits, qureg.numQubits); + engine.Put(vIsDensMatr, qureg.isDensityMatrix); + engine.Put(vQrealBytes, qrealBytes); + engine.Put(vAmps, reinterpret_cast(qureg.cpuAmps)); + engine.EndStep(); + engine.Close(); +#endif +} + + +extern "C" Qureg createQuregFromFile(const char* fn) { + validate_quregCheckpointingIsCompiled(__func__); + +#if QUEST_COMPILE_CHECKPOINTING + adios2::ADIOS adios = makeAdios(); + adios2::IO io = adios.DeclareIO("QuESTQuregLoad"); + adios2::Engine engine = io.Open(fn, 
adios2::Mode::Read); + + engine.BeginStep(); + + // read dimension + precision metadata first, so we can size the new Qureg + int numQubits = 0; + int isDensMatr = 0; + int fileQrealBytes = 0; + engine.Get(io.InquireVariable("numQubits"), numQubits); + engine.Get(io.InquireVariable("isDensityMatrix"), isDensMatr); + engine.Get(io.InquireVariable("qrealBytes"), fileQrealBytes); + engine.PerformGets(); + + validate_quregFileMatchesPrecision(fileQrealBytes, __func__); + + // create a matching-dimension Qureg with automatically chosen deployments, + // independent of those used when the file was saved + Qureg qureg = (isDensMatr)? + createDensityQureg(numQubits) : + createQureg(numQubits); + + // read only this node's slice of the global amplitude array into its buffer + qindex localReals = 2 * qureg.numAmpsPerNode; + qindex startReal = 2 * ((qindex) qureg.rank) * qureg.numAmpsPerNode; + adios2::Variable vAmps = io.InquireVariable("amps"); + vAmps.SetSelection({ { (size_t) startReal }, { (size_t) localReals } }); + engine.Get(vAmps, reinterpret_cast(qureg.cpuAmps)); + + engine.EndStep(); + engine.Close(); + + // propagate the restored CPU amplitudes to the GPU, if deployed + syncQuregToGpu(qureg); + + return qureg; +#else + // reached only when validation is disabled (the validation above otherwise throws in non-checkpointing builds); returns a placeholder Qureg + return Qureg{}; +#endif +} diff --git a/quest/src/core/validation.cpp b/quest/src/core/validation.cpp index 62ff93166..aa1d0b4ec 100644 --- a/quest/src/core/validation.cpp +++ b/quest/src/core/validation.cpp @@ -7,6 +7,7 @@ * @author Kshitij Chhabra (patched v3 overflow bug) */ +#include "quest/include/config.h" #include "quest/include/modes.h" #include "quest/include/types.h" #include "quest/include/precision.h" @@ -277,6 +278,12 @@ namespace report { string QUREG_NOT_STATE_VECTOR = "Expected a statevector Qureg but received a density matrix."; + string QUREG_CHECKPOINTING_NOT_COMPILED = + "Qureg checkpointing (saveQuregToFile and createQuregFromFile) requires QuEST to 
be compiled with checkpointing support. Reconfigure with the CMake option -DQUEST_ENABLE_CHECKPOINTING=ON, which additionally requires the ADIOS2 library."; + + string QUREG_FILE_PRECISION_MISMATCH = + "The checkpoint file was written with a qreal precision of ${FILE_BYTES} bytes, but this QuEST build uses ${EXEC_BYTES} bytes. A Qureg can only be restored by a QuEST build using the same floating-point precision (QUEST_FLOAT_PRECISION) as the build which saved it."; + /* * MUTABLE OBJECT FLAGS @@ -1990,6 +1997,35 @@ void validate_quregIsDensityMatrix(Qureg qureg, const char* caller) { assertThat(qureg.isDensityMatrix, report::QUREG_NOT_DENSITY_MATRIX, caller); } +void validate_quregCheckpointingIsCompiled(const char* caller) { + + if (!global_isValidationEnabled) + return; + + // this validation must fire regardless of QUEST_COMPILE_CHECKPOINTING, so the + // user receives a clear error (rather than a linker error) when calling the + // checkpointing API in a build which did not compile it + #if QUEST_COMPILE_CHECKPOINTING + bool isCompiled = true; + #else + bool isCompiled = false; + #endif + + assertThat(isCompiled, report::QUREG_CHECKPOINTING_NOT_COMPILED, caller); +} + +void validate_quregFileMatchesPrecision(int fileQrealBytes, const char* caller) { + + if (!global_isValidationEnabled) + return; + + tokenSubs vars = { + {"${FILE_BYTES}", fileQrealBytes}, + {"${EXEC_BYTES}", (int) sizeof(qreal)}}; + + assertThat(fileQrealBytes == (int) sizeof(qreal), report::QUREG_FILE_PRECISION_MISMATCH, vars, caller); +} + /* diff --git a/quest/src/core/validation.hpp b/quest/src/core/validation.hpp index 87f81a0d6..e8eb7306d 100644 --- a/quest/src/core/validation.hpp +++ b/quest/src/core/validation.hpp @@ -137,6 +137,10 @@ void validate_quregIsStateVector(Qureg qureg, const char* caller); void validate_quregIsDensityMatrix(Qureg qureg, const char* caller); +void validate_quregCheckpointingIsCompiled(const char* caller); + +void validate_quregFileMatchesPrecision(int 
fileQrealBytes, const char* caller); + /* diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 59341759f..4e06fac9d 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -4,6 +4,7 @@ target_sources(tests PUBLIC calculations.cpp channels.cpp + checkpoint.cpp debug.cpp decoherence.cpp environment.cpp diff --git a/tests/unit/checkpoint.cpp b/tests/unit/checkpoint.cpp new file mode 100644 index 000000000..121cbd5fb --- /dev/null +++ b/tests/unit/checkpoint.cpp @@ -0,0 +1,137 @@ +/** @file + * Unit tests of Qureg checkpointing (saveQuregToFile / createQuregFromFile). + * + * These tests are only compiled when QuEST is built with the CMake option + * -DQUEST_ENABLE_CHECKPOINTING=ON (which additionally requires the ADIOS2 library). + * + * @author Ashmit JaiSarita Gupta + * + * @defgroup unitcheckpoint Checkpointing + * @ingroup unittests + */ + +#include "quest.h" + +#if QUEST_COMPILE_CHECKPOINTING + +#include + +#include "tests/utils/macros.hpp" +#include "tests/utils/cache.hpp" + +#include +#include +#include +#include +#include + + + +/* + * file constants and helpers + */ + +#define TEST_CATEGORY \ + LABEL_UNIT_TAG "[checkpoint]" + +namespace { + + const char* SV_FILE = "test_checkpoint_statevector.bp"; + const char* DM_FILE = "test_checkpoint_densitymatrix.bp"; + + qreal maxStatevectorAmpDiff(Qureg a, Qureg b) { + qreal m = 0; + for (qindex i = 0; i < a.numAmps; i++) + m = std::max(m, std::abs(getQuregAmp(a, i) - getQuregAmp(b, i))); + return m; + } + + qreal maxDensityMatrixAmpDiff(Qureg a, Qureg b) { + qreal m = 0; + qindex dim = (qindex) 1 << a.numQubits; + for (qindex r = 0; r < dim; r++) + for (qindex c = 0; c < dim; c++) + m = std::max(m, std::abs(getDensityQuregAmp(a, r, c) - getDensityQuregAmp(b, r, c))); + return m; + } + + // distributed-safe cleanup: a barrier guarantees every node has finished + // reading the shared file, only rank 0 deletes it (concurrent removal races), + // and a second barrier stops the next 
write racing a half-removed directory. + void removeCheckpointFile(const char* fn) { + syncQuESTEnv(); + if (getQuESTEnv().rank == 0) + std::filesystem::remove_all(fn); + syncQuESTEnv(); + } +} + + + +/** TESTS + * + * @ingroup unitcheckpoint + * @{ + */ + +TEST_CASE( "saveQuregToFile and createQuregFromFile", TEST_CATEGORY ) { + + SECTION( LABEL_CORRECTNESS ) { + + // iterate the cached Quregs so the save path is exercised under every + // deployment combination (serial, OMP, MPI, GPU and their mixtures); + // each restored Qureg chooses its own deployment independently + SECTION( LABEL_STATEVEC ) { + + for (auto& [label, q] : getCachedStatevecs()) { + DYNAMIC_SECTION( label ) { + + initRandomPureState(q); + + saveQuregToFile(q, SV_FILE); + Qureg r = createQuregFromFile(SV_FILE); + + CHECK( r.numQubits == q.numQubits ); + CHECK( r.isDensityMatrix == q.isDensityMatrix ); + CHECK( maxStatevectorAmpDiff(q, r) < 1e-12 ); + + destroyQureg(r); + removeCheckpointFile(SV_FILE); + } + } + } + + SECTION( LABEL_DENSMATR ) { + + for (auto& [label, q] : getCachedDensmatrs()) { + DYNAMIC_SECTION( label ) { + + initRandomPureState(q); // works even for density matrices + + saveQuregToFile(q, DM_FILE); + Qureg r = createQuregFromFile(DM_FILE); + + CHECK( r.numQubits == q.numQubits ); + CHECK( r.isDensityMatrix == q.isDensityMatrix ); + CHECK( maxDensityMatrixAmpDiff(q, r) < 1e-12 ); + + destroyQureg(r); + removeCheckpointFile(DM_FILE); + } + } + } + } + + SECTION( LABEL_VALIDATION ) { + + // Neither checkpointing-specific validation can be triggered here: the + // not-compiled error is unreachable since this file only compiles under + // QUEST_COMPILE_CHECKPOINTING, and the precision-mismatch error needs a file + // written by a build of a different precision. ADIOS2's own runtime errors (e.g. a missing file) are not QuEST validation errors. + SUCCEED( ); + } +} + +/** @} (end defgroup) */ + +#endif // QUEST_COMPILE_CHECKPOINTING