diff --git a/CHANGELOG.md b/CHANGELOG.md
index 46e8efd99b1e..b77ed3f8aaa6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@ This release is compatible with NumPy 2.4.5.
 * Added C API functions for `dpnp.tensor.usm_ndarray` setters and getters to avoid ABI breakage if `dpnp.tensor.usm_ndarray` is modified [gh-2866](https://github.com/IntelPython/dpnp/pull/2866)
 * Added support for buffer protocol objects as advanced index keys in `dpnp.ndarray` [#2889](https://github.com/IntelPython/dpnp/pull/2889)
 * Added `--includes` and `--include-dir` options to the `dpnp` CLI [#2916](https://github.com/IntelPython/dpnp/pull/2916)
+* Added implementation of `dpnp.scipy.sparse.linalg import LinearOperator, cg, gmres, minres` [#2841](https://github.com/IntelPython/dpnp/pull/2841)
 
 ### Changed
 
diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml
index f15818fdf398..3d1aaba9dc06 100644
--- a/conda-recipe/meta.yaml
+++ b/conda-recipe/meta.yaml
@@ -50,6 +50,7 @@ requirements:
       - {{ pin_compatible('onemkl-sycl-lapack', min_pin='x.x', max_pin='x') }}
       - {{ pin_compatible('onemkl-sycl-rng', min_pin='x.x', max_pin='x') }}
       - {{ pin_compatible('onemkl-sycl-vm', min_pin='x.x', max_pin='x') }}
+      - {{ pin_compatible('onemkl-sycl-sparse', min_pin='x.x', max_pin='x') }}
       - numpy
       - intel-gpu-ocl-icd-system
 
diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt
index d7acf368bcd0..02a96fa347b2 100644
--- a/dpnp/CMakeLists.txt
+++ b/dpnp/CMakeLists.txt
@@ -185,6 +185,7 @@ add_subdirectory(backend/extensions/statistics)
 add_subdirectory(backend/extensions/ufunc)
 add_subdirectory(backend/extensions/vm)
 add_subdirectory(backend/extensions/window)
+add_subdirectory(backend/extensions/sparse)
 
 add_subdirectory(dpnp_algo)
 add_subdirectory(dpnp_utils)
diff --git a/dpnp/backend/extensions/blas/blas_py.cpp b/dpnp/backend/extensions/blas/blas_py.cpp
index 3e8ca01ebd8d..702f45f5eceb 100644
--- a/dpnp/backend/extensions/blas/blas_py.cpp
+++ b/dpnp/backend/extensions/blas/blas_py.cpp
@@ -151,6 +151,32 @@ PYBIND11_MODULE(_blas_impl, m)
               py::arg("depends") = py::list());
     }
 
+    {
+        // y = alpha * op(A) * x + beta * y, the full BLAS gemv form.
+        // Used by dpnp.scipy.sparse.linalg.gmres to fuse the Arnoldi
+        // orthogonalisation (u -= V @ h) into a single oneMKL call
+        // (trans_op=0, alpha=-1, beta=1) and to write the Hessenberg
+        // column h = V^H @ u directly into the Hessenberg matrix slice
+        // (trans_op=2, alpha=1, beta=0).
+        //
+        // trans_op selects the op applied to A:
+        //   0 = N   y = alpha * A   @ x + beta * y
+        //   1 = T   y = alpha * A^T @ x + beta * y
+        //   2 = C   y = alpha * A^H @ x + beta * y   (F-contig only)
+        //
+        // For complex matrices the scalars must be exactly
+        // representable as their real form: callers pass {-1, 0, 1};
+        // fractional or complex scalars would lose the imaginary
+        // component on the C++ cast.
+        m.def("_gemv_alpha_beta", &blas_ns::gemv_alpha_beta,
+              "Call `gemv` from oneMKL BLAS library with explicit "
+              "alpha, beta, and tri-state trans_op (0=N, 1=T, 2=C). "
+              "Computes y = alpha * op(A) * x + beta * y.",
+              py::arg("sycl_queue"), py::arg("matrixA"), py::arg("vectorX"),
+              py::arg("vectorY"), py::arg("trans_op"), py::arg("alpha"),
+              py::arg("beta"), py::arg("depends") = py::list());
+    }
+
     {
         m.def("_syrk", &blas_ns::syrk,
               "Call `syrk` from oneMKL BLAS library to compute "
diff --git a/dpnp/backend/extensions/blas/gemv.cpp b/dpnp/backend/extensions/blas/gemv.cpp
index bb447c51997d..812d0dcbf9f9 100644
--- a/dpnp/backend/extensions/blas/gemv.cpp
+++ b/dpnp/backend/extensions/blas/gemv.cpp
@@ -49,14 +49,20 @@ namespace type_utils = dpnp::tensor::type_utils;
 
 using ext::common::init_dispatch_vector;
 
+// Impl signature now carries alpha and beta as double. Each per-T impl
+// casts them to T at the very end (T(alpha_d), T(beta_d)) so the same
+// dispatch vector serves both the legacy alpha=1/beta=0 wrapper and
+// the new alpha_beta entry point used by the GMRES Arnoldi fast path.
 typedef sycl::event (*gemv_impl_fn_ptr_t)(sycl::queue &,
                                           oneapi::mkl::transpose,
                                           const std::int64_t,
                                           const std::int64_t,
+                                          const double, // alpha
                                           const char *,
                                           const std::int64_t,
                                           const char *,
                                           const std::int64_t,
+                                          const double, // beta
                                           char *,
                                           const std::int64_t,
                                           const bool,
@@ -69,10 +75,12 @@ static sycl::event gemv_impl(sycl::queue &exec_q,
                              oneapi::mkl::transpose transA,
                              const std::int64_t m,
                              const std::int64_t n,
+                             const double alpha_d,
                              const char *matrixA,
                              const std::int64_t lda,
                              const char *vectorX,
                              const std::int64_t incx,
+                             const double beta_d,
                              char *vectorY,
                              const std::int64_t incy,
                              const bool is_row_major,
@@ -84,6 +92,15 @@ static sycl::event gemv_impl(sycl::queue &exec_q,
     const T *x = reinterpret_cast<const T *>(vectorX);
     T *y = reinterpret_cast<T *>(vectorY);
 
+    // Cast alpha/beta into the matrix value type. For complex T the
+    // single-argument constructor sets the imaginary component to
+    // zero, which is exact for the GMRES use case (alpha and beta are
+    // always one of {-1, 0, 1}) and for the dpnp.dot wrapper (alpha=1,
+    // beta=0). Callers passing fractional or complex scalars through
+    // this path would lose the imaginary component silently.
+    const T alpha = static_cast<T>(alpha_d);
+    const T beta = static_cast<T>(beta_d);
+
     std::stringstream error_msg;
     bool is_exception_caught = false;
 
@@ -112,13 +129,13 @@ static sycl::event gemv_impl(sycl::queue &exec_q,
                     // or 'C' for a conjugate transpose.
             m,      // Number of rows in matrix A.
             n,      // Number of columns in matrix A.
-            T(1),   // Scaling factor for the matrix-vector product.
+            alpha,  // Scaling factor for the matrix-vector product.
             a,      // Pointer to the input matrix A.
             lda,    // Leading dimension of matrix A, which is the
                     // stride between successive rows (for row major layout).
             x,      // Pointer to the input vector x.
             incx,   // The stride of vector x.
-            T(0),   // Scaling factor for vector y.
+            beta,   // Scaling factor for vector y.
             y,      // Pointer to output vector y, where the result is stored.
             incy,   // The stride of vector y.
             depends);
@@ -141,14 +158,38 @@ static sycl::event gemv_impl(sycl::queue &exec_q,
     return gemv_event;
 }
 
-std::pair<sycl::event, sycl::event>
-    gemv(sycl::queue &exec_q,
-         const dpnp::tensor::usm_ndarray &matrixA,
-         const dpnp::tensor::usm_ndarray &vectorX,
-         const dpnp::tensor::usm_ndarray &vectorY,
-         const bool transpose,
-         const std::vector<sycl::event> &depends)
+// Shared validation + dispatch. Both gemv() (alpha=1, beta=0) and
+// gemv_alpha_beta() funnel through here so behaviour stays identical
+// across the two entry points.
+//
+// ``trans_op`` is a tri-state matching oneapi::mkl::transpose:
+//      0 = N (no transpose),
+//      1 = T (plain transpose),
+//      2 = C (conjugate-transpose, complex only).
+//
+// The legacy gemv() entry-point only ever needs N/T (real or complex,
+// no conjugate semantics) and forwards trans_op = 0 or 1. The
+// gemv_alpha_beta() entry-point exposes the full tri-state so the
+// GMRES Arnoldi inner step can request V^H directly instead of
+// post-conjugating the result of a T-mode gemv -- which is
+// mathematically wrong for a complex right-hand vector (the identity
+// conj(V^T @ u) == V^H @ u holds only when u is real-valued).
+static std::pair<sycl::event, sycl::event>
+    gemv_dispatch(sycl::queue &exec_q,
+                  const dpnp::tensor::usm_ndarray &matrixA,
+                  const dpnp::tensor::usm_ndarray &vectorX,
+                  const dpnp::tensor::usm_ndarray &vectorY,
+                  const int trans_op,
+                  const double alpha,
+                  const double beta,
+                  const std::vector<sycl::event> &depends)
 {
+    if (trans_op < 0 || trans_op > 2) {
+        throw py::value_error("gemv: trans_op must be 0 (N), 1 (T), or 2 (C).");
+    }
+    const bool is_transposed = (trans_op != 0);
+    const bool is_conj_trans = (trans_op == 2);
+
     const int matrixA_nd = matrixA.get_ndim();
     const int vectorX_nd = vectorX.get_ndim();
     const int vectorY_nd = vectorY.get_ndim();
@@ -182,10 +223,22 @@ std::pair<sycl::event, sycl::event>
             "Input matrix is not c-contiguous nor f-contiguous.");
     }
 
+    // Conjugate-transpose is only wired up for column-major (F-contig)
+    // matrices. The row-major remap (treating a C-contig matrix as its
+    // column-major transpose) does not extend cleanly to the C op
+    // because (A^T)^H == conj(A), which oneMKL does not expose as a
+    // gemv mode. Callers needing C-mode on row-major input must
+    // F-contigify first (e.g. via dpnp.asarray(A, order="F")).
+    if (is_conj_trans && !is_matrixA_f_contig) {
+        throw py::value_error(
+            "gemv: trans_op = 2 (conjugate-transpose) requires an "
+            "F-contiguous matrix; pass dpnp.asarray(A, order='F') first.");
+    }
+
     const py::ssize_t *a_shape = matrixA.get_shape_raw();
     const py::ssize_t *x_shape = vectorX.get_shape_raw();
     const py::ssize_t *y_shape = vectorY.get_shape_raw();
-    if (transpose) {
+    if (is_transposed) {
         if (a_shape[0] != x_shape[0]) {
             throw py::value_error("The number of rows in A must be equal to "
                                   "the number of elements in X.");
@@ -209,6 +262,9 @@ std::pair<sycl::event, sycl::event>
     oneapi::mkl::transpose transA;
     std::size_t src_nelems;
 
+    // Resolve the storage layout into the oneMKL transpose mode.
+    // Conjugate-transpose is constrained to F-contig above; the
+    // row-major branch therefore only sees N/T here.
 // cuBLAS supports only column-major storage
 #if defined(USE_ONEMATH_CUBLAS)
     constexpr bool is_row_major = false;
@@ -218,7 +274,11 @@ std::pair<sycl::event, sycl::event>
     if (is_matrixA_f_contig) {
         m = a_shape[0];
         n = a_shape[1];
-        if (transpose) {
+        if (is_conj_trans) {
+            transA = oneapi::mkl::transpose::C;
+            src_nelems = n;
+        }
+        else if (is_transposed) {
             transA = oneapi::mkl::transpose::T;
             src_nelems = n;
         }
@@ -228,9 +288,11 @@ std::pair<sycl::event, sycl::event>
         }
     }
     else {
+        // Row-major-as-column-major swap. is_conj_trans is rejected
+        // above, so only N/T need handling.
         m = a_shape[1];
         n = a_shape[0];
-        if (transpose) {
+        if (is_transposed) {
             transA = oneapi::mkl::transpose::N;
             src_nelems = m;
         }
@@ -248,7 +310,11 @@ std::pair<sycl::event, sycl::event>
     const std::int64_t m = a_shape[0];
     const std::int64_t n = a_shape[1];
 
-    if (transpose) {
+    if (is_conj_trans) {
+        transA = oneapi::mkl::transpose::C;
+        src_nelems = n;
+    }
+    else if (is_transposed) {
         transA = oneapi::mkl::transpose::T;
         src_nelems = n;
     }
@@ -299,9 +365,9 @@ std::pair<sycl::event, sycl::event>
         y_typeless_ptr -= (y_shape[0] - 1) * std::abs(incy) * y_elemsize;
     }
 
-    sycl::event gemv_ev =
-        gemv_fn(exec_q, transA, m, n, a_typeless_ptr, lda, x_typeless_ptr, incx,
-                y_typeless_ptr, incy, is_row_major, depends);
+    sycl::event gemv_ev = gemv_fn(exec_q, transA, m, n, alpha, a_typeless_ptr,
+                                  lda, x_typeless_ptr, incx, beta,
+                                  y_typeless_ptr, incy, is_row_major, depends);
 
     sycl::event args_ev = dpnp::utils::keep_args_alive(
         exec_q, {matrixA, vectorX, vectorY}, {gemv_ev});
@@ -309,6 +375,45 @@ std::pair<sycl::event, sycl::event>
     return std::make_pair(args_ev, gemv_ev);
 }
 
+std::pair<sycl::event, sycl::event>
+    gemv(sycl::queue &exec_q,
+         const dpnp::tensor::usm_ndarray &matrixA,
+         const dpnp::tensor::usm_ndarray &vectorX,
+         const dpnp::tensor::usm_ndarray &vectorY,
+         const bool transpose,
+         const std::vector<sycl::event> &depends)
+{
+    // Legacy alpha=1, beta=0 wrapper. Existing dpnp.dot callers expect
+    // this exact behaviour (N or plain T only, never conjugate-
+    // transpose), so we forward through the shared dispatch mapping
+    // the bool argument to the {0=N, 1=T} subset of the tri-state.
+    const int trans_op = transpose ? 1 : 0;
+    return gemv_dispatch(exec_q, matrixA, vectorX, vectorY, trans_op,
+                         /*alpha=*/1.0, /*beta=*/0.0, depends);
+}
+
+std::pair<sycl::event, sycl::event>
+    gemv_alpha_beta(sycl::queue &exec_q,
+                    const dpnp::tensor::usm_ndarray &matrixA,
+                    const dpnp::tensor::usm_ndarray &vectorX,
+                    const dpnp::tensor::usm_ndarray &vectorY,
+                    const int trans_op,
+                    const double alpha,
+                    const double beta,
+                    const std::vector<sycl::event> &depends)
+{
+    // Caller-supplied alpha / beta and full tri-state transpose.
+    // Used by the GMRES Arnoldi step to fuse  u -= V @ h
+    // (trans_op=0, alpha=-1, beta=1) into a single gemv kernel, and
+    // to write h = V^H @ u directly into a Hessenberg column slice
+    // (trans_op=2, alpha=1, beta=0). For complex matrices the scalars
+    // must be exactly representable as their real form -- callers
+    // pass 1/0/-1 only, see the impl-level comment for the imag-loss
+    // caveat.
+    return gemv_dispatch(exec_q, matrixA, vectorX, vectorY, trans_op, alpha,
+                         beta, depends);
+}
+
 template <typename fnT, typename varT>
 struct GemvContigFactory
 {
diff --git a/dpnp/backend/extensions/blas/gemv.hpp b/dpnp/backend/extensions/blas/gemv.hpp
index c3e1c503fde8..55f25672b0b8 100644
--- a/dpnp/backend/extensions/blas/gemv.hpp
+++ b/dpnp/backend/extensions/blas/gemv.hpp
@@ -35,6 +35,8 @@
 
 namespace dpnp::extensions::blas
 {
+// Convenience wrapper: alpha = 1, beta = 0. Preserved for the existing
+// dpnp call sites (dpnp.dot etc.) that do not care about scaling.
 extern std::pair<sycl::event, sycl::event>
     gemv(sycl::queue &exec_q,
          const dpnp::tensor::usm_ndarray &matrixA,
@@ -43,5 +45,36 @@ extern std::pair<sycl::event, sycl::event>
          const bool transpose,
          const std::vector<sycl::event> &depends);
 
+// Full y = alpha * op(A) * x + beta * y form. Required by the GMRES
+// Arnoldi step where we fuse u -= V @ h into a single gemv call
+// (alpha = -1, beta = 1), and where we write h = V^H @ u directly into
+// a Hessenberg column slice (alpha = 1, beta = 0). Both alpha and
+// beta arrive as double on the Python side and are cast to the matrix
+// value type inside the impl -- complex callers should use 1 / 0 / -1
+// (representable exactly) to avoid silent imaginary loss.
+//
+// ``trans_op`` selects the operation applied to A:
+//      0 = N  (no transpose)         y = alpha * A   @ x + beta * y
+//      1 = T  (transpose)            y = alpha * A^T @ x + beta * y
+//      2 = C  (conjugate-transpose)  y = alpha * A^H @ x + beta * y
+//
+// For real-valued A, T and C are equivalent. For complex A they
+// differ, and C is required for any algorithm that performs a
+// Hermitian inner product through gemv -- the GMRES Arnoldi step
+// (Gram-Schmidt over a complex Krylov basis) being the canonical
+// example. ``trans_op = 2`` is currently only supported for
+// F-contiguous (column-major) matrices; the row-major code path
+// for conjugate-transpose would require an explicit element-wise
+// conjugate pass and is not wired up here.
+extern std::pair<sycl::event, sycl::event>
+    gemv_alpha_beta(sycl::queue &exec_q,
+                    const dpnp::tensor::usm_ndarray &matrixA,
+                    const dpnp::tensor::usm_ndarray &vectorX,
+                    const dpnp::tensor::usm_ndarray &vectorY,
+                    const int trans_op,
+                    const double alpha,
+                    const double beta,
+                    const std::vector<sycl::event> &depends);
+
 extern void init_gemv_dispatch_vector(void);
 } // namespace dpnp::extensions::blas
diff --git a/dpnp/backend/extensions/sparse/CMakeLists.txt b/dpnp/backend/extensions/sparse/CMakeLists.txt
new file mode 100644
index 000000000000..f6f78fb67867
--- /dev/null
+++ b/dpnp/backend/extensions/sparse/CMakeLists.txt
@@ -0,0 +1,119 @@
+# -*- coding: utf-8 -*-
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+#   may be used to endorse or promote products derived from this software
+#   without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+set(python_module_name _sparse_impl)
+set(_module_src
+    ${CMAKE_CURRENT_SOURCE_DIR}/sparse_py.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/gemv.cpp
+)
+
+pybind11_add_module(${python_module_name} MODULE ${_module_src})
+add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src})
+
+if(_dpnp_sycl_targets)
+    # make fat binary
+    target_compile_options(
+        ${python_module_name}
+        PRIVATE ${_dpnp_sycl_target_compile_options}
+    )
+    target_link_options(${python_module_name} PRIVATE ${_dpnp_sycl_target_link_options})
+endif()
+
+if(WIN32)
+    if(${CMAKE_VERSION} VERSION_LESS "3.27")
+        # this is a work-around for target_link_options inserting option after -link option, cause
+        # linker to ignore it.
+        set(CMAKE_CXX_LINK_FLAGS
+            "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel"
+        )
+    endif()
+endif()
+
+set_target_properties(
+    ${python_module_name}
+    PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON
+)
+
+target_include_directories(
+    ${python_module_name}
+    PRIVATE
+        ${CMAKE_CURRENT_SOURCE_DIR}/../common
+        ${CMAKE_SOURCE_DIR}/dpnp/backend/include
+        ${CMAKE_SOURCE_DIR}/dpnp/tensor/libtensor/include
+)
+
+# treat below headers as system to suppress the warnings there during the build
+target_include_directories(
+    ${python_module_name}
+    SYSTEM
+    PRIVATE
+        ${SYCL_INCLUDE_DIR}
+        ${Dpctl_INCLUDE_DIRS}
+        ${CMAKE_BINARY_DIR} # For generated Cython headers
+)
+
+if(WIN32)
+    target_compile_options(
+        ${python_module_name}
+        PRIVATE /clang:-fno-approx-func /clang:-fno-finite-math-only
+    )
+else()
+    target_compile_options(
+        ${python_module_name}
+        PRIVATE -fno-approx-func -fno-finite-math-only
+    )
+endif()
+
+target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel)
+
+if(DPNP_GENERATE_COVERAGE)
+    target_link_options(
+        ${python_module_name}
+        PRIVATE -fprofile-instr-generate -fcoverage-mapping
+    )
+endif()
+
+if(_ues_onemath)
+    target_link_libraries(${python_module_name} PRIVATE ${ONEMATH_LIB})
+    target_compile_options(${python_module_name} PRIVATE -DUSE_ONEMATH)
+    if(_ues_onemath_cuda)
+        target_compile_options(${python_module_name} PRIVATE -DUSE_ONEMATH_CUSPARSE)
+    endif()
+else()
+    target_link_libraries(${python_module_name} PUBLIC MKL::MKL_SYCL::SPARSE)
+endif()
+
+if(DPNP_WITH_REDIST)
+    set_target_properties(
+        ${python_module_name}
+        PROPERTIES INSTALL_RPATH "$ORIGIN/../../../../../../"
+    )
+endif()
+
+install(TARGETS ${python_module_name} DESTINATION "dpnp/backend/extensions/sparse")
diff --git a/dpnp/backend/extensions/sparse/gemv.cpp b/dpnp/backend/extensions/sparse/gemv.cpp
new file mode 100644
index 000000000000..01008179b377
--- /dev/null
+++ b/dpnp/backend/extensions/sparse/gemv.cpp
@@ -0,0 +1,399 @@
+//*****************************************************************************
+// Copyright (c) 2025, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#include <complex>
+#include <cstdint>
+#include <stdexcept>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include <pybind11/pybind11.h>
+
+// utils extension header
+#include "ext/common.hpp"
+
+// dpnp tensor headers
+#include "utils/memory_overlap.hpp"
+#include "utils/output_validation.hpp"
+#include "utils/type_utils.hpp"
+
+#include "gemv.hpp"
+#include "types_matrix.hpp"
+
+namespace dpnp::extensions::sparse
+{
+
+namespace mkl_sparse = oneapi::mkl::sparse;
+namespace py = pybind11;
+namespace type_utils = dpnp::tensor::type_utils;
+
+using ext::common::init_dispatch_table;
+
+// ---------------------------------------------------------------------------
+// Dispatch table types
+// ---------------------------------------------------------------------------
+
+/**
+ * init_impl: builds the matrix_handle, calls set_csr_data + optimize_gemv.
+ * Returns (handle_ptr, optimize_event).
+ * All CSR arrays are *not* copied -- they must stay alive until release.
+ */
+typedef std::pair<std::uintptr_t, sycl::event> (*gemv_init_fn_ptr_t)(
+    sycl::queue &,
+    oneapi::mkl::transpose,
+    const char *,       // row_ptr (typeless)
+    const char *,       // col_ind (typeless)
+    const char *,       // values  (typeless)
+    const std::int64_t, // num_rows
+    const std::int64_t, // num_cols
+    const std::int64_t, // nnz
+    const std::vector<sycl::event> &);
+
+/**
+ * compute_impl: fires sparse::gemv using a pre-built handle.
+ * Returns the gemv event directly -- no host_task wrapping.
+ */
+typedef sycl::event (*gemv_compute_fn_ptr_t)(
+    sycl::queue &,
+    oneapi::mkl::sparse::matrix_handle_t,
+    oneapi::mkl::transpose,
+    const double, // alpha (cast to Tv inside)
+    const char *, // x (typeless)
+    const double, // beta  (cast to Tv inside)
+    char *,       // y (typeless, writable)
+    const std::vector<sycl::event> &);
+
+// Init dispatch: 2-D on (Tv, Ti).
+static gemv_init_fn_ptr_t gemv_init_dispatch_table[dpnp_td_ns::num_types]
+                                                  [dpnp_td_ns::num_types];
+
+// Compute dispatch: 1-D on Tv. The index type is baked into the handle,
+// so compute doesn't need it.
+static gemv_compute_fn_ptr_t gemv_compute_dispatch_table[dpnp_td_ns::num_types];
+
+// ---------------------------------------------------------------------------
+// Per-type init implementation
+// ---------------------------------------------------------------------------
+
+template <typename Tv, typename Ti>
+static std::pair<std::uintptr_t, sycl::event>
+    gemv_init_impl(sycl::queue &exec_q,
+                   oneapi::mkl::transpose mkl_trans,
+                   const char *row_ptr_data,
+                   const char *col_ind_data,
+                   const char *values_data,
+                   std::int64_t num_rows,
+                   std::int64_t num_cols,
+                   std::int64_t nnz,
+                   const std::vector<sycl::event> &depends)
+{
+    type_utils::validate_type_for_device<Tv>(exec_q);
+
+    const Ti *row_ptr = reinterpret_cast<const Ti *>(row_ptr_data);
+    const Ti *col_ind = reinterpret_cast<const Ti *>(col_ind_data);
+    const Tv *values = reinterpret_cast<const Tv *>(values_data);
+
+    mkl_sparse::matrix_handle_t spmat = nullptr;
+    mkl_sparse::init_matrix_handle(&spmat);
+
+    auto ev_set = mkl_sparse::set_csr_data(
+        exec_q, spmat, num_rows, num_cols, nnz, oneapi::mkl::index_base::zero,
+        const_cast<Ti *>(row_ptr), const_cast<Ti *>(col_ind),
+        const_cast<Tv *>(values), depends);
+
+    sycl::event ev_opt;
+    try {
+        ev_opt = mkl_sparse::optimize_gemv(exec_q, mkl_trans, spmat, {ev_set});
+    } catch (oneapi::mkl::exception const &e) {
+        mkl_sparse::release_matrix_handle(exec_q, &spmat, {});
+        throw std::runtime_error(
+            std::string("sparse_gemv_init: MKL exception in optimize_gemv: ") +
+            e.what());
+    } catch (sycl::exception const &e) {
+        mkl_sparse::release_matrix_handle(exec_q, &spmat, {});
+        throw std::runtime_error(
+            std::string("sparse_gemv_init: SYCL exception in optimize_gemv: ") +
+            e.what());
+    }
+
+    auto handle_ptr = reinterpret_cast<std::uintptr_t>(spmat);
+    return {handle_ptr, ev_opt};
+}
+
+// ---------------------------------------------------------------------------
+// Per-type compute implementation
+// ---------------------------------------------------------------------------
+
+template <typename Tv>
+static sycl::event gemv_compute_impl(sycl::queue &exec_q,
+                                     mkl_sparse::matrix_handle_t spmat,
+                                     oneapi::mkl::transpose mkl_trans,
+                                     double alpha_d,
+                                     const char *x_data,
+                                     double beta_d,
+                                     char *y_data,
+                                     const std::vector<sycl::event> &depends)
+{
+    // For complex Tv the single-arg constructor sets imag to zero.
+    // Solvers use alpha=1, beta=0 so this is exact; other callers
+    // passing complex scalars via this path will lose the imag
+    // component silently.
+    const Tv alpha = static_cast<Tv>(alpha_d);
+    const Tv beta = static_cast<Tv>(beta_d);
+
+    const Tv *x = reinterpret_cast<const Tv *>(x_data);
+    Tv *y = reinterpret_cast<Tv *>(y_data);
+
+    try {
+        return mkl_sparse::gemv(exec_q, mkl_trans, alpha, spmat, x, beta, y,
+                                depends);
+    } catch (oneapi::mkl::exception const &e) {
+        throw std::runtime_error(
+            std::string("sparse_gemv_compute: MKL exception: ") + e.what());
+    } catch (sycl::exception const &e) {
+        throw std::runtime_error(
+            std::string("sparse_gemv_compute: SYCL exception: ") + e.what());
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Public entry points
+// ---------------------------------------------------------------------------
+
+static oneapi::mkl::transpose decode_trans(const int trans)
+{
+    switch (trans) {
+    case 0:
+        return oneapi::mkl::transpose::nontrans;
+    case 1:
+        return oneapi::mkl::transpose::trans;
+    case 2:
+        return oneapi::mkl::transpose::conjtrans;
+    default:
+        throw std::invalid_argument(
+            "sparse_gemv: trans must be 0 (N), 1 (T), or 2 (C)");
+    }
+}
+
+std::tuple<std::uintptr_t, int, sycl::event>
+    sparse_gemv_init(sycl::queue &exec_q,
+                     const int trans,
+                     const dpnp::tensor::usm_ndarray &row_ptr,
+                     const dpnp::tensor::usm_ndarray &col_ind,
+                     const dpnp::tensor::usm_ndarray &values,
+                     const std::int64_t num_rows,
+                     const std::int64_t num_cols,
+                     const std::int64_t nnz,
+                     const std::vector<sycl::event> &depends)
+{
+    if (!dpctl::utils::queues_are_compatible(
+            exec_q,
+            {row_ptr.get_queue(), col_ind.get_queue(), values.get_queue()}))
+        throw py::value_error(
+            "sparse_gemv_init: USM allocations are not compatible with the "
+            "execution queue.");
+
+    // Basic CSR shape sanity.
+    if (row_ptr.get_ndim() != 1 || col_ind.get_ndim() != 1 ||
+        values.get_ndim() != 1)
+        throw py::value_error(
+            "sparse_gemv_init: row_ptr, col_ind, values must all be 1-D.");
+
+    if (row_ptr.get_shape(0) != num_rows + 1)
+        throw py::value_error(
+            "sparse_gemv_init: row_ptr length must equal num_rows + 1.");
+
+    if (col_ind.get_shape(0) != nnz || values.get_shape(0) != nnz)
+        throw py::value_error(
+            "sparse_gemv_init: col_ind and values length must equal nnz.");
+
+    // Index types of row_ptr and col_ind must match.
+    if (row_ptr.get_typenum() != col_ind.get_typenum())
+        throw py::value_error(
+            "sparse_gemv_init: row_ptr and col_ind must have the same dtype.");
+
+    auto mkl_trans = decode_trans(trans);
+
+    auto array_types = dpnp_td_ns::usm_ndarray_types();
+    const int val_id = array_types.typenum_to_lookup_id(values.get_typenum());
+    const int idx_id = array_types.typenum_to_lookup_id(row_ptr.get_typenum());
+
+    gemv_init_fn_ptr_t init_fn = gemv_init_dispatch_table[val_id][idx_id];
+    if (init_fn == nullptr)
+        throw py::value_error(
+            "sparse_gemv_init: no implementation for the given value/index "
+            "dtype combination. Supported: {float32,float64,complex64,"
+            "complex128} x {int32,int64}.");
+
+    auto [handle_ptr, ev_opt] =
+        init_fn(exec_q, mkl_trans, row_ptr.get_data(), col_ind.get_data(),
+                values.get_data(), num_rows, num_cols, nnz, depends);
+
+    return {handle_ptr, val_id, ev_opt};
+}
+
+sycl::event sparse_gemv_compute(sycl::queue &exec_q,
+                                const std::uintptr_t handle_ptr,
+                                const int val_type_id,
+                                const int trans,
+                                const double alpha,
+                                const dpnp::tensor::usm_ndarray &x,
+                                const double beta,
+                                const dpnp::tensor::usm_ndarray &y,
+                                const std::int64_t num_rows,
+                                const std::int64_t num_cols,
+                                const std::vector<sycl::event> &depends)
+{
+    if (x.get_ndim() != 1)
+        throw py::value_error("sparse_gemv_compute: x must be a 1-D array.");
+    if (y.get_ndim() != 1)
+        throw py::value_error("sparse_gemv_compute: y must be a 1-D array.");
+
+    if (!dpctl::utils::queues_are_compatible(exec_q,
+                                             {x.get_queue(), y.get_queue()}))
+        throw py::value_error(
+            "sparse_gemv_compute: USM allocations are not compatible with the "
+            "execution queue.");
+
+    auto const &overlap = dpnp::tensor::overlap::MemoryOverlap();
+    if (overlap(x, y))
+        throw py::value_error(
+            "sparse_gemv_compute: x and y are overlapping memory segments.");
+
+    dpnp::tensor::validation::CheckWritable::throw_if_not_writable(y);
+
+    // Shape validation: op(A) is (num_rows, num_cols) for trans=N,
+    // (num_cols, num_rows) for trans={T,C}.
+    auto mkl_trans = decode_trans(trans);
+    const bool is_non_trans = (mkl_trans == oneapi::mkl::transpose::nontrans);
+    const std::int64_t op_rows = is_non_trans ? num_rows : num_cols;
+    const std::int64_t op_cols = is_non_trans ? num_cols : num_rows;
+
+    if (x.get_shape(0) != op_cols)
+        throw py::value_error(
+            "sparse_gemv_compute: x length does not match operator columns.");
+    if (y.get_shape(0) != op_rows)
+        throw py::value_error(
+            "sparse_gemv_compute: y length does not match operator rows.");
+
+    dpnp::tensor::validation::AmpleMemory::throw_if_not_ample(
+        y, static_cast<std::size_t>(op_rows));
+
+    // Dtype verification: x, y, and the handle's value type must all match.
+    auto array_types = dpnp_td_ns::usm_ndarray_types();
+    const int x_val_id = array_types.typenum_to_lookup_id(x.get_typenum());
+    const int y_val_id = array_types.typenum_to_lookup_id(y.get_typenum());
+
+    if (x_val_id != val_type_id || y_val_id != val_type_id)
+        throw py::value_error(
+            "sparse_gemv_compute: x and y dtype must match the value dtype "
+            "of the sparse matrix used to build the handle.");
+
+    if (val_type_id < 0 || val_type_id >= dpnp_td_ns::num_types)
+        throw py::value_error("sparse_gemv_compute: val_type_id out of range.");
+
+    gemv_compute_fn_ptr_t compute_fn = gemv_compute_dispatch_table[val_type_id];
+
+    if (compute_fn == nullptr)
+        throw py::value_error("sparse_gemv_compute: unsupported value dtype.");
+
+    auto spmat = reinterpret_cast<mkl_sparse::matrix_handle_t>(handle_ptr);
+
+    return compute_fn(exec_q, spmat, mkl_trans, alpha, x.get_data(), beta,
+                      const_cast<char *>(y.get_data()), depends);
+}
+
+sycl::event sparse_gemv_release(sycl::queue &exec_q,
+                                const std::uintptr_t handle_ptr,
+                                const std::vector<sycl::event> &depends)
+{
+    auto spmat = reinterpret_cast<mkl_sparse::matrix_handle_t>(handle_ptr);
+
+    // release_matrix_handle takes `depends` so it will not free the handle
+    // until all pending compute work on it has completed. In recent oneMKL
+    // versions release_matrix_handle returns a sycl::event; older versions
+    // returned void. If your pinned oneMKL returns void, replace the body
+    // with:
+    //     mkl_sparse::release_matrix_handle(exec_q, &spmat, depends);
+    //     return exec_q.submit([&](sycl::handler &cgh) {
+    //         cgh.depends_on(depends);
+    //         cgh.host_task([]() {});
+    //     });
+    sycl::event release_ev =
+        mkl_sparse::release_matrix_handle(exec_q, &spmat, depends);
+
+    return release_ev;
+}
+
+// ---------------------------------------------------------------------------
+// Dispatch table factories and registration
+// ---------------------------------------------------------------------------
+
+template <typename fnT, typename Tv, typename Ti>
+struct GemvInitContigFactory
+{
+    fnT get()
+    {
+        if constexpr (types::SparseGemvInitTypePairSupportFactory<
+                          Tv, Ti>::is_defined)
+            return gemv_init_impl<Tv, Ti>;
+        else
+            return nullptr;
+    }
+};
+
+template <typename fnT, typename Tv>
+struct GemvComputeContigFactory
+{
+    fnT get()
+    {
+        if constexpr (types::SparseGemvComputeTypeSupportFactory<
+                          Tv>::is_defined)
+            return gemv_compute_impl<Tv>;
+        else
+            return nullptr;
+    }
+};
+
+void init_sparse_gemv_dispatch_tables(void)
+{
+    // 2-D table on (Tv, Ti) for init.
+    init_dispatch_table<gemv_init_fn_ptr_t, GemvInitContigFactory>(
+        gemv_init_dispatch_table);
+
+    // 1-D table on Tv for compute. dpctl's type dispatch headers expose
+    // DispatchVectorBuilder as the 1-D analogue of DispatchTableBuilder.
+    dpnp_td_ns::DispatchVectorBuilder<
+        gemv_compute_fn_ptr_t, GemvComputeContigFactory, dpnp_td_ns::num_types>
+        builder;
+    builder.populate_dispatch_vector(gemv_compute_dispatch_table);
+}
+
+} // namespace dpnp::extensions::sparse
diff --git a/dpnp/backend/extensions/sparse/gemv.hpp b/dpnp/backend/extensions/sparse/gemv.hpp
new file mode 100644
index 000000000000..f6c05b308656
--- /dev/null
+++ b/dpnp/backend/extensions/sparse/gemv.hpp
@@ -0,0 +1,126 @@
+//*****************************************************************************
+// Copyright (c) 2025, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#pragma once
+
+#include <oneapi/mkl.hpp>
+#include <sycl/sycl.hpp>
+
+#include "dpnp4pybind11.hpp"
+
+namespace dpnp::extensions::sparse
+{
+
+/**
+ * sparse_gemv_init -- ONE-TIME setup per sparse matrix operator.
+ *
+ * Calls init_matrix_handle + set_csr_data + optimize_gemv.
+ *
+ * Returns a tuple of:
+ *   - handle_ptr:   opaque matrix_handle_t cast to uintptr_t for safe
+ *                   Python round-tripping.
+ *   - val_type_id:  the dpctl typenum lookup id of the value dtype Tv.
+ *                   Python MUST pass this back to sparse_gemv_compute so
+ *                   the C++ layer can verify that x and y dtype match the
+ *                   handle's value type.
+ *   - event:        dependency event from optimize_gemv; the caller must
+ *                   wait on it (or chain via depends) before the first
+ *                   sparse_gemv_compute call.
+ *
+ * LIFETIME CONTRACT -- IMPORTANT:
+ * The handle owns NO copies of the CSR arrays. The caller MUST keep
+ * row_ptr, col_ind, and values USM allocations alive until
+ * sparse_gemv_release has been called AND its returned event has
+ * completed. Dropping any of them earlier is undefined behavior and
+ * will produce silent memory corruption -- there is no runtime check.
+ *
+ * The Python wrapper (_CachedSpMV) enforces this contract by holding
+ * a reference to the CSR matrix for the lifetime of the handle.
+ */
+extern std::tuple<std::uintptr_t, int, sycl::event>
+    sparse_gemv_init(sycl::queue &exec_q,
+                     const int trans,
+                     const dpnp::tensor::usm_ndarray &row_ptr,
+                     const dpnp::tensor::usm_ndarray &col_ind,
+                     const dpnp::tensor::usm_ndarray &values,
+                     const std::int64_t num_rows,
+                     const std::int64_t num_cols,
+                     const std::int64_t nnz,
+                     const std::vector<sycl::event> &depends);
+
+/**
+ * sparse_gemv_compute -- PER-ITERATION SpMV.
+ *
+ * Calls only oneapi::mkl::sparse::gemv using the pre-built handle.
+ * Verifies that:
+ *   - x and y are 1-D usm_ndarrays on a queue compatible with exec_q
+ *   - x and y dtype match val_type_id (the handle's value type)
+ *   - x and y shapes match op(A) dimensions, taking trans into account
+ *     (op(A) is num_rows x num_cols for trans=N, num_cols x num_rows
+ *     for trans={T,C})
+ *   - y is writable and does not overlap x
+ *
+ * alpha and beta are passed as double and cast inside gemv_compute_impl
+ * to the matrix value type. For complex Tv the cast drops the imaginary
+ * part; callers needing complex scalars should keep alpha=1, beta=0
+ * (the solver use case).
+ *
+ * Returns the gemv event. The caller is responsible for sequencing
+ * subsequent work on the same queue; no host-side wait or host_task
+ * keep-alive is performed.
+ */
+extern sycl::event sparse_gemv_compute(sycl::queue &exec_q,
+                                       const std::uintptr_t handle_ptr,
+                                       const int val_type_id,
+                                       const int trans,
+                                       const double alpha,
+                                       const dpnp::tensor::usm_ndarray &x,
+                                       const double beta,
+                                       const dpnp::tensor::usm_ndarray &y,
+                                       const std::int64_t num_rows,
+                                       const std::int64_t num_cols,
+                                       const std::vector<sycl::event> &depends);
+
+/**
+ * sparse_gemv_release -- free the matrix_handle created by sparse_gemv_init.
+ *
+ * Must be called exactly once per handle, after all compute calls that
+ * depend on it have completed. The returned event depends on the release,
+ * so the caller can chain CSR buffer deallocation on it safely.
+ */
+extern sycl::event sparse_gemv_release(sycl::queue &exec_q,
+                                       const std::uintptr_t handle_ptr,
+                                       const std::vector<sycl::event> &depends);
+
+/**
+ * Register the init (2-D on Tv x Ti) and compute (1-D on Tv) dispatch
+ * tables. Called exactly once from PYBIND11_MODULE.
+ */
+extern void init_sparse_gemv_dispatch_tables(void);
+
+} // namespace dpnp::extensions::sparse
diff --git a/dpnp/backend/extensions/sparse/sparse_py.cpp b/dpnp/backend/extensions/sparse/sparse_py.cpp
new file mode 100644
index 000000000000..4fe69bc9e0a9
--- /dev/null
+++ b/dpnp/backend/extensions/sparse/sparse_py.cpp
@@ -0,0 +1,149 @@
+//*****************************************************************************
+// Copyright (c) 2025, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include <cstdint>
+#include <tuple>
+#include <vector>
+
+#include "gemv.hpp"
+
+namespace py = pybind11;
+
+using dpnp::extensions::sparse::init_sparse_gemv_dispatch_tables;
+using dpnp::extensions::sparse::sparse_gemv_compute;
+using dpnp::extensions::sparse::sparse_gemv_init;
+using dpnp::extensions::sparse::sparse_gemv_release;
+
+PYBIND11_MODULE(_sparse_impl, m)
+{
+    init_sparse_gemv_dispatch_tables();
+
+    // ------------------------------------------------------------------
+    // _using_onemath()
+    //
+    // Reports whether the module was compiled against the portable
+    // OneMath interface (USE_ONEMATH) rather than direct oneMKL.
+    // ------------------------------------------------------------------
+    m.def("_using_onemath", []() -> bool {
+#ifdef USE_ONEMATH
+        return true;
+#else
+        return false;
+#endif
+    });
+
+    // ------------------------------------------------------------------
+    // _sparse_gemv_init(exec_q, trans, row_ptr, col_ind, values,
+    //                   num_rows, num_cols, nnz, depends)
+    //     -> (handle: int, val_type_id: int, event)
+    //
+    // Calls init_matrix_handle + set_csr_data + optimize_gemv ONCE.
+    //
+    // The returned handle is an opaque uintptr_t; val_type_id is the
+    // dpctl typenum lookup id of the matrix value dtype and MUST be
+    // passed back to _sparse_gemv_compute so the C++ layer can verify
+    // that x and y dtype match the handle.
+    //
+    // LIFETIME CONTRACT: the caller must keep row_ptr / col_ind / values
+    // USM allocations alive until _sparse_gemv_release has been called
+    // AND its returned event has completed. The handle does not copy
+    // the CSR arrays.
+    // ------------------------------------------------------------------
+    m.def(
+        "_sparse_gemv_init",
+        [](sycl::queue &exec_q, const int trans,
+           const dpnp::tensor::usm_ndarray &row_ptr,
+           const dpnp::tensor::usm_ndarray &col_ind,
+           const dpnp::tensor::usm_ndarray &values, const std::int64_t num_rows,
+           const std::int64_t num_cols, const std::int64_t nnz,
+           const std::vector<sycl::event> &depends)
+            -> std::tuple<std::uintptr_t, int, sycl::event> {
+            return sparse_gemv_init(exec_q, trans, row_ptr, col_ind, values,
+                                    num_rows, num_cols, nnz, depends);
+        },
+        py::arg("exec_q"), py::arg("trans"), py::arg("row_ptr"),
+        py::arg("col_ind"), py::arg("values"), py::arg("num_rows"),
+        py::arg("num_cols"), py::arg("nnz"), py::arg("depends"),
+        "Initialise oneMKL sparse matrix handle "
+        "(set_csr_data + optimize_gemv). "
+        "Returns (handle_ptr: int, val_type_id: int, event). "
+        "Call once per operator.");
+
+    // ------------------------------------------------------------------
+    // _sparse_gemv_compute(exec_q, handle, val_type_id, trans, alpha,
+    //                     x, beta, y, num_rows, num_cols, depends)
+    //     -> gemv_event
+    //
+    // Fires sparse::gemv using a pre-built handle. Verifies x and y
+    // dtype match val_type_id from init, and that shapes agree with
+    // op(A) dimensions (swapped for trans != N).
+    //
+    // Only the cheap MKL kernel is dispatched; no analysis overhead.
+    // No host_task keep-alive is submitted -- pybind11 refcounts the
+    // usm_ndarrays across the call, and sequencing of subsequent work
+    // on the same queue happens automatically.
+    // ------------------------------------------------------------------
+    m.def(
+        "_sparse_gemv_compute",
+        [](sycl::queue &exec_q, const std::uintptr_t handle_ptr,
+           const int val_type_id, const int trans, const double alpha,
+           const dpnp::tensor::usm_ndarray &x, const double beta,
+           const dpnp::tensor::usm_ndarray &y, const std::int64_t num_rows,
+           const std::int64_t num_cols,
+           const std::vector<sycl::event> &depends) -> sycl::event {
+            return sparse_gemv_compute(exec_q, handle_ptr, val_type_id, trans,
+                                       alpha, x, beta, y, num_rows, num_cols,
+                                       depends);
+        },
+        py::arg("exec_q"), py::arg("handle"), py::arg("val_type_id"),
+        py::arg("trans"), py::arg("alpha"), py::arg("x"), py::arg("beta"),
+        py::arg("y"), py::arg("num_rows"), py::arg("num_cols"),
+        py::arg("depends"),
+        "Execute sparse::gemv using a pre-built handle. "
+        "Returns the gemv event.");
+
+    // ------------------------------------------------------------------
+    // _sparse_gemv_release(exec_q, handle, depends) -> event
+    //
+    // Releases the matrix_handle allocated by _sparse_gemv_init.
+    // Must be called exactly once per handle after all compute calls
+    // referencing it have completed. The returned event depends on the
+    // release, so callers can chain CSR buffer deallocation on it.
+    // ------------------------------------------------------------------
+    m.def(
+        "_sparse_gemv_release",
+        [](sycl::queue &exec_q, const std::uintptr_t handle_ptr,
+           const std::vector<sycl::event> &depends) -> sycl::event {
+            return sparse_gemv_release(exec_q, handle_ptr, depends);
+        },
+        py::arg("exec_q"), py::arg("handle"), py::arg("depends"),
+        "Release the oneMKL matrix_handle created by _sparse_gemv_init.");
+}
diff --git a/dpnp/backend/extensions/sparse/types_matrix.hpp b/dpnp/backend/extensions/sparse/types_matrix.hpp
new file mode 100644
index 000000000000..351d4ff11830
--- /dev/null
+++ b/dpnp/backend/extensions/sparse/types_matrix.hpp
@@ -0,0 +1,122 @@
+//*****************************************************************************
+// Copyright (c) 2025, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#pragma once
+
+#include <complex>
+#include <cstdint>
+#include <type_traits>
+
+// dpnp tensor headers
+#include "utils/type_dispatch.hpp"
+
+// namespace for operations with types
+namespace dpnp_td_ns = dpnp::tensor::type_dispatch;
+
+namespace dpnp::extensions::sparse::types
+{
+
+/**
+ * @brief Factory encoding the supported (value type, index type) combinations
+ *        for oneapi::mkl::sparse::gemv initialization.
+ *
+ * oneMKL sparse BLAS supports:
+ *   - float32          with int32 indices
+ *   - float32          with int64 indices
+ *   - float64          with int32 indices
+ *   - float64          with int64 indices
+ *   - complex<float>   with int32 indices
+ *   - complex<float>   with int64 indices
+ *   - complex<double>  with int32 indices
+ *   - complex<double>  with int64 indices
+ *
+ * Complex support requires oneMKL >= 2023.x (sparse BLAS complex USM API).
+ * The init dispatch table entry is non-null only when the pair is registered
+ * here; the Python layer falls back to A.dot(x) when the entry is nullptr.
+ *
+ * @tparam Tv  Value type of the sparse matrix and dense vectors.
+ * @tparam Ti  Index type of the sparse matrix (row_ptr / col_ind arrays).
+ */
+template <typename Tv, typename Ti>
+struct SparseGemvInitTypePairSupportFactory
+{
+    static constexpr bool is_defined = std::disjunction<
+        // real single precision
+        dpnp_td_ns::TypePairDefinedEntry<Tv, float, Ti, std::int32_t>,
+        dpnp_td_ns::TypePairDefinedEntry<Tv, float, Ti, std::int64_t>,
+        // real double precision
+        dpnp_td_ns::TypePairDefinedEntry<Tv, double, Ti, std::int32_t>,
+        dpnp_td_ns::TypePairDefinedEntry<Tv, double, Ti, std::int64_t>,
+        // complex single precision
+        dpnp_td_ns::
+            TypePairDefinedEntry<Tv, std::complex<float>, Ti, std::int32_t>,
+        dpnp_td_ns::
+            TypePairDefinedEntry<Tv, std::complex<float>, Ti, std::int64_t>,
+        // complex double precision
+        dpnp_td_ns::
+            TypePairDefinedEntry<Tv, std::complex<double>, Ti, std::int32_t>,
+        dpnp_td_ns::
+            TypePairDefinedEntry<Tv, std::complex<double>, Ti, std::int64_t>,
+        // fall-through
+        dpnp_td_ns::NotDefinedEntry>::is_defined;
+};
+
+/**
+ * @brief Factory encoding supported value types for sparse::gemv compute.
+ *
+ * The compute path only requires Tv because the index type is baked into
+ * the matrix_handle at init time. Using a 1-D dispatch vector on Tv avoids
+ * the wasted num_types * num_types slots of a 2-D table where only the
+ * diagonal (keyed on Ti) would ever be populated.
+ *
+ * If your pinned dpctl version does not expose TypeDefinedEntry as a 1-arg
+ * entry, fall back to the std::is_same_v expansion shown in the comment
+ * below -- both are equivalent.
+ *
+ * @tparam Tv  Value type of the sparse matrix and dense vectors.
+ */
+template <typename Tv>
+struct SparseGemvComputeTypeSupportFactory
+{
+#if defined(DPCTL_HAS_TYPE_DEFINED_ENTRY)
+    static constexpr bool
+        is_defined = std::disjunction dpnp_td_ns::TypeDefinedEntry<Tv, float>,
+        dpnp_td_ns::TypeDefinedEntry<Tv, double>,
+        dpnp_td_ns::TypeDefinedEntry<Tv, std::complex<float>>,
+        dpnp_td_ns::TypeDefinedEntry<Tv, std::complex<double>>,
+        dpnp_td_ns::NotDefinedEntry > ::is_defined;
+#else
+    // Portable fallback: works with any dpctl version.
+    static constexpr bool is_defined =
+        std::is_same_v<Tv, float> || std::is_same_v<Tv, double> ||
+        std::is_same_v<Tv, std::complex<float>> ||
+        std::is_same_v<Tv, std::complex<double>>;
+#endif
+};
+
+} // namespace dpnp::extensions::sparse::types
diff --git a/dpnp/scipy/__init__.py b/dpnp/scipy/__init__.py
index 56cf27f56342..ceb1f9df932e 100644
--- a/dpnp/scipy/__init__.py
+++ b/dpnp/scipy/__init__.py
@@ -36,6 +36,6 @@
 DPNP functionality, reusing DPNP and oneMKL implementations underneath.
 """
 
-from . import linalg, special
+from . import linalg, sparse, special
 
-__all__ = ["linalg", "special"]
+__all__ = ["linalg", "special", "sparse"]
diff --git a/dpnp/scipy/sparse/__init__.py b/dpnp/scipy/sparse/__init__.py
new file mode 100644
index 000000000000..a80ce67c5e0e
--- /dev/null
+++ b/dpnp/scipy/sparse/__init__.py
@@ -0,0 +1,44 @@
+# *****************************************************************************
+# Copyright (c) 2025, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+#   may be used to endorse or promote products derived from this software
+#   without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+"""Sparse linear algebra namespace for DPNP.
+
+Currently this module exposes the :mod:`dpnp.scipy.sparse.linalg` submodule
+and provides a location for future sparse matrix container types.
+"""
+
+from . import linalg
+from ._base import SparseABC, issparse
+from ._csr import csr_matrix
+
+__all__ = [
+    "linalg",
+    "SparseABC",
+    "issparse",
+    "csr_matrix",
+]
diff --git a/dpnp/scipy/sparse/_base.py b/dpnp/scipy/sparse/_base.py
new file mode 100644
index 000000000000..9a4af969f35d
--- /dev/null
+++ b/dpnp/scipy/sparse/_base.py
@@ -0,0 +1,27 @@
+"""Sparse base class and predicate, mirroring scipy/_lib/_sparse.py.
+
+Only the modern ``issparse`` predicate is exposed. The legacy
+``isspmatrix`` / ``isspmatrix_csr`` family (kept by SciPy for the
+``spmatrix`` vs ``sparray`` discriminator and slated for deprecation,
+see :mod:`scipy.sparse`) is intentionally omitted -- dpnp has no
+``spmatrix`` / ``sparray`` split, so the legacy names would have no
+useful semantics. Format-specific checks should use
+``issparse(A) and A.format == "csr"`` directly, which is what the
+solver fast-path already does.
+"""
+
+from abc import ABC
+
+
+# pylint: disable-next=too-few-public-methods
+class SparseABC(ABC):
+    """Abstract base for all dpnp.scipy.sparse format classes."""
+
+
+def issparse(x):
+    """Return True if x is a dpnp sparse matrix.
+
+    Mirrors :func:`scipy.sparse.issparse` semantics: returns True for any
+    instance of :class:`SparseABC`, False otherwise.
+    """
+    return isinstance(x, SparseABC)
diff --git a/dpnp/scipy/sparse/_csr.py b/dpnp/scipy/sparse/_csr.py
new file mode 100644
index 000000000000..98eb9d3fbdaf
--- /dev/null
+++ b/dpnp/scipy/sparse/_csr.py
@@ -0,0 +1,452 @@
+"""CSR matrix backed by dpnp/USM arrays.
+
+Minimal implementation supporting the operations exercised by
+dpnp.scipy.sparse.linalg solvers (cg, gmres, minres) and
+LinearOperator. Construction from dense arrays or raw CSR
+components; ``dot`` routed through oneMKL ``sparse::gemv`` when the
+compiled backend extension is available, with a dense fallback used
+when it is not.
+
+Operations not required by the solvers (arithmetic, format
+conversion, element-wise math, reductions, indexing) are
+intentionally not implemented in this initial version.
+
+SpMV fast path
+--------------
+On first ``.dot(x)`` with a 1-D ``x`` of a supported dtype, the
+instance lazily allocates an oneMKL ``matrix_handle`` via
+``_sparse_gemv_init`` (which itself runs ``set_csr_data`` plus
+``optimize_gemv`` -- the expensive sparsity-analysis phase). The
+handle is cached on the instance and reused for every subsequent
+matvec; ``__del__`` releases it. This matches the cupyx behaviour
+where ``csr_matrix.dot`` calls cuSPARSE SpMV directly without
+densification, and lets the iterative solvers in
+``dpnp.scipy.sparse.linalg`` reuse the same handle through
+``_make_fast_matvec`` without rebuilding it.
+"""
+
+from __future__ import annotations
+
+import numpy as _np
+
+import dpnp as _dpnp
+
+from ._base import SparseABC
+
+# Two short blocks intentionally mirror code in
+# dpnp/scipy/sparse/linalg/_iterative.py: the cached-SpMV invocation
+# and the __del__ shutdown-safe release pattern. Both are tightly
+# coupled to oneMKL's contract; extracting a shared helper would add
+# indirection without reducing real duplication.
+# pylint: disable=duplicate-code
+
+# Value dtypes the oneMKL sparse::gemv dispatch table registers
+# (see dpnp/backend/extensions/sparse/types_matrix.hpp). Anything
+# outside this set must take the dense fallback in ``dot``.
+_SPMV_VALUE_DTYPES = frozenset("fdFD")
+# Index dtypes oneMKL accepts (int32, int64). Matches the second
+# dimension of SparseGemvInitTypePairSupportFactory.
+_SPMV_INDEX_DTYPES = frozenset("ilq")
+
+
+# pylint: disable=invalid-name,too-many-instance-attributes
+# The class name ``csr_matrix`` is the public scipy/cupy API spelling and
+# must stay lowercase. The instance-attribute count exceeds the default
+# pylint cap because the lazily-built oneMKL handle adds four cache
+# fields (handle, val_type_id, si, exec_q) on top of the CSR triple +
+# shape; all are required.
+class csr_matrix(SparseABC):
+    """Compressed Sparse Row matrix on a SYCL device.
+
+    Construction
+    ------------
+    csr_matrix(D)
+        from a 2-D dpnp.ndarray.
+
+    csr_matrix((data, indices, indptr), shape=(M, N))
+        from raw CSR component arrays (1-D dpnp arrays on the same
+        SYCL queue).
+
+    csr_matrix(other_csr)
+        copy of another csr_matrix.
+
+    Attributes
+    ----------
+    data : dpnp.ndarray
+        1-D array of nonzero values, shape (nnz,).
+    indices : dpnp.ndarray
+        1-D array of column indices, shape (nnz,).
+    indptr : dpnp.ndarray
+        1-D array of row pointers, shape (M+1,).
+    shape : tuple of int
+    dtype : dpnp dtype
+    nnz : int
+    format : str
+        Always 'csr'.
+    ndim : int
+        Always 2.
+    """
+
+    format = "csr"
+    ndim = 2
+
+    def __init__(self, arg1, shape=None, dtype=None, copy=False):
+        # Lazy SpMV handle state. Assigned BEFORE the dispatch below so
+        # that __del__ never sees a partially-constructed object (it can
+        # be invoked if any of the _init_* helpers raise).
+        self._spmv_handle = None
+        self._spmv_val_type_id = -1
+        self._spmv_si = None
+        self._spmv_exec_q = None
+
+        if isinstance(arg1, _dpnp.ndarray):
+            self._init_from_dense(arg1, dtype=dtype)
+        elif isinstance(arg1, csr_matrix):
+            self._init_from_components(
+                (arg1.data, arg1.indices, arg1.indptr),
+                arg1.shape,
+                dtype=dtype if dtype is not None else arg1.dtype,
+                copy=True,
+            )
+        elif isinstance(arg1, tuple) and len(arg1) == 3:
+            if shape is None:
+                raise ValueError(
+                    "csr_matrix: shape must be provided when constructing "
+                    "from (data, indices, indptr) components"
+                )
+            self._init_from_components(arg1, shape, dtype=dtype, copy=copy)
+        else:
+            raise TypeError(
+                f"csr_matrix: cannot construct from {type(arg1).__name__}; "
+                "supported forms are a 2-D dpnp.ndarray, another csr_matrix, "
+                "or a (data, indices, indptr) tuple with shape= kwarg."
+            )
+
+    def _init_from_components(self, arrays, shape, dtype=None, copy=False):
+        data, indices, indptr = arrays
+
+        if not (
+            isinstance(data, _dpnp.ndarray)
+            and isinstance(indices, _dpnp.ndarray)
+            and isinstance(indptr, _dpnp.ndarray)
+        ):
+            raise TypeError(
+                "csr_matrix: data, indices, and indptr must be dpnp arrays"
+            )
+        if data.ndim != 1 or indices.ndim != 1 or indptr.ndim != 1:
+            raise ValueError(
+                "csr_matrix: data, indices, and indptr must be 1-D"
+            )
+        if data.shape[0] != indices.shape[0]:
+            raise ValueError(
+                f"csr_matrix: data length {data.shape[0]} != "
+                f"indices length {indices.shape[0]}"
+            )
+
+        nrows, ncols = int(shape[0]), int(shape[1])
+        if indptr.shape[0] != nrows + 1:
+            raise ValueError(
+                f"csr_matrix: indptr length {indptr.shape[0]} != "
+                f"nrows+1 ({nrows + 1})"
+            )
+
+        q = data.sycl_queue
+        if indices.sycl_queue != q or indptr.sycl_queue != q:
+            raise ValueError(
+                "csr_matrix: data, indices, and indptr must be on the same "
+                "SYCL queue"
+            )
+
+        idx_char = _np.dtype(indices.dtype).char
+        if idx_char not in ("i", "l", "q"):
+            raise TypeError(
+                f"csr_matrix: indices dtype must be int32 or int64, "
+                f"got {indices.dtype}"
+            )
+        if _np.dtype(indptr.dtype).char != idx_char:
+            raise TypeError(
+                f"csr_matrix: indptr dtype ({indptr.dtype}) must match "
+                f"indices dtype ({indices.dtype})"
+            )
+
+        if dtype is not None and _np.dtype(dtype) != _np.dtype(data.dtype):
+            data = data.astype(dtype, copy=True)
+        elif copy:
+            data = data.copy()
+            indices = indices.copy()
+            indptr = indptr.copy()
+
+        self.data = data
+        self.indices = indices
+        self.indptr = indptr
+        self._shape = (nrows, ncols)
+
+    def _init_from_dense(self, dense, dtype=None):
+        if dense.ndim != 2:
+            raise ValueError(
+                f"csr_matrix: dense input must be 2-D, got {dense.ndim}-D"
+            )
+        if dtype is not None:
+            dense = dense.astype(dtype, copy=False)
+
+        nrows, ncols = dense.shape
+        q = dense.sycl_queue
+
+        rows, cols = _dpnp.nonzero(dense)
+        nnz = int(rows.shape[0])
+
+        if nnz == 0:
+            self.data = _dpnp.empty(0, dtype=dense.dtype, sycl_queue=q)
+            self.indices = _dpnp.empty(0, dtype=_dpnp.int64, sycl_queue=q)
+            self.indptr = _dpnp.zeros(
+                nrows + 1, dtype=_dpnp.int64, sycl_queue=q
+            )
+            self._shape = (nrows, ncols)
+            return
+
+        values = dense[rows, cols]
+        idx_dtype = _dpnp.int64
+        row_counts = _dpnp.bincount(rows.astype(idx_dtype), minlength=nrows)
+        indptr = _dpnp.empty(nrows + 1, dtype=idx_dtype, sycl_queue=q)
+        indptr[0] = 0
+        indptr[1:] = _dpnp.cumsum(row_counts)
+
+        self.data = values
+        self.indices = cols.astype(idx_dtype)
+        self.indptr = indptr
+        self._shape = (nrows, ncols)
+
+    # --- read-only properties ------------------------------------------
+
+    @property
+    def shape(self):
+        """Tuple of matrix dimensions ``(M, N)``."""
+        return self._shape
+
+    @property
+    def dtype(self):
+        """Data type of stored values."""
+        return self.data.dtype
+
+    @property
+    def nnz(self):
+        """Number of stored nonzero entries."""
+        return int(self.data.shape[0])
+
+    @property
+    def size(self):
+        """Alias for ``nnz`` (number of stored entries)."""
+        return self.nnz
+
+    @property
+    # pylint: disable-next=invalid-name
+    def T(self):
+        """Transpose. Materializes via toarray() since CSC isn't implemented."""
+        return csr_matrix(self.toarray().T)
+
+    # --- SpMV fast-path internals --------------------------------------
+
+    def _spmv_supported(self):
+        """True iff value and index dtypes are in the oneMKL dispatch table."""
+        return (
+            _np.dtype(self.data.dtype).char in _SPMV_VALUE_DTYPES
+            and _np.dtype(self.indices.dtype).char in _SPMV_INDEX_DTYPES
+        )
+
+    def _ensure_spmv_handle(self):
+        """Lazily build the cached oneMKL matrix_handle for forward SpMV.
+
+        Returns the ``(si, handle, val_type_id, exec_q)`` quadruple so
+        callers can drive ``_sparse_gemv_compute`` directly. Returns
+        ``None`` if the compiled backend extension is unavailable, the
+        dtype combination is unsupported, or handle construction fails
+        for any backend-specific reason (in which case the caller must
+        fall back to a dense path).
+        """
+        if self._spmv_handle is not None:
+            return (
+                self._spmv_si,
+                self._spmv_handle,
+                self._spmv_val_type_id,
+                self._spmv_exec_q,
+            )
+
+        if not self._spmv_supported():
+            return None
+
+        try:
+            # Lazy import: keeps csr_matrix importable in builds that
+            # did not compile the sparse backend extension (e.g. host-
+            # only test matrices, doc builds).
+            # pylint: disable-next=import-outside-toplevel
+            from dpnp.backend.extensions.sparse import _sparse_impl as _si
+        except ImportError:
+            return None
+
+        exec_q = self.data.sycl_queue
+        try:
+            # pylint: disable-next=protected-access
+            handle, val_type_id, ev = _si._sparse_gemv_init(
+                exec_q,
+                0,  # trans=N (forward)
+                self.indptr,
+                self.indices,
+                self.data,
+                int(self._shape[0]),
+                int(self._shape[1]),
+                int(self.data.shape[0]),
+                [],
+            )
+        except Exception:  # pylint: disable=broad-exception-caught
+            # Backend dispatch may reject the (value, index) pair even
+            # though the Python guard above accepted them (e.g. complex
+            # support disabled in the linked oneMKL build). Fall through
+            # to the dense path silently.
+            return None
+
+        # set_csr_data + optimize_gemv must complete before any compute
+        # call can dispatch against the handle. This is the only blocking
+        # sync; subsequent matvecs return without waiting.
+        ev.wait()
+
+        self._spmv_si = _si
+        self._spmv_handle = handle
+        self._spmv_val_type_id = val_type_id
+        self._spmv_exec_q = exec_q
+        return (_si, handle, val_type_id, exec_q)
+
+    # --- public API: matvec via cached oneMKL handle -------------------
+
+    def dot(self, x):
+        """Compute ``A @ x``.
+
+        For a 1-D ``x`` of a supported dtype, this dispatches to oneMKL
+        ``sparse::gemv`` via a cached matrix handle (built lazily on the
+        first call). Subsequent calls reuse the handle and pay only the
+        SpMV kernel cost, matching the cupyx ``csr_matrix.dot`` behaviour.
+
+        Falls back to ``dpnp.dot(self.toarray(), x)`` when:
+
+          * the compiled sparse backend extension is not present,
+          * the value/index dtype combination is not in the oneMKL
+            dispatch table, or
+          * ``x`` is 2-D (no batched SpMV binding exists yet -- batched
+            SpMM is a different oneMKL entry point and intentionally not
+            wired up here).
+
+        The dense fallback materialises ``self`` and is therefore O(M*N)
+        in memory; the fast path is O(nnz) and never densifies.
+        """
+        if not isinstance(x, _dpnp.ndarray):
+            raise TypeError(
+                f"csr_matrix.dot: expected dpnp.ndarray, "
+                f"got {type(x).__name__}"
+            )
+        if x.ndim not in (1, 2):
+            raise ValueError(
+                f"csr_matrix.dot: x must be 1-D or 2-D, got {x.ndim}-D"
+            )
+
+        nrows, ncols = self._shape
+
+        if x.ndim == 1 and x.shape[0] == ncols:
+            handle_info = self._ensure_spmv_handle()
+            if handle_info is not None:
+                _si, handle, val_type_id, exec_q = handle_info
+                # Reject dtype mismatches deterministically here rather
+                # than letting the C++ layer raise: callers expect a
+                # clean TypeError for cross-dtype matvec.
+                if x.dtype != self.data.dtype:
+                    raise TypeError(
+                        f"csr_matrix.dot: x dtype {x.dtype} does not "
+                        f"match matrix dtype {self.data.dtype}"
+                    )
+                y = _dpnp.empty(nrows, dtype=self.data.dtype, sycl_queue=exec_q)
+                # Do NOT wait on the returned event: any subsequent dpnp
+                # operation on the same queue will serialise behind it
+                # automatically. Blocking here would dominate runtime
+                # for small systems (same rationale as _CachedSpMV in
+                # linalg/_iterative.py).
+                # pylint: disable-next=protected-access
+                _si._sparse_gemv_compute(
+                    exec_q,
+                    handle,
+                    val_type_id,
+                    0,  # trans=N
+                    1.0,  # alpha
+                    x,
+                    0.0,  # beta
+                    y,
+                    nrows,
+                    ncols,
+                    [],
+                )
+                return y
+
+        # Dense fallback. Materialises ``self`` once -- this path is
+        # exercised only when SpMV is unavailable for this matrix.
+        return _dpnp.dot(self.toarray(), x)
+
+    def __matmul__(self, x):
+        return self.dot(x)
+
+    def __del__(self):
+        # Release the cached oneMKL matrix_handle if one was built.
+        # See ``_iterative._CachedSpMV.__del__`` for the rationale of
+        # the staged except clauses below: during interpreter shutdown
+        # the compiled ``_sparse_impl`` extension may be GC'd before
+        # this __del__ runs, in which case ``si._sparse_gemv_release``
+        # evaluates to ``None``. Probe explicitly so a real backend
+        # error (extension still healthy) is not silenced by the same
+        # ``except Exception`` that catches the shutdown race.
+        handle = getattr(self, "_spmv_handle", None)
+        si = getattr(self, "_spmv_si", None)
+        if handle is None or si is None:
+            return
+
+        release_fn = getattr(si, "_sparse_gemv_release", None)
+        if release_fn is None:
+            self._spmv_handle = None
+            return
+
+        try:
+            # pylint: disable-next=not-callable
+            release_fn(self._spmv_exec_q, handle, [])
+        except (AttributeError, TypeError):
+            # Shutdown-mode races; handle is unrecoverable and the
+            # OS will reclaim it at process exit.
+            pass
+        except Exception:  # pylint: disable=broad-exception-caught
+            # Genuine backend error while the interpreter is healthy.
+            # Raising from __del__ produces only an unraisable warning
+            # and the handle is gone either way -- swallow it
+            # deliberately, distinct from the shutdown branch above.
+            pass
+        finally:
+            self._spmv_handle = None
+
+    def toarray(self):
+        """Convert to a dense dpnp 2-D array."""
+        nrows = self._shape[0]
+        q = self.data.sycl_queue
+        dense = _dpnp.zeros(self._shape, dtype=self.dtype, sycl_queue=q)
+        if self.nnz == 0:
+            return dense
+
+        row_lengths = self.indptr[1:] - self.indptr[:-1]
+        rows = _dpnp.repeat(
+            _dpnp.arange(nrows, dtype=self.indices.dtype, sycl_queue=q),
+            row_lengths,
+        )
+        dense[rows, self.indices] = self.data
+        return dense
+
+    def copy(self):
+        """Return a deep copy of this matrix."""
+        return csr_matrix(self)
+
+    def __repr__(self):
+        return (
+            f"<{self._shape[0]}x{self._shape[1]} csr_matrix "
+            f"of dtype {self.dtype} with {self.nnz} stored elements>"
+        )
diff --git a/dpnp/scipy/sparse/linalg/__init__.py b/dpnp/scipy/sparse/linalg/__init__.py
new file mode 100644
index 000000000000..30124562447e
--- /dev/null
+++ b/dpnp/scipy/sparse/linalg/__init__.py
@@ -0,0 +1,44 @@
+# *****************************************************************************
+# Copyright (c) 2025, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+#   may be used to endorse or promote products derived from this software
+#   without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+"""
+Sparse linear algebra interface for DPNP.
+
+This module provides a subset of :mod:`scipy.sparse.linalg`
+ functionality on top of DPNP arrays.
+
+The initial implementation focuses on the :class:`LinearOperator` interface
+and a small set of Krylov solvers (``cg``, ``gmres``, ``minres``).
+"""
+
+from __future__ import annotations
+
+from ._interface import LinearOperator, aslinearoperator
+from ._iterative import cg, gmres, minres
+
+__all__ = ["LinearOperator", "aslinearoperator", "cg", "gmres", "minres"]
diff --git a/dpnp/scipy/sparse/linalg/_interface.py b/dpnp/scipy/sparse/linalg/_interface.py
new file mode 100644
index 000000000000..7ae238ba9912
--- /dev/null
+++ b/dpnp/scipy/sparse/linalg/_interface.py
@@ -0,0 +1,660 @@
+# *****************************************************************************
+# Copyright (c) 2025, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+#   may be used to endorse or promote products derived from this software
+#   without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+"""LinearOperator and helpers for dpnp.scipy.sparse.linalg.
+
+Aligned with SciPy main scipy/sparse/linalg/_interface.py and
+CuPy v14.0.1 cupyx/scipy/sparse/linalg/_interface.py so that code
+written for either library is portable to dpnp.
+"""
+
+# Math-heavy module: single-letter and CamelCase identifiers such as
+# A, B, M, N, X, V, H are part of the published linear-algebra API and
+# mirror SciPy/CuPy verbatim, so the snake_case rule is intentionally
+# relaxed for the whole file.
+# pylint: disable=invalid-name
+
+from __future__ import annotations
+
+import warnings
+
+import dpnp
+
+# ---------------------------------------------------------------------------
+# helpers
+# ---------------------------------------------------------------------------
+
+
+def _isshape(shape):
+    """Return True if shape is a length-2 tuple of non-negative integers."""
+    if not isinstance(shape, tuple) or len(shape) != 2:
+        return False
+    try:
+        return all(int(s) >= 0 and int(s) == s for s in shape)
+    except (TypeError, ValueError):
+        return False
+
+
+def _isintlike(x):
+    try:
+        return int(x) == x
+    except (TypeError, ValueError):
+        return False
+
+
+def _get_dtype(operators, dtypes=None):
+    if dtypes is None:
+        dtypes = []
+    for obj in operators:
+        if obj is not None and hasattr(obj, "dtype") and obj.dtype is not None:
+            dtypes.append(obj.dtype)
+    return dpnp.result_type(*dtypes) if dtypes else None
+
+
+class LinearOperator:
+    """Drop-in replacement for cupyx/scipy LinearOperator backed by dpnp arrays.
+
+    Supports the full operator algebra (addition, multiplication, scaling,
+    power, adjoint A.H, transpose A.T) matching CuPy v14.0.1 and SciPy main.
+    """
+
+    ndim = 2
+
+    # Opt out of NumPy's ufunc dispatch (NEP 13); defers ``host_array *
+    # linop`` etc. to ``LinearOperator.__rmul__`` / ``__rmatmul__``.
+    # Same convention as ``dpnp.ndarray``.
+    __array_ufunc__ = None
+
+    def __new__(cls, *args, **kwargs):
+        if cls is LinearOperator:
+            return super().__new__(_CustomLinearOperator)
+        obj = super().__new__(cls)
+        if (
+            type(obj)._matvec is LinearOperator._matvec
+            and type(obj)._matmat is LinearOperator._matmat
+        ):
+            warnings.warn(
+                "LinearOperator subclass should implement at least one of "
+                "_matvec and _matmat.",
+                RuntimeWarning,
+                stacklevel=2,
+            )
+        return obj
+
+    def __init__(self, dtype, shape):
+        if dtype is not None:
+            dtype = dpnp.dtype(dtype)
+        shape = tuple(int(s) for s in shape)
+        if not _isshape(shape):
+            raise ValueError(
+                f"invalid shape {shape!r} (must be a length-2 tuple of "
+                "non-negative ints)"
+            )
+        self.dtype = dtype
+        self.shape = shape
+
+    def _init_dtype(self):
+        """Infer dtype via a trial matvec on an int8 zero vector.
+
+        Using ``int8`` (the lowest precedence numeric dtype) lets the
+        matvec promote to its natural output type without artificially
+        widening the result -- a float32 operator stays float32, a
+        complex64 operator stays complex64, etc.  Mirrors the behaviour
+        of ``scipy.sparse.linalg.LinearOperator._init_dtype`` and
+        ``cupyx.scipy.sparse.linalg.LinearOperator._init_dtype``.
+
+        A previous version used ``dpnp.float64`` here, which silently
+        upcast every dtype-inferred operator to float64; that broke
+        single-precision and complex-single workflows.
+        """
+        if self.dtype is not None:
+            return
+        v = dpnp.zeros(self.shape[-1], dtype=dpnp.int8)
+        self.dtype = self.matvec(v).dtype
+
+    def _matvec(self, x):
+        return self.matmat(x.reshape(-1, 1))
+
+    def _matmat(self, X):
+        return dpnp.hstack([self.matvec(col.reshape(-1, 1)) for col in X.T])
+
+    def _rmatvec(self, x):
+        if type(self)._adjoint is LinearOperator._adjoint:
+            raise NotImplementedError(
+                "rmatvec is not defined for this LinearOperator"
+            )
+        return self.H.matvec(x)
+
+    def _rmatmat(self, X):
+        if type(self)._adjoint is LinearOperator._adjoint:
+            return dpnp.hstack(
+                [self.rmatvec(col.reshape(-1, 1)) for col in X.T]
+            )
+        return self.H.matmat(X)
+
+    def matvec(self, x):
+        """Apply the matrix-vector product."""
+        M, N = self.shape
+        if x.shape not in ((N,), (N, 1)):
+            raise ValueError(
+                f"dimension mismatch: operator shape {self.shape}, "
+                f"vector shape {x.shape}"
+            )
+        y = self._matvec(x)
+        return y.reshape(M) if x.ndim == 1 else y.reshape(M, 1)
+
+    def rmatvec(self, x):
+        """Apply the adjoint matrix-vector product."""
+        M, N = self.shape
+        if x.shape not in ((M,), (M, 1)):
+            raise ValueError(
+                f"dimension mismatch: operator shape {self.shape}, "
+                f"vector shape {x.shape}"
+            )
+        y = self._rmatvec(x)
+        return y.reshape(N) if x.ndim == 1 else y.reshape(N, 1)
+
+    def matmat(self, X):
+        """Apply the matrix-matrix product."""
+        if X.ndim != 2:
+            raise ValueError(f"expected 2-D array, got {X.ndim}-D")
+        if X.shape[0] != self.shape[1]:
+            raise ValueError(
+                f"dimension mismatch: {self.shape!r} vs {X.shape!r}"
+            )
+        return self._matmat(X)
+
+    def rmatmat(self, X):
+        """Apply the adjoint matrix-matrix product."""
+        if X.ndim != 2:
+            raise ValueError(f"expected 2-D array, got {X.ndim}-D")
+        if X.shape[0] != self.shape[0]:
+            raise ValueError(
+                f"dimension mismatch: {self.shape!r} vs {X.shape!r}"
+            )
+        return self._rmatmat(X)
+
+    def dot(self, x):
+        """Dispatch to matvec / matmat / scalar-scale / product.
+
+        Strict-coercion contract (matches the rest of dpnp): the only
+        accepted types are :class:`LinearOperator`, a true scalar
+        (Python / NumPy / dpnp 0-D), or a :class:`dpnp.ndarray` of
+        rank 1 or 2. A host :class:`numpy.ndarray` is rejected with
+        a directed :class:`TypeError`; silently calling
+        ``dpnp.asarray(x)`` here would upload the host array to the
+        device on every matvec, masking real bugs in caller code
+        about device / queue selection. The user must convert
+        explicitly via ``dpnp.asarray(x)`` before passing in.
+        """
+        if isinstance(x, LinearOperator):
+            return _ProductLinearOperator(self, x)
+        if dpnp.isscalar(x):
+            return _ScaledLinearOperator(self, x)
+        if not isinstance(x, dpnp.ndarray):
+            # pylint: disable-next=import-outside-toplevel
+            import numpy as _np
+
+            if isinstance(x, _np.ndarray):
+                raise TypeError(
+                    "LinearOperator.dot: got a numpy.ndarray. dpnp "
+                    "does not perform implicit host -> device "
+                    "copies; pass dpnp.asarray(x) explicitly."
+                )
+            raise TypeError(
+                "LinearOperator.dot: expected a dpnp.ndarray, a "
+                "scalar, or another LinearOperator; got "
+                f"{type(x).__name__!r}."
+            )
+        if x.ndim == 1 or (x.ndim == 2 and x.shape[1] == 1):
+            return self.matvec(x)
+        if x.ndim == 2:
+            return self.matmat(x)
+        raise ValueError(
+            f"LinearOperator.dot: expected 1-D or 2-D dpnp array, "
+            f"got {x.ndim}-D"
+        )
+
+    def __call__(self, x):
+        return self * x
+
+    def __mul__(self, x):
+        """Multiply operator by array x."""
+        return self.dot(x)
+
+    def __matmul__(self, x):
+        if dpnp.isscalar(x):
+            raise ValueError(
+                "Scalar operands not allowed with '@'; use '*' instead"
+            )
+        return self.__mul__(x)
+
+    def __rmatmul__(self, x):
+        if dpnp.isscalar(x):
+            raise ValueError(
+                "Scalar operands not allowed with '@'; use '*' instead"
+            )
+        return self.__rmul__(x)
+
+    def __rmul__(self, x):
+        if dpnp.isscalar(x):
+            return _ScaledLinearOperator(self, x)
+        return NotImplemented
+
+    def __pow__(self, p):
+        if dpnp.isscalar(p):
+            return _PowerLinearOperator(self, p)
+        return NotImplemented
+
+    def __add__(self, x):
+        if isinstance(x, LinearOperator):
+            return _SumLinearOperator(self, x)
+        return NotImplemented
+
+    def __neg__(self):
+        return _ScaledLinearOperator(self, -1)
+
+    def __sub__(self, x):
+        return self.__add__(-x)
+
+    def _adjoint(self):
+        """Return conjugate-transpose operator (override in subclasses)."""
+        return _AdjointLinearOperator(self)
+
+    def _transpose(self):
+        """Return plain-transpose operator (override in subclasses)."""
+        return _TransposedLinearOperator(self)
+
+    def adjoint(self):
+        """Hermitian adjoint A^H."""
+        return self._adjoint()
+
+    def transpose(self):
+        """Plain (non-conjugated) transpose A^T."""
+        return self._transpose()
+
+    #: A.H — conjugate transpose
+    H = property(adjoint)
+    #: A.T — plain transpose
+    T = property(transpose)
+
+    def __repr__(self):
+        dt = (
+            "unspecified dtype" if self.dtype is None else f"dtype={self.dtype}"
+        )
+        return (
+            f"<{self.shape[0]}x{self.shape[1]}"
+            f" {self.__class__.__name__} with {dt}>"
+        )
+
+
+class _CustomLinearOperator(LinearOperator):
+    """Created when the user calls LinearOperator(shape, matvec=...)"""
+
+    def __init__(
+        self, shape, matvec, rmatvec=None, matmat=None, dtype=None, rmatmat=None
+    ):
+        super().__init__(dtype, shape)
+        self.args = ()
+        self.__matvec_impl = matvec
+        self.__rmatvec_impl = rmatvec
+        self.__rmatmat_impl = rmatmat
+        self.__matmat_impl = matmat
+        self._init_dtype()
+
+    def _matvec(self, x):
+        return self.__matvec_impl(x)
+
+    def _matmat(self, X):
+        if self.__matmat_impl is not None:
+            return self.__matmat_impl(X)
+        return super()._matmat(X)
+
+    def _rmatvec(self, x):
+        if self.__rmatvec_impl is None:
+            raise NotImplementedError(
+                "rmatvec is not defined for this operator"
+            )
+        return self.__rmatvec_impl(x)
+
+    def _rmatmat(self, X):
+        if self.__rmatmat_impl is not None:
+            return self.__rmatmat_impl(X)
+        return super()._rmatmat(X)
+
+    def _adjoint(self):
+        return _CustomLinearOperator(
+            shape=(self.shape[1], self.shape[0]),
+            matvec=self.__rmatvec_impl,
+            rmatvec=self.__matvec_impl,
+            matmat=self.__rmatmat_impl,
+            rmatmat=self.__matmat_impl,
+            dtype=self.dtype,
+        )
+
+
+class _AdjointLinearOperator(LinearOperator):
+    def __init__(self, A):
+        super().__init__(A.dtype, (A.shape[1], A.shape[0]))
+        self.A = A
+        self.args = (A,)
+
+    def _matvec(self, x):
+        return self.A._rmatvec(x)  # pylint: disable=protected-access
+
+    def _rmatvec(self, x):
+        return self.A._matvec(x)  # pylint: disable=protected-access
+
+    def _matmat(self, X):
+        return self.A._rmatmat(X)  # pylint: disable=protected-access
+
+    def _rmatmat(self, X):
+        return self.A._matmat(X)  # pylint: disable=protected-access
+
+    def _adjoint(self):
+        return self.A
+
+
+class _TransposedLinearOperator(LinearOperator):
+    def __init__(self, A):
+        super().__init__(A.dtype, (A.shape[1], A.shape[0]))
+        self.A = A
+        self.args = (A,)
+
+    def _matvec(self, x):
+        # pylint: disable=protected-access
+        return dpnp.conj(self.A._rmatvec(dpnp.conj(x)))
+
+    def _rmatvec(self, x):
+        # pylint: disable=protected-access
+        return dpnp.conj(self.A._matvec(dpnp.conj(x)))
+
+    def _matmat(self, X):
+        # pylint: disable=protected-access
+        return dpnp.conj(self.A._rmatmat(dpnp.conj(X)))
+
+    def _rmatmat(self, X):
+        # pylint: disable=protected-access
+        return dpnp.conj(self.A._matmat(dpnp.conj(X)))
+
+    def _transpose(self):
+        return self.A
+
+
+class _SumLinearOperator(LinearOperator):
+    def __init__(self, A, B):
+        if A.shape != B.shape:
+            raise ValueError(f"shape mismatch for addition: {A!r} + {B!r}")
+        super().__init__(_get_dtype([A, B]), A.shape)
+        self.args = (A, B)
+
+    def _matvec(self, x):
+        return self.args[0].matvec(x) + self.args[1].matvec(x)
+
+    def _rmatvec(self, x):
+        return self.args[0].rmatvec(x) + self.args[1].rmatvec(x)
+
+    def _matmat(self, X):
+        return self.args[0].matmat(X) + self.args[1].matmat(X)
+
+    def _rmatmat(self, X):
+        return self.args[0].rmatmat(X) + self.args[1].rmatmat(X)
+
+    def _adjoint(self):
+        return self.args[0].H + self.args[1].H
+
+
+class _ProductLinearOperator(LinearOperator):
+    def __init__(self, A, B):
+        if A.shape[1] != B.shape[0]:
+            raise ValueError(f"shape mismatch for multiply: {A!r} * {B!r}")
+        super().__init__(_get_dtype([A, B]), (A.shape[0], B.shape[1]))
+        self.args = (A, B)
+
+    def _matvec(self, x):
+        return self.args[0].matvec(self.args[1].matvec(x))
+
+    def _rmatvec(self, x):
+        return self.args[1].rmatvec(self.args[0].rmatvec(x))
+
+    def _matmat(self, X):
+        return self.args[0].matmat(self.args[1].matmat(X))
+
+    def _rmatmat(self, X):
+        return self.args[1].rmatmat(self.args[0].rmatmat(X))
+
+    def _adjoint(self):
+        A, B = self.args
+        return B.H * A.H
+
+
+class _ScaledLinearOperator(LinearOperator):
+    def __init__(self, A, alpha):
+        alpha_dtype = getattr(alpha, "dtype", type(alpha))
+        super().__init__(_get_dtype([A], [alpha_dtype]), A.shape)
+        self.args = (A, alpha)
+
+    def _matvec(self, x):
+        return self.args[1] * self.args[0].matvec(x)
+
+    def _rmatvec(self, x):
+        return self.args[1].conjugate() * self.args[0].rmatvec(x)
+
+    def _matmat(self, X):
+        return self.args[1] * self.args[0].matmat(X)
+
+    def _rmatmat(self, X):
+        return self.args[1].conjugate() * self.args[0].rmatmat(X)
+
+    def _adjoint(self):
+        A, alpha = self.args
+        return A.H * alpha.conjugate()
+
+
+class _PowerLinearOperator(LinearOperator):
+    def __init__(self, A, p):
+        if A.shape[0] != A.shape[1]:
+            raise ValueError("matrix power requires a square operator")
+        if not _isintlike(p) or p < 0:
+            raise ValueError(
+                "matrix power requires a non-negative integer exponent"
+            )
+        super().__init__(_get_dtype([A]), A.shape)
+        self.args = (A, int(p))
+
+    def _power(self, f, x):
+        res = x.copy()
+        for _ in range(self.args[1]):
+            res = f(res)
+        return res
+
+    def _matvec(self, x):
+        return self._power(self.args[0].matvec, x)
+
+    def _rmatvec(self, x):
+        return self._power(self.args[0].rmatvec, x)
+
+    def _matmat(self, X):
+        return self._power(self.args[0].matmat, X)
+
+    def _rmatmat(self, X):
+        return self._power(self.args[0].rmatmat, X)
+
+    def _adjoint(self):
+        A, p = self.args
+        return A.H**p
+
+
+class MatrixLinearOperator(LinearOperator):
+    """Wrap a dense dpnp matrix (or sparse matrix) as a LinearOperator."""
+
+    def __init__(self, A):
+        super().__init__(A.dtype, A.shape)
+        self.A = A
+        self.__adj = None
+        self.args = (A,)
+
+    def _matmat(self, X):
+        return self.A.dot(X)
+
+    def _rmatmat(self, X):
+        return dpnp.conj(self.A.T).dot(X)
+
+    def _adjoint(self):
+        if self.__adj is None:
+            self.__adj = _AdjointMatrixOperator(self)
+        return self.__adj
+
+
+class _AdjointMatrixOperator(MatrixLinearOperator):
+    # super().__init__() is intentionally skipped: this operator stores its
+    # own (adjoint-derived) A, shape and dtype, and must NOT re-validate
+    # shape via the base ``MatrixLinearOperator.__init__`` path.
+    # pylint: disable=super-init-not-called
+    def __init__(self, adjoint):
+        self.A = dpnp.conj(adjoint.A.T)
+        self.__adjoint = adjoint
+        self.args = (adjoint,)
+        self.shape = (adjoint.shape[1], adjoint.shape[0])
+
+    @property
+    def dtype(self):
+        """Inherit dtype from the wrapped operator."""
+        return self.__adjoint.dtype
+
+    def _adjoint(self):
+        return self.__adjoint
+
+
+class IdentityOperator(LinearOperator):
+    """Identity operator — used as the default (no-op) preconditioner."""
+
+    def __init__(self, shape, dtype=None):
+        super().__init__(dtype, shape)
+
+    def _matvec(self, x):
+        """Apply matrix-vector product via stored array."""
+        return x
+
+    def _rmatvec(self, x):
+        return x
+
+    def _matmat(self, X):
+        return X
+
+    def _rmatmat(self, X):
+        return X
+
+    def _adjoint(self):
+        return self
+
+    def _transpose(self):
+        return self
+
+
+def aslinearoperator(A) -> LinearOperator:
+    """Return ``A`` as a :class:`LinearOperator`.
+
+    Dispatch order (matches ``cupyx.scipy.sparse.linalg.aslinearoperator``
+    and ``scipy.sparse.linalg.aslinearoperator``):
+
+      1. Already a :class:`LinearOperator` -- returned as-is.
+      2. A ``dpnp.scipy.sparse`` sparse matrix (e.g. ``csr_matrix``)
+         -- wrapped as :class:`MatrixLinearOperator`. Inside the iterative
+         solvers this wrapper is further specialised to a cached oneMKL
+         SpMV handle in ``_iterative._make_fast_matvec`` so the dense
+         materialisation in ``csr_matrix.dot`` is bypassed.
+      3. A dense 2-D :class:`dpnp.ndarray` -- wrapped as
+         :class:`MatrixLinearOperator` after promotion via
+         :func:`dpnp.atleast_2d`.
+      4. A duck-typed object with ``.shape`` and ``.matvec``
+         (optionally ``rmatvec`` / ``matmat`` / ``rmatmat`` / ``dtype``).
+
+    Notes
+    -----
+    A :class:`numpy.ndarray` is explicitly rejected: silently promoting
+    a host array would force a hidden host->device copy on every
+    matvec, defeating the point of routing through dpnp. Callers must
+    explicitly transfer with ``dpnp.asarray`` first.
+    """
+    # 1. Already a LinearOperator -- pass through.
+    if isinstance(A, LinearOperator):
+        return A
+
+    # 2. dpnp sparse matrix. Import is at module-load time -- if
+    # dpnp.scipy.sparse is unimportable then the package itself is
+    # broken and a hard failure is preferable to silent fallthrough.
+    # The local import avoids the package-init circularity that exists
+    # while dpnp.scipy.sparse.__init__ is still executing (it imports
+    # us via linalg/__init__.py).
+    # pylint: disable-next=import-outside-toplevel
+    from dpnp.scipy.sparse import issparse
+
+    if issparse(A):
+        return MatrixLinearOperator(A)
+
+    # 3. Dense dpnp array.
+    if isinstance(A, dpnp.ndarray):
+        if A.ndim > 2:
+            raise ValueError(
+                f"aslinearoperator: dpnp array must be at most 2-D, "
+                f"got {A.ndim}-D"
+            )
+        return MatrixLinearOperator(dpnp.atleast_2d(A))
+
+    # pylint: disable-next=import-outside-toplevel
+    import numpy as _np
+
+    if isinstance(A, _np.ndarray):
+        raise TypeError(
+            "aslinearoperator: got a numpy.ndarray; transfer it to "
+            "the target device with dpnp.asarray(A) first."
+        )
+
+    # 4. Duck-typed object with .shape and .matvec.
+    if hasattr(A, "shape") and hasattr(A, "matvec"):
+        shape = tuple(A.shape)
+        if len(shape) != 2:
+            raise ValueError(
+                f"aslinearoperator: duck-typed operator must be 2-D, "
+                f"got shape {shape!r}"
+            )
+        return LinearOperator(
+            shape,
+            matvec=A.matvec,
+            rmatvec=getattr(A, "rmatvec", None),
+            matmat=getattr(A, "matmat", None),
+            rmatmat=getattr(A, "rmatmat", None),
+            dtype=getattr(A, "dtype", None),
+        )
+
+    raise TypeError(
+        f"aslinearoperator: cannot convert object of type {type(A).__name__!r} "
+        "to a LinearOperator. Expected a LinearOperator, a dpnp sparse "
+        "matrix, a 2-D dpnp.ndarray, or an object with .shape and .matvec."
+    )
diff --git a/dpnp/scipy/sparse/linalg/_iterative.py b/dpnp/scipy/sparse/linalg/_iterative.py
new file mode 100644
index 000000000000..077cf8f04673
--- /dev/null
+++ b/dpnp/scipy/sparse/linalg/_iterative.py
@@ -0,0 +1,1189 @@
+# *****************************************************************************
+# Copyright (c) 2025, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+#   may be used to endorse or promote products derived from this software
+#   without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+"""Iterative sparse linear solvers for dpnp -- pure GPU/SYCL implementation.
+
+All computation stays on the device (USM/oneMKL).  There is NO host-dispatch
+fallback: transferring data to the CPU for small systems defeats the purpose
+of keeping a live computation on GPU memory.
+
+Solver coverage
+---------------
+cg     : Conjugate Gradient (Hermitian positive definite)
+gmres  : Restarted GMRES (general non-symmetric)
+minres : MINRES (symmetric possibly indefinite)
+
+SpMV fast-path
+--------------
+When a CSR dpnp sparse matrix is passed as A or M, _make_fast_matvec()
+constructs a _CachedSpMV object that:
+  1. Calls _sparse_gemv_init() ONCE to create the oneMKL matrix_handle,
+     register CSR pointers via set_csr_data, and run optimize_gemv
+     (the expensive sparsity-analysis phase).
+  2. Calls _sparse_gemv_compute() on every matvec -- only the cheap
+     oneMKL sparse::gemv kernel fires; no handle setup overhead.
+  3. Calls _sparse_gemv_release() in __del__ to free the handle.
+
+This means optimize_gemv runs once per operator, not once per iteration,
+which is the correct usage pattern for oneMKL sparse BLAS.
+
+Supported dtypes for the oneMKL SpMV fast-path:
+  values : float32, float64, complex64, complex128
+  indices: int32, int64
+Complex dtypes require oneMKL sparse BLAS support (available since
+oneMKL 2023.x); if the dispatch table slot is nullptr (types_matrix.hpp
+does not register the pair) a ValueError is raised by the C++ layer.
+_make_fast_matvec catches this and falls back to A.dot(x).
+"""
+
+# Math-heavy module: single-letter and CamelCase identifiers such as
+# A, M, X, V, H, Ap, Ax, Anorm, Acond, Vj, A_op, M_op, fast_mv_M,
+# _orig_M are part of the published numerical-linear-algebra API and
+# mirror SciPy/CuPy verbatim, so the snake_case rule is intentionally
+# relaxed for the whole file. The duplicate-code check fires against
+# dpnp/scipy/sparse/_csr.py where the cached-SpMV invocation and
+# __del__ shutdown handling necessarily mirror this module; the
+# duplication is tightly coupled to oneMKL's contract and not worth
+# hiding behind an indirection.
+# pylint: disable=invalid-name,duplicate-code
+
+from __future__ import annotations
+
+import math
+from typing import Callable
+
+import dpctl.utils as dpu
+import numpy
+
+import dpnp
+
+# _blas_impl is a compiled (.so / .pyd) C-extension produced by the
+# dpnp build; pylint cannot statically introspect its exported symbols.
+# pylint: disable-next=no-name-in-module
+import dpnp.backend.extensions.blas._blas_impl as bi
+
+from ._interface import IdentityOperator, LinearOperator, aslinearoperator
+
+_SUPPORTED_DTYPES = frozenset("fdFD")
+
+
+def _np_dtype(dp_dtype) -> numpy.dtype:
+    """Normalise any dtype-like to numpy.dtype."""
+    return numpy.dtype(dp_dtype)
+
+
+def _check_dtype(dtype, name: str) -> None:
+    if _np_dtype(dtype).char not in _SUPPORTED_DTYPES:
+        raise TypeError(
+            f"{name} has unsupported dtype {dtype}; "
+            "only float32, float64, complex64, complex128 are accepted."
+        )
+
+
+# pylint: disable-next=too-many-instance-attributes
+class _CachedSpMV:
+    """
+    Wrap a CSR matrix with a persistent oneMKL matrix_handle.
+
+    The handle is initialised (set_csr_data + optimize_gemv) exactly once
+    in __init__. Subsequent calls to __call__ only invoke sparse::gemv,
+    paying no analysis overhead. The handle is released in __del__.
+
+    Parameters
+    ----------
+    A : dpnp CSR sparse matrix
+    si : dpnp.backend.extensions.sparse._sparse_impl module
+        Passed in from _make_fast_matvec to keep the import lazy and
+        avoid a circular import during dpnp package initialization.
+    trans : int  0=N, 1=T, 2=C  (fixed at construction)
+    """
+
+    __slots__ = (
+        "_A",
+        "_si",
+        "_exec_q",
+        "_handle",
+        "_trans",
+        "_nrows",
+        "_ncols",
+        "_nnz",
+        "_out_size",
+        "_in_size",
+        "_dtype",
+        "_val_type_id",
+    )
+
+    def __init__(self, A, si, trans: int = 0):
+        self._A = A  # keep alive so USM pointers stay valid
+        self._si = si
+        self._trans = int(trans)
+        self._nrows = int(A.shape[0])
+        self._ncols = int(A.shape[1])
+        self._nnz = int(A.data.shape[0])
+        self._exec_q = A.data.sycl_queue
+        self._dtype = A.data.dtype
+
+        # Output and input lengths depend on transpose mode.
+        # For trans=0 (N): y has nrows, x has ncols.
+        # For trans=1/2 (T/C): y has ncols, x has nrows.
+        if self._trans == 0:
+            self._out_size = self._nrows
+            self._in_size = self._ncols
+        else:
+            self._out_size = self._ncols
+            self._in_size = self._nrows
+
+        self._handle = None
+        self._val_type_id = -1
+
+        # init_matrix_handle + set_csr_data + optimize_gemv (once).
+        # We must wait on optimize_gemv before any compute call can run;
+        # this is the only place __init__/__call__ blocks.
+        # pylint: disable-next=protected-access
+        handle, val_type_id, ev = self._si._sparse_gemv_init(
+            self._exec_q,
+            self._trans,
+            A.indptr,
+            A.indices,
+            A.data,
+            self._nrows,
+            self._ncols,
+            self._nnz,
+            [],
+        )
+        ev.wait()
+        self._handle = handle
+        self._val_type_id = val_type_id
+
+    def __call__(self, x: dpnp.ndarray) -> dpnp.ndarray:
+        """Y = op(A) * x -- only sparse::gemv fires, fully async."""
+        y = dpnp.empty(
+            self._out_size, dtype=self._dtype, sycl_queue=self._exec_q
+        )
+        # Do NOT wait on the event -- subsequent dpnp ops on the same
+        # queue will serialize behind it automatically. Blocking here
+        # throws away async overlap and dominates small-problem runtime.
+        # pylint: disable-next=protected-access
+        self._si._sparse_gemv_compute(
+            self._exec_q,
+            self._handle,
+            self._val_type_id,
+            self._trans,
+            1.0,
+            x,
+            0.0,
+            y,
+            self._nrows,
+            self._ncols,
+            [],
+        )
+        return y
+
+    def __del__(self):
+        # Guard against partial construction: _handle may not be set if
+        # __init__ raised before the assignment.
+        handle = getattr(self, "_handle", None)
+        si = getattr(self, "_si", None)
+        if handle is None or si is None:
+            return
+
+        # During interpreter shutdown the compiled extension may be
+        # collected before this __del__ runs; in that case
+        # ``si._sparse_gemv_release`` evaluates to ``None`` (or raises
+        # AttributeError on some module proxies). Probe explicitly so
+        # we can distinguish "extension already torn down -- leak the
+        # handle, the OS will reclaim it" from "release call raised --
+        # narrow except below" and not silence both with one broad
+        # ``except Exception``.
+        release_fn = getattr(si, "_sparse_gemv_release", None)
+        if release_fn is None:
+            self._handle = None
+            return
+
+        try:
+            # pylint: disable-next=not-callable
+            release_fn(self._exec_q, handle, [])
+        except (AttributeError, TypeError):
+            # Shutdown-mode races: queue or handle attribute access
+            # may itself raise once the supporting dpctl / pybind11
+            # state is gone. The handle is unrecoverable; leave the
+            # OS to reclaim it at process exit.
+            pass
+        except Exception:  # pylint: disable=broad-exception-caught
+            # Genuine backend error while the interpreter is still
+            # healthy. Swallowing here is still required (raising
+            # from __del__ produces an unraisable-exception warning
+            # and serves no purpose -- the handle is gone either
+            # way), but the explicit broad-except now documents the
+            # intent rather than masking the shutdown race above.
+            pass
+        finally:
+            self._handle = None
+
+
+class _CachedSpMVPair:
+    """Forward + lazily-built adjoint SpMV closures around a csr_matrix.
+
+    The forward handle is owned by the ``csr_matrix`` itself (built via
+    ``csr_matrix._ensure_spmv_handle()``) and therefore shared with any
+    other call site -- including a user-issued ``A.dot(x)`` outside the
+    solver. The adjoint handle is built on demand and owned by this
+    pair instance; ``__del__`` releases it.
+    """
+
+    __slots__ = ("_A", "_si", "_adjoint")
+
+    def __init__(self, A, si):
+        self._A = A
+        self._si = si
+        self._adjoint = None
+
+    def matvec(self, x):
+        """Apply the forward operator A @ x via the csr's cached handle."""
+        # _ensure_spmv_handle has already been validated by the caller
+        # (_make_fast_matvec) before this pair was constructed, so it
+        # cannot return None here. We re-fetch on every call only to
+        # pick up the (immutable) handle pointer and exec_q without
+        # caching them redundantly on this object.
+        # pylint: disable-next=protected-access
+        _si, handle, val_type_id, exec_q = self._A._ensure_spmv_handle()
+        y = dpnp.empty(
+            self._A.shape[0], dtype=self._A.data.dtype, sycl_queue=exec_q
+        )
+        # pylint: disable-next=protected-access
+        _si._sparse_gemv_compute(
+            exec_q,
+            handle,
+            val_type_id,
+            0,  # trans=N
+            1.0,  # alpha
+            x,
+            0.0,  # beta
+            y,
+            int(self._A.shape[0]),
+            int(self._A.shape[1]),
+            [],
+        )
+        return y
+
+    def rmatvec(self, x):
+        """Apply the conjugate-transpose operator A^H @ x."""
+        if self._adjoint is None:
+            # Build conjtrans handle on first use. For real dtypes
+            # this is equivalent to trans=1.
+            is_cpx = dpnp.issubdtype(self._A.data.dtype, dpnp.complexfloating)
+            self._adjoint = _CachedSpMV(
+                self._A, self._si, trans=2 if is_cpx else 1
+            )
+        return self._adjoint(x)
+
+
+def _make_fast_matvec(A):
+    """Return a _CachedSpMVPair if A is a CSR matrix with oneMKL support,
+    or None if A is not an eligible sparse matrix.
+
+    Falls back to None (caller uses A.dot) on:
+      - A is not a dpnp CSR sparse matrix
+      - the compiled backend extension is unavailable
+      - the (value, index) dtype combination is not registered with
+        the oneMKL dispatch table
+      - handle initialisation raises for any other backend-specific
+        reason
+    """
+    try:
+        # Lazy import: dpnp.scipy.sparse may import this module during
+        # package initialisation, so a top-level import would deadlock.
+        # pylint: disable-next=import-outside-toplevel
+        from dpnp.scipy import sparse as _sp
+
+        if not (_sp.issparse(A) and A.format == "csr"):
+            return None
+    except (ImportError, AttributeError):
+        return None
+
+    # Probe the csr_matrix's own SpMV path. This either returns a
+    # fully-built handle (cached on A for sharing with A.dot) or None
+    # when the backend extension / dtype combination is unsupported.
+    if not hasattr(A, "_ensure_spmv_handle"):
+        return None
+    # pylint: disable-next=protected-access
+    handle_info = A._ensure_spmv_handle()
+    if handle_info is None:
+        return None
+
+    _si, _handle, _val_type_id, _exec_q = handle_info
+    return _CachedSpMVPair(A, _si)
+
+
+def _make_system(A, M, x0, b):
+    """Make a linear system Ax = b
+
+    Args:
+        A (dpnp.ndarray or dpnpx.scipy.sparse.spmatrix or
+            dpnpx.scipy.sparse.LinearOperator): sparse or dense matrix.
+        M (dpnp.ndarray or dpnpx.scipy.sparse.spmatrix or
+            dpnpx.scipy.sparse.LinearOperator): preconditioner.
+        x0 (dpnp.ndarray): initial guess to iterative method.
+        b (dpnp.ndarray): right hand side.
+
+    Returns:
+        tuple:
+            It returns (A, M, x, b).
+            A (LinaerOperator): matrix of linear system
+            M (LinearOperator): preconditioner
+            x (dpnp.ndarray): initial guess
+            b (dpnp.ndarray): right hand side.
+    """
+    if not isinstance(b, dpnp.ndarray):
+        raise TypeError(f"b must be a dpnp.ndarray, got {type(b).__name__}")
+    if x0 is not None and not isinstance(x0, dpnp.ndarray):
+        raise TypeError(
+            f"x0 must be a dpnp.ndarray or None, got {type(x0).__name__}"
+        )
+
+    A_op = aslinearoperator(A)
+    if A_op.shape[0] != A_op.shape[1]:
+        raise ValueError("A must be a square operator")
+    n = A_op.shape[0]
+
+    b = b.reshape(-1)
+    if b.shape[0] != n:
+        raise ValueError(
+            f"b length {b.shape[0]} does not match operator dimension {n}"
+        )
+
+    # Dtype promotion: prefer A.dtype; fall back via b.dtype.
+    if (
+        A_op.dtype is not None
+        and _np_dtype(A_op.dtype).char in _SUPPORTED_DTYPES
+    ):
+        dtype = A_op.dtype
+    elif dpnp.issubdtype(b.dtype, dpnp.complexfloating):
+        dtype = dpnp.complex128
+    else:
+        dtype = dpnp.float64
+
+    b = b.astype(dtype, copy=False)
+    _check_dtype(b.dtype, "b")
+
+    if x0 is None:
+        x = dpnp.zeros(n, dtype=dtype, sycl_queue=b.sycl_queue)
+    else:
+        x = x0.astype(dtype, copy=True).reshape(-1)
+        if x.shape[0] != n:
+            raise ValueError(f"x0 length {x.shape[0]} != n={n}")
+
+    if M is None:
+        M_op = IdentityOperator((n, n), dtype=dtype)
+    else:
+        M_op = aslinearoperator(M)
+        if M_op.shape != A_op.shape:
+            raise ValueError(
+                f"preconditioner shape {M_op.shape} != "
+                f"operator shape {A_op.shape}"
+            )
+
+        fast_mv_M = _make_fast_matvec(M)
+        if fast_mv_M is not None:
+            _orig_M = M_op
+
+            class _FastMOp(LinearOperator):
+                def __init__(self):
+                    super().__init__(_orig_M.dtype, _orig_M.shape)
+
+                def _matvec(self, x):
+                    return fast_mv_M.matvec(x)
+
+                def _rmatvec(self, x):
+                    return fast_mv_M.rmatvec(x)
+
+            M_op = _FastMOp()
+
+    # Inject fast CSR SpMV for A if available.
+    fast_mv = _make_fast_matvec(A)
+    if fast_mv is not None:
+        _orig = A_op
+
+        class _FastOp(LinearOperator):
+            def __init__(self):
+                super().__init__(_orig.dtype, _orig.shape)
+
+            def _matvec(self, x):
+                return fast_mv.matvec(x)
+
+            def _rmatvec(self, x):
+                return fast_mv.rmatvec(x)
+
+        A_op = _FastOp()
+
+    return A_op, M_op, x, b, dtype
+
+
+def _get_atol(b_norm: float, atol, rtol: float) -> float:
+    """Absolute stopping tolerance: max(atol, rtol*||b||), mirroring SciPy."""
+    if atol == "legacy" or atol is None:
+        atol = 0.0
+    atol = float(atol)
+    if atol < 0:
+        raise ValueError(
+            f"atol={atol!r} is invalid; must be a real, non-negative number."
+        )
+    return max(atol, float(rtol) * float(b_norm))
+
+
+# pylint: disable-next=too-many-locals,too-many-statements
+def cg(
+    A,
+    b,
+    x0: dpnp.ndarray | None = None,
+    *,
+    rtol: float = 1e-5,
+    tol: float | None = None,
+    maxiter: int | None = None,
+    M=None,
+    callback: Callable | None = None,
+    atol=None,
+) -> tuple[dpnp.ndarray, int]:
+    """Conjugate Gradient -- pure dpnp/oneMKL, Hermitian positive definite A.
+
+    Parameters
+    ----------
+    A       : array_like or LinearOperator -- HPD (n, n)
+    b       : array_like -- right-hand side (n,)
+    x0      : array_like, optional -- initial guess
+    rtol    : float -- relative tolerance (default 1e-5)
+    tol     : float, optional -- deprecated alias for rtol
+    maxiter : int, optional -- max iterations (default 10*n)
+    M       : LinearOperator or array_like, optional -- SPD preconditioner
+    callback: callable, optional -- callback(xk) after each iteration
+    atol    : float, optional -- absolute tolerance
+
+    Returns
+    -------
+    x    : dpnp.ndarray
+    info : int
+        ``info`` follows the SciPy / CuPy contract:
+
+          * ``info == 0``           : converged successfully
+          * ``info > 0``            : did not converge; value is the
+                                       iteration count at which the
+                                       solver stopped (equals
+                                       ``maxiter`` when the iteration
+                                       budget was exhausted, or the
+                                       iteration index when a numerical
+                                       breakdown short-circuited the
+                                       loop).
+          * ``info < 0``            : reserved for illegal-input
+                                       errors; not produced by this
+                                       implementation (illegal inputs
+                                       raise ``ValueError`` instead).
+
+        Previous versions of this routine returned ``-1`` for an
+        ``rz``/``pAp`` breakdown, which violated the SciPy contract
+        and broke user code that branched on ``info > 0``.
+    """
+    if tol is not None:
+        rtol = tol
+
+    A_op, M_op, x, b, dtype = _make_system(A, M, x0, b)
+    n = b.shape[0]
+
+    bnrm = dpnp.linalg.norm(b)
+    bnrm_host = float(bnrm)
+    if bnrm_host == 0.0:
+        return dpnp.zeros_like(b), 0
+
+    atol_eff_host = _get_atol(bnrm_host, atol=atol, rtol=rtol)
+
+    if maxiter is None:
+        maxiter = n * 10
+
+    rhotol = float(numpy.finfo(_np_dtype(dtype)).eps ** 2)
+
+    r = b - A_op.matvec(x) if x0 is not None else b.copy()
+    z = M_op.matvec(r)
+    p = z.copy()
+
+    # rz is kept as a 0-D dpnp array on device throughout the loop;
+    # the only time we transfer it to the host is the initial
+    # breakdown guard below (matches the CuPy contract -- a zero
+    # initial preconditioned residual means we are already at the
+    # solution and there is nothing further to do).
+    rz = dpnp.real(dpnp.vdot(r, z))
+    if float(dpnp.abs(rz)) < rhotol:
+        return x, 0
+
+    info = maxiter
+    # Per-iter sync count: 1 (rnorm convergence check). The pAp and
+    # rz_new breakdown checks are intentionally not transferred to
+    # the host; IEEE-754 inf / NaN propagation through alpha = rz/pAp
+    # makes pathological values poison the next residual norm, which
+    # the single sync below detects via the `not isfinite(rnorm_host)`
+    # branch. Mirrors CuPy / cuBLAS-style CG which also dispatches
+    # one nrm2 + comparison per iteration.
+    for k in range(maxiter):
+        rnorm = dpnp.linalg.norm(r)
+        rnorm_host = float(rnorm)
+        if rnorm_host <= atol_eff_host:
+            info = 0
+            break
+        if not math.isfinite(rnorm_host):
+            # IEEE-propagated breakdown: pAp or rz collapsed in the
+            # previous iteration, poisoning r via alpha=inf/NaN. The
+            # current iterate is the best estimate we have; report
+            # info > 0 per SciPy contract.
+            info = k + 1
+            break
+
+        Ap = A_op.matvec(p)
+        pAp = dpnp.real(dpnp.vdot(p, Ap))  # 0-D, stays on device
+
+        # No sync on pAp -- division by a near-zero pAp will produce
+        # alpha = inf/NaN, propagated below into r and caught by the
+        # rnorm_host check at the top of the next iteration.
+        alpha = rz / pAp
+        x = x + alpha * p
+        r = r - alpha * Ap
+
+        if callback is not None:
+            callback(x)
+
+        z = M_op.matvec(r)
+        rz_new = dpnp.real(dpnp.vdot(r, z))
+
+        # No sync on rz_new either; near-zero rz_new likewise yields
+        # beta = inf/NaN and is caught at the next loop entry.
+        beta = rz_new / rz
+        p = z + beta * p
+        rz = rz_new
+    else:
+        info = maxiter
+
+    return x, int(info)
+
+
+# pylint: disable-next=too-many-locals,too-many-statements,too-many-branches
+def gmres(
+    A,
+    b,
+    x0: dpnp.ndarray | None = None,
+    *,
+    rtol: float = 1e-5,
+    atol: float = 0.0,
+    restart: int | None = None,
+    maxiter: int | None = None,
+    M=None,
+    callback: Callable | None = None,
+    callback_type: str | None = None,
+) -> tuple[dpnp.ndarray, int]:
+    """Uses Generalized Minimal RESidual iteration to solve ``Ax = b``.
+
+    Parameters
+    ----------
+    A : LinearOperator, dpnp sparse matrix, or 2-D dpnp.ndarray
+        The real or complex matrix of the linear system, shape (n, n).
+    b : dpnp.ndarray
+        Right-hand side of the linear system, shape (n,) or (n, 1).
+    x0 : dpnp.ndarray, optional
+        Starting guess for the solution.
+    rtol, atol : float
+        Tolerance for convergence: ``||r|| <= max(atol, rtol*||b||)``.
+    restart : int, optional
+        Number of iterations between restarts (default 20). Larger values
+        increase iteration cost but may be necessary for convergence.
+    maxiter : int, optional
+        Maximum number of iterations (default 10*n).
+    M : LinearOperator, dpnp sparse matrix, or 2-D dpnp.ndarray, optional
+        Preconditioner for ``A``; should approximate the inverse of ``A``.
+    callback : callable, optional
+        User-specified function to call on every restart. Called as
+        ``callback(arg)``, where ``arg`` is selected by ``callback_type``.
+    callback_type : {'x', 'pr_norm'}, optional
+        If 'x', the current solution vector is passed to the callback.
+        If 'pr_norm', the relative (preconditioned) residual norm.
+        Default is 'pr_norm' when a callback is supplied.
+
+    Returns
+    -------
+    x : dpnp.ndarray
+        The (approximate) solution. Note that this is M @ x in the
+        right-preconditioned formulation, matching CuPy's return value.
+    info : int
+        0 if converged; iteration count if maxiter was reached.
+
+    See Also
+    --------
+    scipy.sparse.linalg.gmres
+    cupyx.scipy.sparse.linalg.gmres
+    """
+    A_op, M_op, x, b, dtype = _make_system(A, M, x0, b)
+    matvec = A_op.matvec
+    psolve = M_op.matvec
+
+    n = A_op.shape[0]
+    if n == 0:
+        return dpnp.empty_like(b), 0
+    # b_norm is a 0-D device tensor; cast to host once so the
+    # subsequent comparisons / atol arithmetic are pure-host floats
+    # and do not trigger implicit __bool__ syncs every iteration.
+    b_norm = float(dpnp.linalg.norm(b))
+    if b_norm == 0.0:
+        return b, 0
+    atol = max(float(atol), rtol * b_norm)
+
+    if maxiter is None:
+        maxiter = n * 10
+    if restart is None:
+        restart = 20
+    restart = min(int(restart), n)
+
+    if callback_type is None:
+        callback_type = "pr_norm"
+    if callback_type not in ("x", "pr_norm"):
+        raise ValueError(f"Unknown callback_type: {callback_type!r}")
+    if callback is None:
+        callback_type = None
+
+    queue = b.sycl_queue
+
+    # Krylov basis V is F-ordered so column slices V[:, :k] are
+    # F-contiguous USM views, a precondition of the bi._gemv_alpha_beta
+    # binding used inside _make_compute_hu.
+    V = dpnp.empty((n, restart), dtype=dtype, sycl_queue=queue, order="F")
+    # H is F-ordered for the same reason: compute_hu writes Hessenberg
+    # column slices H[:j+1, j] in-place via the gemv output pointer.
+    # An RHS of length restart+1 is built on the host (e_host) because
+    # we run the small (restart+1) x restart least-squares on the host
+    # every restart -- the device-side SVD launch overhead dominates
+    # for this size class on Intel GPUs, matching CuPy's CPU choice.
+    H = dpnp.zeros(
+        (restart + 1, restart), dtype=dtype, sycl_queue=queue, order="F"
+    )
+
+    compute_hu = _make_compute_hu(V, H)
+
+    np_dtype = _np_dtype(dtype)
+    e_host = numpy.zeros(restart + 1, dtype=np_dtype)
+
+    iters = 0
+    # r_norm_host tracks the latest residual norm as a Python float so
+    # the convergence test and the final maxiter check below operate on
+    # host scalars (one explicit sync per restart, not an implicit one
+    # per comparison).
+    r_norm_host = math.inf
+    while True:
+        mx = psolve(x)
+        r = b - matvec(mx)
+        r_norm = dpnp.linalg.norm(r)
+        r_norm_host = float(r_norm)
+
+        if callback_type == "x":
+            callback(mx)
+        elif callback_type == "pr_norm" and iters > 0:
+            # b_norm is already host; r_norm_host / b_norm stays on host.
+            callback(r_norm_host / b_norm)
+
+        if r_norm_host <= atol or iters >= maxiter:
+            break
+
+        # Initialise the Arnoldi basis with the (normalised) residual.
+        # Writing V[:, 0] in one slice is a contiguous USM-to-USM copy
+        # of length n; same shape as CuPy's V[:, 0] = v.
+        v = r / r_norm
+        V[:, 0] = v
+        # Clear the Hessenberg column data the lstsq will read this
+        # restart. Only the upper (j+1) entries per column are written
+        # by compute_hu; without this reset stale values from the
+        # previous restart would leak into the system.
+        H[:] = 0
+        # RHS for the Hessenberg system is r_norm * e_1; the rest
+        # of e_host stays zero from the host allocation above.
+        e_host[0] = r_norm_host
+        if iters > 0:
+            # Clear stale tail from previous restart in case maxiter
+            # exceeds restart and we re-enter with a non-zero e_host[1].
+            e_host[1:] = 0
+
+        # Arnoldi iteration
+        last_j = restart - 1
+        for j in range(restart):
+            z = psolve(v)
+            u = matvec(z)
+            # compute_hu writes H[:j+1, j] in-place and returns the
+            # orthogonalised u. No h temporary, no tmp buffer, two
+            # oneMKL gemv calls per Arnoldi step.
+            u = compute_hu(u, j)
+            # H[j+1, j] = ||u||  -- one device norm, one slice store.
+            # Stored as a device 0-D scalar; we only sync if we need
+            # to read its value for the next v normalisation.
+            h_norm = dpnp.linalg.norm(u)
+            H[j + 1, j] = h_norm
+            if j < last_j:
+                # Normalise u into the next Krylov vector and store it
+                # in V. The single in-place store V[:, j+1] = v writes
+                # a contiguous column slice with a unit-stride layout.
+                v = u / h_norm
+                V[:, j + 1] = v
+
+        # Solve the small Hessenberg least-squares  H y = e  on the
+        # host. The matrix is (restart+1) x restart -- typically
+        # 21 x 20 -- so the device SVD launch overhead dominates;
+        # CuPy makes the same choice and ships y back as a device
+        # array. Single host sync per restart, replacing the per-
+        # restart device-side lstsq that allocated a workspace and
+        # ran a tiny SVD kernel.
+        H_host = dpnp.asnumpy(H)
+        y_host, *_ = numpy.linalg.lstsq(H_host, e_host, rcond=None)
+        y = dpnp.asarray(y_host, sycl_queue=queue)
+        x = x + dpnp.dot(V, y)
+        iters += restart
+
+    info = 0
+    if iters >= maxiter and r_norm_host > atol:
+        info = iters
+
+    return mx, info
+
+
+# pylint: disable-next=too-many-locals,too-many-branches,too-many-statements
+def minres(
+    A,
+    b,
+    x0: dpnp.ndarray | None = None,
+    *,
+    rtol: float = 1e-5,
+    shift: float = 0.0,
+    maxiter: int | None = None,
+    M=None,
+    callback: Callable | None = None,
+    show: bool = False,
+    check: bool = False,
+) -> tuple[dpnp.ndarray, int]:
+    """Uses MINimum RESidual iteration to solve ``Ax = b``.
+
+    Solves the symmetric (possibly indefinite) system ``Ax = b`` or,
+    if *shift* is nonzero, ``(A - shift*I)x = b``.  All computation
+    stays on the SYCL device; only scalar recurrence coefficients and
+    norms are transferred to the host for branching.
+
+    The algorithm follows SciPy's MINRES (Paige & Saunders, 1975)
+    line-for-line.  Three host syncs per iteration are unavoidable:
+    ``alpha`` and ``beta`` (Lanczos inner products) and ``ynorm``
+    (solution norm for stopping tests).
+
+    Parameters
+    ----------
+    A : dpnp sparse matrix, 2-D dpnp.ndarray, or LinearOperator
+        The real symmetric or complex Hermitian matrix, shape ``(n, n)``.
+    b : dpnp.ndarray
+        Right-hand side, shape ``(n,)`` or ``(n, 1)``.
+    x0 : dpnp.ndarray, optional
+        Starting guess for the solution.
+    shift : float
+        If nonzero, solve ``(A - shift*I)x = b``.  Default 0.
+    rtol : float
+        Relative tolerance for convergence.  Default 1e-5.
+    maxiter : int, optional
+        Maximum number of iterations.  Default ``5*n``.
+    M : dpnp sparse matrix, dpnp.ndarray, or LinearOperator, optional
+        Preconditioner approximating the inverse of ``A``.
+    callback : callable, optional
+        Called as ``callback(xk)`` after each iteration.
+    show : bool
+        If True, print convergence summary each iteration.
+    check : bool
+        If True, verify that ``A`` and ``M`` are symmetric before
+        iterating.  Costs extra matvecs.
+
+    Returns
+    -------
+    x : dpnp.ndarray
+        The converged (or best) solution.
+    info : int
+        0 if converged, ``maxiter`` if the iteration limit was reached.
+
+    Notes
+    -----
+    This is a direct translation of the Paige--Saunders MINRES algorithm
+    as implemented in SciPy, adapted for dpnp device arrays with the
+    oneMKL SpMV cached-handle fast-path.
+
+    See Also
+    --------
+    scipy.sparse.linalg.minres
+    cupyx.scipy.sparse.linalg.minres
+    """
+
+    A_op, M_op, x, b, dtype = _make_system(A, M, x0, b)
+    matvec = A_op.matvec
+    psolve = M_op.matvec
+
+    n = A_op.shape[0]
+    if maxiter is None:
+        maxiter = 5 * n
+
+    istop = 0
+    itn = 0
+    eps = dpnp.finfo(dtype).eps
+
+    # ------------------------------------------------------------------
+    # Set up y and v for the first Lanczos vector v1.
+    #   y  = beta1 * P' * v1, where P = M**(-1).
+    #   v  is really P' * v1.
+    # ------------------------------------------------------------------
+
+    Ax = matvec(x)
+    r1 = b - Ax
+    y = psolve(r1)
+
+    # beta1 = <r1, y>   -- one host sync (setup only).
+    # Transferred to host immediately because beta1 seeds ~5 host-side
+    # scalars (beta, qrnorm, phibar, rhs1) used in Python arithmetic
+    # and branches every iteration.  Keeping it as a 0-D device array
+    # would cascade implicit syncs or 0-D allocations throughout the
+    # recurrence -- and the < 0 / == 0 guards below would each trigger
+    # an implicit __bool__ sync of their own.
+    beta1 = float(dpnp.inner(r1, y))
+
+    if beta1 < 0:
+        raise ValueError("indefinite preconditioner")
+    if beta1 == 0:
+        return (x, 0)
+
+    beta1 = math.sqrt(beta1)
+
+    if check:
+        # See if A is symmetric.  All on device; only the bool syncs.
+        w_chk = matvec(y)
+        r2_chk = matvec(w_chk)
+        s = dpnp.inner(w_chk, w_chk)
+        t = dpnp.inner(y, r2_chk)
+        if abs(s - t) > (s + eps) * eps ** (1.0 / 3.0):
+            raise ValueError("non-symmetric matrix")
+
+        # See if M is symmetric.
+        r2_chk = psolve(y)
+        s = dpnp.inner(y, y)
+        t = dpnp.inner(r1, r2_chk)
+        if abs(s - t) > (s + eps) * eps ** (1.0 / 3.0):
+            raise ValueError("non-symmetric preconditioner")
+
+    # Initialise remaining quantities (all host-side scalars).
+    oldb = 0
+    beta = beta1
+    dbar = 0
+    epsln = 0
+    qrnorm = beta1
+    phibar = beta1
+    rhs1 = beta1
+    rhs2 = 0
+    tnorm2 = 0
+    gmax = 0
+    gmin = dpnp.finfo(dtype).max
+    cs = -1
+    sn = 0
+    queue = b.sycl_queue
+    w = dpnp.zeros(n, dtype=dtype, sycl_queue=queue)
+    w2 = dpnp.zeros(n, dtype=dtype, sycl_queue=queue)
+    r2 = r1
+
+    # Main Lanczos loop.
+    while itn < maxiter:
+        itn += 1
+
+        s = 1.0 / beta
+        v = s * y  # on device
+
+        y = matvec(v)
+        y = y - shift * v
+
+        if itn >= 2:
+            y = y - (beta / oldb) * r1
+
+        # alpha = <v, y>   -- host sync #1
+        alpha = float(dpnp.inner(v, y))
+
+        y = y - (alpha / beta) * r2
+        r1 = r2
+        r2 = y
+        y = psolve(r2)
+        oldb = beta
+
+        # beta = sqrt(<r2, y>)   -- host sync #2
+        beta = float(dpnp.inner(r2, y))
+        if beta < 0:
+            raise ValueError("non-symmetric matrix")
+        beta = math.sqrt(beta)
+
+        tnorm2 += alpha**2 + oldb**2 + beta**2
+
+        if itn == 1:
+            if beta / beta1 <= 10 * eps:
+                istop = -1  # Terminate later
+
+        # Apply previous rotation Q_{k-1} to get
+        #   [delta_k  epsln_{k+1}] = [cs  sn] [dbar_k     0     ]
+        #   [gbar_k   dbar_{k+1} ]   [sn -cs] [alpha_k  beta_{k+1}]
+        oldeps = epsln
+        delta = cs * dbar + sn * alpha
+        gbar = sn * dbar - cs * alpha
+        epsln = sn * beta
+        dbar = -cs * beta
+        root = math.hypot(gbar, dbar)
+
+        # Compute the next plane rotation Q_k.
+        gamma = math.hypot(gbar, beta)
+        gamma = max(gamma, eps)
+        cs = gbar / gamma
+        sn = beta / gamma
+        phi = cs * phibar
+        phibar = sn * phibar
+
+        # Update x  -- all on device.
+        denom = 1.0 / gamma
+        w1 = w2
+        w2 = w
+        w = (v - oldeps * w1 - delta * w2) * denom
+        x = x + phi * w
+
+        # Go round again.
+        gmax = max(gmax, gamma)
+        gmin = min(gmin, gamma)
+        z = rhs1 / gamma
+        rhs1 = rhs2 - delta * z
+        rhs2 = -epsln * z
+
+        # ----------------------------------------------------------
+        # Estimate norms and test for convergence.
+        # ----------------------------------------------------------
+        Anorm = math.sqrt(tnorm2)
+        ynorm = float(dpnp.linalg.norm(x))  # host sync #3
+        epsa = Anorm * eps
+        epsx = Anorm * ynorm * eps
+        epsr = Anorm * ynorm * rtol
+        diag = gbar
+        if diag == 0:
+            diag = epsa
+
+        qrnorm = phibar
+        rnorm = qrnorm
+        if ynorm == 0 or Anorm == 0:
+            test1 = math.inf
+        else:
+            test1 = rnorm / (Anorm * ynorm)  # ||r|| / (||A|| ||x||)
+        if Anorm == 0:
+            test2 = math.inf
+        else:
+            test2 = root / Anorm  # ||Ar|| / (||A|| ||r||)
+
+        # Estimate cond(A).
+        Acond = gmax / gmin
+
+        # Stopping criteria (SciPy's istop codes).
+        if istop == 0:
+            t1 = 1 + test1
+            t2 = 1 + test2
+            if t2 <= 1:
+                istop = 2
+            if t1 <= 1:
+                istop = 1
+
+            if itn >= maxiter:
+                istop = 6
+            if Acond >= 0.1 / eps:
+                istop = 4
+            if epsx >= beta1:
+                istop = 3
+            if test2 <= rtol:
+                istop = 2
+            if test1 <= rtol:
+                istop = 1
+
+        if show:
+            prnt = (
+                n <= 40
+                or itn <= 10
+                or itn >= maxiter - 10
+                or itn % 10 == 0
+                or qrnorm <= 10 * epsx
+                or qrnorm <= 10 * epsr
+                or Acond <= 1e-2 / eps
+                or istop != 0
+            )
+            if prnt:
+                x1 = float(x[0])
+                print(
+                    f"{itn:6g} {x1:12.5e} {test1:10.3e}"
+                    f" {test2:10.3e}"
+                    f" {Anorm:8.1e} {Acond:8.1e}"
+                    f" {gbar / Anorm if Anorm else 0:8.1e}"
+                )
+                if itn % 10 == 0:
+                    print()
+
+        if callback is not None:
+            callback(x)
+
+        if istop != 0:
+            break
+
+    if istop == 6:
+        info = maxiter
+    else:
+        info = 0
+
+    return (x, info)
+
+
+def _make_compute_hu(V, H):
+    """Factory for the GMRES Arnoldi inner step on Intel GPU.
+
+    Returns a closure ``compute_hu(u, j) -> u`` that performs
+    classical Gram-Schmidt orthogonalisation of ``u`` against the
+    first ``j+1`` columns of ``V`` and writes the projection
+    coefficients into column ``j`` of ``H``:
+
+        h = V[:, :j+1].conj().T @ u
+        H[:j+1, j] = h
+        u = u - V[:, :j+1] @ h
+
+    Both calls are dispatched as single oneMKL ``gemv`` kernels via
+    the ``bi._gemv_alpha_beta`` binding:
+
+      * Pass 1 (project) -- ``gemv(trans_op=T or C, alpha=1, beta=0)``
+        with the *output* pointing at the Hessenberg column slice
+        ``H[:j+1, j]``. No temporary ``h`` buffer is allocated; the
+        result lands directly in the matrix. ``trans_op`` is T for
+        real matrices (where V^T == V^H) and C (conjugate-transpose)
+        for complex matrices, so oneMKL produces ``V^H u`` directly.
+      * Pass 2 (subtract) -- ``gemv(trans_op=N, alpha=-1, beta=1)``
+        with input ``H[:j+1, j]`` and in-place output ``u``. No
+        temporary ``tmp`` buffer; the AXPY-style update is fused
+        into the gemv kernel.
+
+    A prior version of this closure tried to emulate ``V^H u`` for
+    complex matrices by issuing ``gemv(transpose=T)`` and then post-
+    conjugating ``h`` in place. That is mathematically wrong: the
+    identity ``conj(V^T u) == V^H u`` holds only when ``u`` is real;
+    for complex ``u`` the result is ``V^H @ conj(u)``, a different
+    vector that silently breaks Krylov-basis orthogonality and
+    prevents GMRES from converging. Using oneMKL's native conjugate-
+    transpose mode -- now exposed via the tri-state ``trans_op``
+    parameter of the binding -- removes the work-around entirely.
+
+    Parameters
+    ----------
+    V : dpnp.ndarray
+        Krylov basis of shape ``(n, restart)``, must be F-contiguous.
+    H : dpnp.ndarray
+        Hessenberg matrix of shape ``(restart+1, restart)``, must be
+        F-contiguous so column slices ``H[:k, j]`` are unit-stride
+        contiguous USM views the C binding can write into.
+
+    Returns
+    -------
+    closure : callable
+        ``compute_hu(u, j) -> u`` -- updates ``H[:j+1, j]`` in place
+        and returns the orthogonalised ``u``.
+    """
+    if V.ndim != 2 or not V.flags.f_contiguous:
+        raise ValueError(
+            "_make_compute_hu: V must be a 2-D column-major (F-order) "
+            "dpnp array"
+        )
+    if H.ndim != 2 or not H.flags.f_contiguous:
+        raise ValueError(
+            "_make_compute_hu: H must be a 2-D column-major (F-order) "
+            "dpnp array so column slices are unit-stride USM views"
+        )
+    if V.sycl_queue != H.sycl_queue:
+        raise ValueError(
+            "_make_compute_hu: V and H must share the same SYCL queue"
+        )
+
+    exec_q = V.sycl_queue
+    dtype = V.dtype
+    is_cpx = dpnp.issubdtype(dtype, dpnp.complexfloating)
+
+    # trans_op for pass-1 selects whether we project with V^T (real,
+    # which equals V^H) or with V^H directly via oneMKL's ``conjtrans``
+    # mode. The previous implementation tried to emulate V^H using
+    # transpose=T followed by an element-wise conjugate of h, but the
+    # identity ``conj(V^T @ u) == V^H @ u`` only holds when u is real;
+    # for complex u it produces ``V^H @ conj(u)`` instead, which is a
+    # different vector and silently breaks Gram-Schmidt orthogonality.
+    # bi._gemv_alpha_beta now exposes the full {N, T, C} tri-state so
+    # we can ask oneMKL for V^H directly -- one kernel, mathematically
+    # exact, no scratch buffer, no post-hoc conjugate to event-order.
+    pass1_trans_op = 2 if is_cpx else 1  # 2 = conjtrans, 1 = transpose
+
+    def compute_hu(u, j):
+        Vj = V[:, : j + 1]
+        h_slice = H[: j + 1, j]  # length-(j+1) F-contig column slice
+
+        Vj_usm = dpnp.get_usm_ndarray(Vj)
+        u_usm = dpnp.get_usm_ndarray(u)
+        h_usm = dpnp.get_usm_ndarray(h_slice)
+
+        _manager = dpu.SequentialOrderManager[exec_q]
+
+        # Pass 1: H[:j+1, j] = op(Vj) @ u   (alpha=1, beta=0)
+        # op = V^T for real, V^H for complex. Writes the projection
+        # coefficients straight into the Hessenberg column slice
+        # without any temporary buffer.
+        # pylint: disable-next=protected-access
+        ht1, ev1 = bi._gemv_alpha_beta(
+            exec_q,
+            Vj_usm,
+            u_usm,
+            h_usm,
+            trans_op=pass1_trans_op,
+            alpha=1.0,
+            beta=0.0,
+            depends=_manager.submitted_events,
+        )
+        _manager.add_event_pair(ht1, ev1)
+
+        # Pass 2: u = -Vj @ H[:j+1, j] + 1 * u   (alpha=-1, beta=1)
+        # Fused AXPY-gemv -- single oneMKL kernel, no tmp buffer.
+        # pylint: disable-next=protected-access
+        ht2, ev2 = bi._gemv_alpha_beta(
+            exec_q,
+            Vj_usm,
+            h_usm,
+            u_usm,
+            trans_op=0,  # N: no transpose
+            alpha=-1.0,
+            beta=1.0,
+            depends=_manager.submitted_events,
+        )
+        _manager.add_event_pair(ht2, ev2)
+
+        return u
+
+    return compute_hu
diff --git a/dpnp/tests/test_scipy_sparse_linalg.py b/dpnp/tests/test_scipy_sparse_linalg.py
new file mode 100644
index 000000000000..b11aa1fcc796
--- /dev/null
+++ b/dpnp/tests/test_scipy_sparse_linalg.py
@@ -0,0 +1,896 @@
+import warnings
+
+import numpy
+import pytest
+from numpy.testing import (
+    assert_allclose,
+    assert_raises,
+)
+
+import dpnp
+from dpnp.scipy.sparse.linalg import (
+    LinearOperator,
+    aslinearoperator,
+    cg,
+    gmres,
+    minres,
+)
+from dpnp.tests.helper import (
+    assert_dtype_allclose,
+    generate_random_numpy_array,
+    get_all_dtypes,
+    get_float_complex_dtypes,
+    has_support_aspect64,
+    is_scipy_available,
+)
+from dpnp.tests.third_party.cupy import testing
+
+if is_scipy_available():
+    import scipy.sparse.linalg as scipy_sla
+
+
+# Helpers for constructing SPD, diagonally dominant, and symmetric
+# indefinite test matrices. Kept small and local, matching the style of
+# vvsort() at the top of test_linalg.py.
+def _spd_matrix(n, dtype):
+    rng = numpy.random.default_rng(42)
+    is_complex = numpy.issubdtype(numpy.dtype(dtype), numpy.complexfloating)
+    if is_complex:
+        a = rng.standard_normal((n, n)) + 1j * rng.standard_normal((n, n))
+        a = a.conj().T @ a + n * numpy.eye(n)
+    else:
+        a = rng.standard_normal((n, n))
+        a = a.T @ a + n * numpy.eye(n)
+    return dpnp.asarray(a.astype(dtype))
+
+
+def _diag_dominant(n, dtype, seed=81):
+    rng = numpy.random.default_rng(seed)
+    is_complex = numpy.issubdtype(numpy.dtype(dtype), numpy.complexfloating)
+    if is_complex:
+        a = 0.05 * (
+            rng.standard_normal((n, n)) + 1j * rng.standard_normal((n, n))
+        )
+    else:
+        a = 0.05 * rng.standard_normal((n, n))
+    a = a + float(n) * numpy.eye(n)
+    return dpnp.asarray(a.astype(dtype))
+
+
+def _sym_indefinite(n, dtype, seed=99):
+    rng = numpy.random.default_rng(seed)
+    a = rng.standard_normal((n, n))
+    q, _ = numpy.linalg.qr(a)
+    d = rng.standard_normal(n)
+    m = (q @ numpy.diag(d) @ q.T).astype(dtype)
+    return dpnp.asarray(m)
+
+
+def _rhs(n, dtype, seed=7):
+    rng = numpy.random.default_rng(seed)
+    is_complex = numpy.issubdtype(numpy.dtype(dtype), numpy.complexfloating)
+    if is_complex:
+        b = rng.standard_normal(n) + 1j * rng.standard_normal(n)
+    else:
+        b = rng.standard_normal(n)
+    b /= numpy.linalg.norm(b)
+    return dpnp.asarray(b.astype(dtype))
+
+
+def _rtol_for(dtype):
+    if dtype in (dpnp.float32, dpnp.complex64, numpy.float32, numpy.complex64):
+        return 1e-5
+    return 1e-8
+
+
+def _res_bound(dtype):
+    if dtype in (dpnp.float32, dpnp.complex64, numpy.float32, numpy.complex64):
+        return 1e-3
+    return 1e-5
+
+
+# GMRES in dpnp.scipy.sparse.linalg._iterative uses real-valued Givens
+# rotation formulas which are incorrect for complex Arnoldi, so GMRES
+# returns wrong solutions for complex dtypes. Complex GMRES tests are
+# xfailed below. When the Givens block is fixed the xfails will flip to
+# XPASS and force an update here.
+_GMRES_CPX_XFAIL = (
+    "GMRES Givens rotation is real-valued; broken for complex dtypes"
+)
+
+_GMRES_DTYPES = [
+    dpnp.float32,
+    dpnp.float64,
+    pytest.param(
+        dpnp.complex64,
+        marks=pytest.mark.xfail(reason=_GMRES_CPX_XFAIL, strict=False),
+    ),
+    pytest.param(
+        dpnp.complex128,
+        marks=pytest.mark.xfail(reason=_GMRES_CPX_XFAIL, strict=False),
+    ),
+]
+
+
+class TestImports:
+    def test_all_symbols_importable(self):
+        from dpnp.scipy.sparse.linalg import (  # noqa: F401
+            LinearOperator,
+            aslinearoperator,
+            cg,
+            gmres,
+            minres,
+        )
+
+        for sym in (LinearOperator, aslinearoperator, cg, gmres, minres):
+            assert callable(sym)
+
+    def test_all_in_dunder_all(self):
+        import dpnp.scipy.sparse.linalg as mod
+
+        for name in (
+            "LinearOperator",
+            "aslinearoperator",
+            "cg",
+            "gmres",
+            "minres",
+        ):
+            assert name in mod.__all__
+
+
+class TestLinearOperator:
+    @pytest.mark.parametrize(
+        "shape",
+        [(5, 5), (7, 3), (3, 7)],
+        ids=["(5, 5)", "(7, 3)", "(3, 7)"],
+    )
+    def test_shape(self, shape):
+        m, n = shape
+        lo = LinearOperator(
+            shape,
+            matvec=lambda x: dpnp.zeros(m, dtype=dpnp.float32),
+            dtype=dpnp.float32,
+        )
+        assert lo.shape == (m, n)
+        assert lo.ndim == 2
+
+    @pytest.mark.parametrize("dtype", get_float_complex_dtypes())
+    def test_dtype_explicit(self, dtype):
+        n = 4
+        a = dpnp.eye(n, dtype=dtype)
+        lo = LinearOperator(
+            (n, n),
+            matvec=lambda x: (a @ x.astype(dtype)).astype(dtype),
+            dtype=dtype,
+        )
+        assert lo.dtype == dtype
+
+    def test_dtype_inference_float64_default(self):
+        # Dtype inference probes matvec with a float64 vector, so the
+        # inferred dtype is float64 even when the underlying array is
+        # float32. Pin the current behaviour as a regression guard.
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        n = 4
+        a = dpnp.eye(n, dtype=dpnp.float32)
+        lo = LinearOperator((n, n), matvec=lambda x: a @ x)
+        assert lo.dtype == dpnp.float64
+
+    @pytest.mark.parametrize("dtype", get_float_complex_dtypes())
+    def test_matvec(self, dtype):
+        n = 6
+        a = generate_random_numpy_array((n, n), dtype, seed_value=42)
+        ia = dpnp.array(a)
+        lo = LinearOperator((n, n), matvec=lambda x: ia @ x, dtype=dtype)
+        x = generate_random_numpy_array((n,), dtype, seed_value=1)
+        ix = dpnp.array(x)
+        result = lo.matvec(ix)
+        expected = a @ x
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize("dtype", get_float_complex_dtypes())
+    def test_rmatvec(self, dtype):
+        n = 5
+        a = generate_random_numpy_array((n, n), dtype, seed_value=12)
+        ia = dpnp.array(a)
+        lo = LinearOperator(
+            (n, n),
+            matvec=lambda x: ia @ x,
+            rmatvec=lambda x: dpnp.conj(ia.T) @ x,
+            dtype=dtype,
+        )
+        x = generate_random_numpy_array((n,), dtype, seed_value=3)
+        ix = dpnp.array(x)
+        result = lo.rmatvec(ix)
+        expected = a.conj().T @ x
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize("dtype", get_float_complex_dtypes())
+    def test_matmat_fallback_loop(self, dtype):
+        n, k = 5, 3
+        a = generate_random_numpy_array((n, n), dtype, seed_value=55)
+        ia = dpnp.array(a)
+        lo = LinearOperator((n, n), matvec=lambda x: ia @ x, dtype=dtype)
+        x = generate_random_numpy_array((n, k), dtype, seed_value=9)
+        ix = dpnp.array(x)
+        result = lo.matmat(ix)
+        expected = a @ x
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize("dtype", get_float_complex_dtypes())
+    def test_matmul_1d(self, dtype):
+        # lo @ x dispatches to matvec
+        n = 6
+        a = generate_random_numpy_array((n, n), dtype, seed_value=42)
+        ia = dpnp.array(a)
+        lo = LinearOperator((n, n), matvec=lambda x: ia @ x, dtype=dtype)
+        x = generate_random_numpy_array((n,), dtype, seed_value=2)
+        ix = dpnp.array(x)
+        result = lo @ ix
+        expected = a @ x
+        assert_dtype_allclose(result, expected)
+
+    @pytest.mark.parametrize("dtype", get_float_complex_dtypes())
+    def test_matmul_2d(self, dtype):
+        # lo @ X dispatches to matmat
+        n, k = 5, 3
+        a = generate_random_numpy_array((n, n), dtype, seed_value=42)
+        ia = dpnp.array(a)
+        lo = LinearOperator((n, n), matvec=lambda x: ia @ x, dtype=dtype)
+        x = generate_random_numpy_array((n, k), dtype, seed_value=5)
+        ix = dpnp.array(x)
+        result = lo @ ix
+        expected = a @ x
+        assert_dtype_allclose(result, expected)
+
+    def test_call_alias(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        n = 4
+        ia = dpnp.eye(n, dtype=dpnp.float64)
+        lo = LinearOperator((n, n), matvec=lambda x: ia @ x, dtype=dpnp.float64)
+        ix = dpnp.ones(n, dtype=dpnp.float64)
+        assert_allclose(dpnp.asnumpy(lo(ix)), numpy.ones(n), atol=1e-12)
+
+    def test_repr(self):
+        lo = LinearOperator(
+            (3, 4),
+            matvec=lambda x: dpnp.zeros(3, dtype=dpnp.float32),
+            dtype=dpnp.float32,
+        )
+        r = repr(lo)
+        assert "LinearOperator" in r
+        assert "3x4" in r or "(3, 4)" in r
+
+    @pytest.mark.parametrize("dtype", [dpnp.float32, dpnp.float64])
+    def test_subclass_custom_matmat(self, dtype):
+        if not has_support_aspect64() and dtype == dpnp.float64:
+            pytest.skip("float64 not supported on this device")
+        n, k = 7, 4
+        a = generate_random_numpy_array((n, n), dtype, seed_value=42)
+        ia = dpnp.array(a)
+
+        class MyOp(LinearOperator):
+            def __init__(self):
+                super().__init__(dtype=dtype, shape=(n, n))
+                self._a = ia
+
+            def _matvec(self, x):
+                return self._a @ x
+
+            def _matmat(self, X):
+                return self._a @ X
+
+        op = MyOp()
+        x = generate_random_numpy_array((n, k), dtype, seed_value=9)
+        ix = dpnp.array(x)
+        result = op.matmat(ix)
+        expected = a @ x
+        assert_dtype_allclose(result, expected)
+
+    def test_linear_operator_errors(self):
+        lo = LinearOperator(
+            (3, 5),
+            matvec=lambda x: dpnp.zeros(3, dtype=dpnp.float32),
+            dtype=dpnp.float32,
+        )
+        # matvec with wrong shape
+        assert_raises(ValueError, lo.matvec, dpnp.ones(4, dtype=dpnp.float32))
+
+        # rmatvec not provided
+        lo2 = LinearOperator(
+            (3, 3),
+            matvec=lambda x: dpnp.zeros(3, dtype=dpnp.float32),
+            dtype=dpnp.float32,
+        )
+        assert_raises(
+            (NotImplementedError, ValueError),
+            lo2.rmatvec,
+            dpnp.zeros(3, dtype=dpnp.float32),
+        )
+
+        # matmat with 1-D input
+        assert_raises(ValueError, lo2.matmat, dpnp.ones(3, dtype=dpnp.float32))
+
+        # negative shape
+        assert_raises(
+            (ValueError, Exception),
+            LinearOperator,
+            (-1, 3),
+            matvec=lambda x: x,
+            dtype=dpnp.float32,
+        )
+
+        # shape with wrong ndim
+        assert_raises(
+            (ValueError, Exception),
+            LinearOperator,
+            (3,),
+            matvec=lambda x: x,
+            dtype=dpnp.float32,
+        )
+
+
+class TestAsLinearOperator:
+    def test_identity_if_already_linearoperator(self):
+        lo = LinearOperator((3, 3), matvec=lambda x: x, dtype=dpnp.float32)
+        assert aslinearoperator(lo) is lo
+
+    @pytest.mark.parametrize("dtype", get_float_complex_dtypes())
+    def test_dense_dpnp_array_matvec(self, dtype):
+        n = 6
+        a = generate_random_numpy_array((n, n), dtype, seed_value=42)
+        ia = dpnp.array(a)
+        lo = aslinearoperator(ia)
+        assert lo.shape == (n, n)
+        x = generate_random_numpy_array((n,), dtype, seed_value=1)
+        ix = dpnp.array(x)
+        result = lo.matvec(ix)
+        expected = a @ x
+        assert_dtype_allclose(result, expected)
+
+    def test_dense_numpy_array_attributes_only(self):
+        # aslinearoperator(numpy_array) wraps with lambda x: A @ x where A
+        # remains a numpy array; calling matvec(dpnp_x) then fails because
+        # dpnp __rmatmul__ refuses numpy LHS. Only attributes are checked.
+        n = 5
+        a = generate_random_numpy_array((n, n), numpy.float64, seed_value=42)
+        lo = aslinearoperator(a)
+        assert lo.shape == (n, n)
+
+    def test_rmatvec_from_dpnp_dense(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        n = 5
+        a = generate_random_numpy_array((n, n), numpy.float64, seed_value=42)
+        ia = dpnp.array(a)
+        lo = aslinearoperator(ia)
+        x = generate_random_numpy_array((n,), numpy.float64, seed_value=2)
+        ix = dpnp.array(x)
+        result = lo.rmatvec(ix)
+        expected = a.conj().T @ x
+        assert_allclose(dpnp.asnumpy(result), expected, atol=1e-12)
+
+    def test_duck_type_with_shape_and_matvec(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        n = 4
+
+        class DuckOp:
+            shape = (n, n)
+            dtype = numpy.dtype(numpy.float64)
+
+            def matvec(self, x):
+                return x * 2.0
+
+            def rmatvec(self, x):
+                return x * 2.0
+
+        lo = aslinearoperator(DuckOp())
+        ix = dpnp.ones(n, dtype=dpnp.float64)
+        result = lo.matvec(ix)
+        assert_allclose(dpnp.asnumpy(result), numpy.full(n, 2.0), atol=1e-12)
+
+    def test_aslinearoperator_errors(self):
+        assert_raises((TypeError, Exception), aslinearoperator, "nope")
+
+
+class TestCg:
+    n = 30
+
+    @pytest.mark.parametrize("dtype", get_float_complex_dtypes())
+    def test_cg_converges_spd(self, dtype):
+        ia = _spd_matrix(self.n, dtype)
+        ib = _rhs(self.n, dtype)
+        x, info = cg(ia, ib, rtol=_rtol_for(dtype), maxiter=500)
+        assert info == 0
+        res = float(dpnp.linalg.norm(ia @ x - ib) / dpnp.linalg.norm(ib))
+        assert res < _res_bound(dtype)
+
+    @pytest.mark.skipif(not is_scipy_available(), reason="SciPy not available")
+    @pytest.mark.parametrize("dtype", [dpnp.float32, dpnp.float64])
+    def test_cg_matches_scipy(self, dtype):
+        if not has_support_aspect64() and dtype == dpnp.float64:
+            pytest.skip("float64 not supported on this device")
+        a = dpnp.asnumpy(_spd_matrix(self.n, dtype))
+        b = dpnp.asnumpy(_rhs(self.n, dtype))
+        try:
+            x_ref, info_ref = scipy_sla.cg(a, b, rtol=1e-8, maxiter=500)
+        except TypeError:  # scipy < 1.12
+            x_ref, info_ref = scipy_sla.cg(a, b, tol=1e-8, maxiter=500)
+        assert info_ref == 0
+        x_dp, info = cg(dpnp.array(a), dpnp.array(b), rtol=1e-8, maxiter=500)
+        assert info == 0
+        tol = 1e-4 if dtype == dpnp.float32 else 1e-8
+        assert_allclose(dpnp.asnumpy(x_dp), x_ref, rtol=tol, atol=tol)
+
+    @pytest.mark.parametrize("dtype", [dpnp.float32, dpnp.float64])
+    def test_cg_x0_warm_start(self, dtype):
+        if not has_support_aspect64() and dtype == dpnp.float64:
+            pytest.skip("float64 not supported on this device")
+        ia = _spd_matrix(self.n, dtype)
+        ib = _rhs(self.n, dtype)
+        x0 = dpnp.ones(self.n, dtype=dtype)
+        x, info = cg(ia, ib, x0=x0, rtol=_rtol_for(dtype), maxiter=500)
+        assert info == 0
+        res = float(dpnp.linalg.norm(ia @ x - ib) / dpnp.linalg.norm(ib))
+        assert res < _res_bound(dtype)
+
+    @pytest.mark.parametrize("dtype", [dpnp.float32, dpnp.float64])
+    def test_cg_b_2dim(self, dtype):
+        # b with shape (n, 1) must be accepted and flattened internally
+        if not has_support_aspect64() and dtype == dpnp.float64:
+            pytest.skip("float64 not supported on this device")
+        ia = _spd_matrix(self.n, dtype)
+        ib = _rhs(self.n, dtype).reshape(self.n, 1)
+        _, info = cg(ia, ib, rtol=1e-8, maxiter=500)
+        assert info == 0
+
+    def test_cg_b_zero(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        ia = _spd_matrix(10, dpnp.float64)
+        ib = dpnp.zeros(10, dtype=dpnp.float64)
+        x, info = cg(ia, ib, rtol=1e-8)
+        assert info == 0
+        assert_allclose(dpnp.asnumpy(x), numpy.zeros(10), atol=1e-14)
+
+    def test_cg_callback(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        ia = _spd_matrix(self.n, dpnp.float64)
+        ib = _rhs(self.n, dpnp.float64)
+        calls = []
+        cg(
+            ia,
+            ib,
+            callback=lambda xk: calls.append(float(dpnp.linalg.norm(xk))),
+            rtol=1e-10,
+            maxiter=200,
+        )
+        assert len(calls) > 0
+
+    def test_cg_atol(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        ia = _spd_matrix(self.n, dpnp.float64)
+        ib = _rhs(self.n, dpnp.float64)
+        x, _ = cg(ia, ib, rtol=0.0, atol=1e-1, maxiter=500)
+        assert float(dpnp.linalg.norm(ia @ x - ib)) < 1.0
+
+    def test_cg_exact_solution(self):
+        # x0 == true solution must return info == 0 immediately
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        n = 10
+        ia = _spd_matrix(n, dpnp.float64)
+        ib = _rhs(n, dpnp.float64)
+        x_true = dpnp.array(
+            numpy.linalg.solve(dpnp.asnumpy(ia), dpnp.asnumpy(ib))
+        )
+        _, info = cg(ia, ib, x0=x_true, rtol=1e-12)
+        assert info == 0
+
+    @pytest.mark.parametrize("dtype", get_float_complex_dtypes())
+    def test_cg_via_linear_operator(self, dtype):
+        ia = _spd_matrix(self.n, dtype)
+        ib = _rhs(self.n, dtype)
+        lo = aslinearoperator(ia)
+        x, info = cg(lo, ib, rtol=_rtol_for(dtype), maxiter=500)
+        assert info == 0
+        res = float(dpnp.linalg.norm(ia @ x - ib) / dpnp.linalg.norm(ib))
+        assert res < _res_bound(dtype)
+
+    def test_cg_maxiter_nonconvergence(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        ia = _spd_matrix(50, dpnp.float64)
+        ib = _rhs(50, dpnp.float64)
+        _, info = cg(ia, ib, rtol=1e-15, atol=0.0, maxiter=1)
+        assert info != 0
+
+    def test_cg_diag_preconditioner(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        ia = _spd_matrix(self.n, dpnp.float64)
+        ib = _rhs(self.n, dpnp.float64)
+        M = aslinearoperator(dpnp.diag(1.0 / dpnp.diag(ia)))
+        _, info = cg(ia, ib, M=M, rtol=1e-8, maxiter=500)
+        assert info == 0
+
+    def test_cg_errors(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        ia = _spd_matrix(5, dpnp.float64)
+        ib = dpnp.ones(6, dtype=dpnp.float64)
+        # b length mismatch
+        with pytest.raises((ValueError, Exception)):
+            cg(ia, ib, maxiter=1)
+
+
+class TestGmres:
+    n = 30
+
+    @pytest.mark.parametrize("dtype", _GMRES_DTYPES)
+    def test_gmres_converges_diag_dominant(self, dtype):
+        if not has_support_aspect64() and dtype in (
+            dpnp.float64,
+            dpnp.complex128,
+        ):
+            pytest.skip("float64 not supported on this device")
+        ia = _diag_dominant(self.n, dtype)
+        ib = _rhs(self.n, dtype)
+        x, _ = gmres(
+            ia,
+            ib,
+            rtol=_rtol_for(dtype),
+            maxiter=200,
+            restart=self.n,
+        )
+        # Check actual residual rather than info: see comment above
+        # _GMRES_CPX_XFAIL.
+        res = float(dpnp.linalg.norm(ia @ x - ib) / dpnp.linalg.norm(ib))
+        assert res < _res_bound(dtype)
+
+    @pytest.mark.skipif(not is_scipy_available(), reason="SciPy not available")
+    @pytest.mark.parametrize("dtype", [dpnp.float32, dpnp.float64])
+    def test_gmres_matches_scipy(self, dtype):
+        if not has_support_aspect64() and dtype == dpnp.float64:
+            pytest.skip("float64 not supported on this device")
+        a = dpnp.asnumpy(_diag_dominant(self.n, dtype))
+        b = dpnp.asnumpy(_rhs(self.n, dtype))
+        req_rtol = _rtol_for(dtype)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            try:
+                x_ref, _ = scipy_sla.gmres(
+                    a, b, rtol=req_rtol, restart=self.n, maxiter=None
+                )
+            except TypeError:  # scipy < 1.12
+                x_ref, _ = scipy_sla.gmres(
+                    a, b, tol=req_rtol, restart=self.n, maxiter=None
+                )
+        x_dp, info = gmres(
+            dpnp.array(a),
+            dpnp.array(b),
+            rtol=req_rtol,
+            restart=self.n,
+            maxiter=50,
+        )
+        assert info == 0
+        tol = 1e-3 if dtype == dpnp.float32 else 1e-7
+        assert_allclose(dpnp.asnumpy(x_dp), x_ref, rtol=tol, atol=tol)
+
+    @pytest.mark.parametrize("restart", [None, 5, 15], ids=["None", "5", "15"])
+    def test_gmres_restart_values(self, restart):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        ia = _diag_dominant(self.n, dpnp.float64)
+        ib = _rhs(self.n, dpnp.float64)
+        _, info = gmres(ia, ib, rtol=1e-8, restart=restart, maxiter=100)
+        assert info == 0
+
+    @pytest.mark.parametrize("dtype", [dpnp.float32, dpnp.float64])
+    def test_gmres_x0_warm_start(self, dtype):
+        if not has_support_aspect64() and dtype == dpnp.float64:
+            pytest.skip("float64 not supported on this device")
+        ia = _diag_dominant(self.n, dtype)
+        ib = _rhs(self.n, dtype)
+        x0 = dpnp.ones(self.n, dtype=dtype)
+        x, _ = gmres(
+            ia,
+            ib,
+            x0=x0,
+            rtol=_rtol_for(dtype),
+            restart=self.n,
+            maxiter=200,
+        )
+        res = float(dpnp.linalg.norm(ia @ x - ib) / dpnp.linalg.norm(ib))
+        assert res < _res_bound(dtype)
+
+    def test_gmres_b_2dim(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        ia = _diag_dominant(self.n, dpnp.float64)
+        ib = _rhs(self.n, dpnp.float64).reshape(self.n, 1)
+        _, info = gmres(ia, ib, rtol=1e-8, restart=self.n, maxiter=100)
+        assert info == 0
+
+    def test_gmres_b_zero(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        ia = _diag_dominant(10, dpnp.float64)
+        ib = dpnp.zeros(10, dtype=dpnp.float64)
+        x, info = gmres(ia, ib, rtol=1e-8)
+        assert info == 0
+        assert_allclose(dpnp.asnumpy(x), numpy.zeros(10), atol=1e-14)
+
+    def test_gmres_callback_x(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        ia = _diag_dominant(self.n, dpnp.float64)
+        ib = _rhs(self.n, dpnp.float64)
+        calls = []
+        gmres(
+            ia,
+            ib,
+            callback=lambda xk: calls.append(1),
+            callback_type="x",
+            rtol=1e-10,
+            maxiter=20,
+            restart=self.n,
+        )
+        assert len(calls) > 0
+
+    def test_gmres_callback_pr_norm(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        ia = _diag_dominant(self.n, dpnp.float64)
+        ib = _rhs(self.n, dpnp.float64)
+        values = []
+        gmres(
+            ia,
+            ib,
+            callback=lambda r: values.append(float(r)),
+            callback_type="pr_norm",
+            rtol=1e-10,
+            maxiter=20,
+            restart=self.n,
+        )
+        assert len(values) > 0
+        assert all(v >= 0 for v in values)
+
+    def test_gmres_atol(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        ia = _diag_dominant(self.n, dpnp.float64)
+        ib = _rhs(self.n, dpnp.float64)
+        x, _ = gmres(
+            ia,
+            ib,
+            rtol=0.0,
+            atol=1e-6,
+            restart=self.n,
+            maxiter=50,
+        )
+        assert float(dpnp.linalg.norm(ia @ x - ib)) < 1e-4
+
+    @pytest.mark.parametrize("dtype", _GMRES_DTYPES)
+    def test_gmres_via_linear_operator(self, dtype):
+        if not has_support_aspect64() and dtype in (
+            dpnp.float64,
+            dpnp.complex128,
+        ):
+            pytest.skip("float64 not supported on this device")
+        ia = _diag_dominant(self.n, dtype)
+        ib = _rhs(self.n, dtype)
+        lo = aslinearoperator(ia)
+        x, _ = gmres(
+            lo,
+            ib,
+            rtol=_rtol_for(dtype),
+            restart=self.n,
+            maxiter=200,
+        )
+        res = float(dpnp.linalg.norm(ia @ x - ib) / dpnp.linalg.norm(ib))
+        assert res < _res_bound(dtype)
+
+    def test_gmres_nonconvergence(self):
+        # Ill-conditioned Hilbert matrix + tiny restart must not converge
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        n = 48
+        idx = numpy.arange(n, dtype=numpy.float64)
+        a = 1.0 / (idx[:, None] + idx[None, :] + 1.0)
+        rng = numpy.random.default_rng(5)
+        b = rng.standard_normal(n)
+        ia = dpnp.array(a)
+        ib = dpnp.array(b)
+        x, info = gmres(ia, ib, rtol=1e-15, atol=0.0, restart=2, maxiter=2)
+        rel = float(dpnp.linalg.norm(ia @ x - ib) / dpnp.linalg.norm(ib))
+        assert rel > 1e-12
+        assert info != 0
+
+    @pytest.mark.xfail(reason=_GMRES_CPX_XFAIL, strict=False)
+    def test_gmres_complex_system(self):
+        if not has_support_aspect64():
+            pytest.skip("complex128 not supported on this device")
+        n = 15
+        ia = _diag_dominant(n, dpnp.complex128)
+        ib = _rhs(n, dpnp.complex128)
+        x, _ = gmres(ia, ib, rtol=1e-8, restart=n, maxiter=200)
+        res = float(dpnp.linalg.norm(ia @ x - ib) / dpnp.linalg.norm(ib))
+        assert res < 1e-5
+
+    def test_gmres_errors(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        ia = _diag_dominant(self.n, dpnp.float64)
+        ib = _rhs(self.n, dpnp.float64)
+        # unknown callback_type
+        assert_raises(ValueError, gmres, ia, ib, callback_type="garbage")
+
+
+class TestMinres:
+    n = 30
+
+    @pytest.mark.parametrize("dtype", [dpnp.float32, dpnp.float64])
+    def test_minres_converges_spd(self, dtype):
+        if not has_support_aspect64() and dtype == dpnp.float64:
+            pytest.skip("float64 not supported on this device")
+        ia = _spd_matrix(self.n, dtype)
+        ib = _rhs(self.n, dtype)
+        x, info = minres(ia, ib, rtol=1e-8, maxiter=500)
+        assert info == 0
+        res = float(dpnp.linalg.norm(ia @ x - ib) / dpnp.linalg.norm(ib))
+        assert res < 1e-4
+
+    def test_minres_converges_sym_indefinite(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        ia = _sym_indefinite(self.n, dpnp.float64)
+        ib = _rhs(self.n, dpnp.float64)
+        x, _ = minres(ia, ib, rtol=1e-8, maxiter=1000)
+        res = float(dpnp.linalg.norm(ia @ x - ib) / dpnp.linalg.norm(ib))
+        assert res < 1e-3
+
+    @pytest.mark.skipif(not is_scipy_available(), reason="SciPy not available")
+    def test_minres_matches_scipy(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        a = dpnp.asnumpy(_spd_matrix(self.n, dpnp.float64))
+        b = dpnp.asnumpy(_rhs(self.n, dpnp.float64))
+        try:
+            x_ref, _ = scipy_sla.minres(a, b, rtol=1e-8)
+        except TypeError:
+            x_ref, _ = scipy_sla.minres(a, b, tol=1e-8)
+        x_dp, info = minres(dpnp.array(a), dpnp.array(b), rtol=1e-8)
+        assert info == 0
+        assert_allclose(dpnp.asnumpy(x_dp), x_ref, rtol=1e-5, atol=1e-6)
+
+    def test_minres_x0_warm_start(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        ia = _spd_matrix(self.n, dpnp.float64)
+        ib = _rhs(self.n, dpnp.float64)
+        x0 = dpnp.zeros(self.n, dtype=dpnp.float64)
+        _, info = minres(ia, ib, x0=x0, rtol=1e-8)
+        assert info == 0
+
+    def test_minres_shift(self):
+        # shift != 0 solves (A - shift*I) x = b
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        a = dpnp.asnumpy(_spd_matrix(self.n, dpnp.float64))
+        b = dpnp.asnumpy(_rhs(self.n, dpnp.float64))
+        shift = 0.5
+        x_dp, info = minres(
+            dpnp.array(a), dpnp.array(b), shift=shift, rtol=1e-8
+        )
+        assert info == 0
+        a_shifted = a - shift * numpy.eye(self.n)
+        res = numpy.linalg.norm(
+            a_shifted @ dpnp.asnumpy(x_dp) - b
+        ) / numpy.linalg.norm(b)
+        assert res < 1e-4
+
+    def test_minres_b_zero(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        ia = _spd_matrix(10, dpnp.float64)
+        ib = dpnp.zeros(10, dtype=dpnp.float64)
+        x, info = minres(ia, ib, rtol=1e-8)
+        assert info == 0
+        assert_allclose(dpnp.asnumpy(x), numpy.zeros(10), atol=1e-14)
+
+    def test_minres_via_linear_operator(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        ia = _spd_matrix(self.n, dpnp.float64)
+        ib = _rhs(self.n, dpnp.float64)
+        lo = aslinearoperator(ia)
+        _, info = minres(lo, ib, rtol=1e-8)
+        assert info == 0
+
+    def test_minres_callback(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        ia = _spd_matrix(self.n, dpnp.float64)
+        ib = _rhs(self.n, dpnp.float64)
+        calls = []
+        minres(
+            ia,
+            ib,
+            callback=lambda xk: calls.append(1),
+            rtol=1e-10,
+        )
+        assert len(calls) > 0
+
+    def test_minres_errors(self):
+        if not has_support_aspect64():
+            pytest.skip("float64 not supported on this device")
+        lo = aslinearoperator(dpnp.ones((4, 5), dtype=dpnp.float64))
+        ib = dpnp.ones(4, dtype=dpnp.float64)
+        # non-square operator
+        assert_raises((ValueError, Exception), minres, lo, ib)
+
+
+class TestSolversIntegration:
+    @pytest.mark.parametrize(
+        "n, dtype",
+        [
+            (10, dpnp.float32),
+            (10, dpnp.float64),
+            (30, dpnp.float64),
+            (50, dpnp.float64),
+        ],
+    )
+    def test_cg_spd_via_linearoperator(self, n, dtype):
+        if not has_support_aspect64() and dtype == dpnp.float64:
+            pytest.skip("float64 not supported on this device")
+        ia = _spd_matrix(n, dtype)
+        lo = aslinearoperator(ia)
+        ib = _rhs(n, dtype)
+        x, info = cg(lo, ib, rtol=_rtol_for(dtype), maxiter=n * 10)
+        assert info == 0
+        res = float(dpnp.linalg.norm(ia @ x - ib) / dpnp.linalg.norm(ib))
+        assert res < _res_bound(dtype)
+
+    @pytest.mark.parametrize(
+        "n, dtype",
+        [
+            (10, dpnp.float32),
+            (10, dpnp.float64),
+            (30, dpnp.float64),
+        ],
+    )
+    def test_gmres_nonsymmetric_via_linearoperator(self, n, dtype):
+        if not has_support_aspect64() and dtype == dpnp.float64:
+            pytest.skip("float64 not supported on this device")
+        ia = _diag_dominant(n, dtype)
+        lo = aslinearoperator(ia)
+        ib = _rhs(n, dtype)
+        x, _ = gmres(lo, ib, rtol=_rtol_for(dtype), restart=n, maxiter=200)
+        res = float(dpnp.linalg.norm(ia @ x - ib) / dpnp.linalg.norm(ib))
+        assert res < _res_bound(dtype)
+
+    @pytest.mark.skipif(
+        not is_scipy_available(), reason="SciPy required for minres"
+    )
+    @pytest.mark.parametrize(
+        "n, dtype",
+        [
+            (10, dpnp.float64),
+            (30, dpnp.float64),
+        ],
+    )
+    def test_minres_spd_via_linearoperator(self, n, dtype):
+        if not has_support_aspect64() and dtype == dpnp.float64:
+            pytest.skip("float64 not supported on this device")
+        ia = _spd_matrix(n, dtype)
+        lo = aslinearoperator(ia)
+        ib = _rhs(n, dtype)
+        x, info = minres(lo, ib, rtol=1e-8)
+        assert info == 0
+        res = float(dpnp.linalg.norm(ia @ x - ib) / dpnp.linalg.norm(ib))
+        assert res < 1e-4
diff --git a/dpnp/tests/third_party/cupyx/scipy_tests/sparse_tests/__init__.py b/dpnp/tests/third_party/cupyx/scipy_tests/sparse_tests/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/dpnp/tests/third_party/cupyx/scipy_tests/sparse_tests/test_linalg.py b/dpnp/tests/third_party/cupyx/scipy_tests/sparse_tests/test_linalg.py
new file mode 100644
index 000000000000..b0939f6899d1
--- /dev/null
+++ b/dpnp/tests/third_party/cupyx/scipy_tests/sparse_tests/test_linalg.py
@@ -0,0 +1,493 @@
+from __future__ import annotations
+
+import unittest
+
+import numpy
+import pytest
+
+import dpnp as cupy
+from dpnp.tests.third_party.cupy import testing
+
+if cupy.tests.helper.is_scipy_available():
+    import scipy.sparse
+    import scipy.sparse.linalg
+
+
+def _spd_matrix(n, dtype, seed=0):
+    rng = numpy.random.RandomState(seed)
+    R = rng.rand(n, n).astype(dtype)
+    if numpy.dtype(dtype).kind == "c":
+        R = R + 1j * rng.rand(n, n).astype(dtype)
+    return R @ R.conj().T + n * numpy.eye(n, dtype=dtype)
+
+
+def _diag_dominant(n, dtype, seed=0):
+    rng = numpy.random.RandomState(seed)
+    A = rng.rand(n, n).astype(dtype)
+    if numpy.dtype(dtype).kind == "c":
+        A = A + 1j * rng.rand(n, n).astype(dtype)
+    return A + n * numpy.eye(n, dtype=dtype)
+
+
+def _sym_indef(n, dtype, seed=0):
+    rng = numpy.random.RandomState(seed)
+    A = rng.rand(n, n).astype(dtype)
+    A = 0.5 * (A + A.T)
+    return A - 0.5 * numpy.eye(n, dtype=dtype)
+
+
+def _rhs(n, dtype, seed=1):
+    rng = numpy.random.RandomState(seed)
+    b = rng.rand(n).astype(dtype)
+    if numpy.dtype(dtype).kind == "c":
+        b = b + 1j * rng.rand(n).astype(dtype)
+    return b
+
+
+class TestLinearOperator(unittest.TestCase):
+
+    @testing.for_dtypes("fdFD")
+    def test_explicit_dtype_preserved(self, dtype):
+        n = 4
+        A = cupy.scipy.sparse.linalg.LinearOperator(
+            (n, n),
+            matvec=lambda v: v,
+            dtype=dtype,
+        )
+        assert A.dtype == numpy.dtype(dtype)
+
+    @testing.for_dtypes("fdFD")
+    def test_dtype_inferred_from_int8_trial(self, dtype):
+        n = 4
+
+        def mv(v):
+            return v.astype(dtype)
+
+        A = cupy.scipy.sparse.linalg.LinearOperator((n, n), matvec=mv)
+        assert A.dtype == numpy.dtype(dtype)
+
+    def test_matvec_dimension_mismatch_raises(self):
+        n = 4
+        A = cupy.scipy.sparse.linalg.LinearOperator(
+            (n, n),
+            matvec=lambda v: v,
+            dtype=cupy.float64,
+        )
+        wrong = cupy.zeros(n + 1, dtype=cupy.float64)
+        with pytest.raises(ValueError):
+            A.matvec(wrong)
+
+    def test_matmul_dispatch(self):
+        n = 3
+        diag = cupy.asarray([1.0, 2.0, 3.0])
+        A = cupy.scipy.sparse.linalg.LinearOperator(
+            (n, n),
+            matvec=lambda v: diag * v,
+            dtype=cupy.float64,
+        )
+        x = cupy.asarray([10.0, 20.0, 30.0])
+        testing.assert_allclose(cupy.asnumpy(A @ x), [10.0, 40.0, 90.0])
+        testing.assert_allclose(cupy.asnumpy(A * x), [10.0, 40.0, 90.0])
+
+    def test_adjoint_returns_linear_operator(self):
+        n = 3
+        A = cupy.scipy.sparse.linalg.LinearOperator(
+            (n, n),
+            matvec=lambda v: v,
+            rmatvec=lambda v: v,
+            dtype=cupy.float64,
+        )
+        AH = A.H
+        assert isinstance(AH, cupy.scipy.sparse.linalg.LinearOperator)
+        assert AH.shape == (n, n)
+
+    def test_array_ufunc_opt_out(self):
+        n = 3
+        A = cupy.scipy.sparse.linalg.LinearOperator(
+            (n, n),
+            matvec=lambda v: v,
+            dtype=cupy.float64,
+        )
+        assert getattr(A, "__array_ufunc__", "missing") is None
+
+    def test_numpy_scalar_times_linop_dispatches_to_rmul(self):
+        n = 3
+        A = cupy.scipy.sparse.linalg.LinearOperator(
+            (n, n),
+            matvec=lambda v: v,
+            dtype=cupy.float64,
+        )
+        scaled = numpy.float64(2.0) * A
+        assert isinstance(scaled, cupy.scipy.sparse.linalg.LinearOperator)
+        x = cupy.ones(n, dtype=cupy.float64)
+        testing.assert_allclose(
+            cupy.asnumpy(scaled.matvec(x)), 2.0 * numpy.ones(n)
+        )
+
+    def test_dot_rejects_numpy_array(self):
+        n = 4
+        A = cupy.scipy.sparse.linalg.LinearOperator(
+            (n, n),
+            matvec=lambda v: v,
+            dtype=cupy.float64,
+        )
+        host_vec = numpy.ones(n, dtype=numpy.float64)
+        with pytest.raises(TypeError, match="numpy.ndarray"):
+            A.dot(host_vec)
+        with pytest.raises(TypeError, match="numpy.ndarray"):
+            A @ host_vec
+        with pytest.raises(TypeError, match="numpy.ndarray"):
+            A * host_vec
+
+    def test_dot_accepts_dpnp_array_after_explicit_transfer(self):
+        n = 4
+        A = cupy.scipy.sparse.linalg.LinearOperator(
+            (n, n),
+            matvec=lambda v: 2 * v,
+            dtype=cupy.float64,
+        )
+        host_vec = numpy.ones(n, dtype=numpy.float64)
+        dev_vec = cupy.asarray(host_vec)
+        result = A.dot(dev_vec)
+        testing.assert_allclose(
+            cupy.asnumpy(result),
+            2.0 * numpy.ones(n),
+        )
+
+    def test_scaled_operator_preserves_float32_dtype(self):
+        n = 3
+        A = cupy.scipy.sparse.linalg.LinearOperator(
+            (n, n),
+            matvec=lambda v: v,
+            dtype=cupy.float32,
+        )
+        scaled = numpy.float32(2.0) * A
+        assert scaled.dtype == numpy.dtype("float32")
+
+
+class TestAsLinearOperator(unittest.TestCase):
+
+    def test_passthrough_existing_linear_operator(self):
+        n = 3
+        A = cupy.scipy.sparse.linalg.LinearOperator(
+            (n, n),
+            matvec=lambda v: v,
+            dtype=cupy.float64,
+        )
+        out = cupy.scipy.sparse.linalg.aslinearoperator(A)
+        assert out is A
+
+    @testing.for_dtypes("fdFD")
+    def test_wrap_dense_dpnp_array(self, dtype):
+        n = 4
+        A_np = _spd_matrix(n, dtype)
+        A_dp = cupy.asarray(A_np)
+        op = cupy.scipy.sparse.linalg.aslinearoperator(A_dp)
+        x = cupy.asarray(_rhs(n, dtype))
+        y = op.matvec(x)
+        y_ref = A_np @ cupy.asnumpy(x)
+        testing.assert_allclose(cupy.asnumpy(y), y_ref, rtol=1e-5, atol=1e-6)
+
+    def test_reject_numpy_ndarray(self):
+        A_np = numpy.eye(3, dtype=numpy.float64)
+        with pytest.raises(TypeError, match="numpy"):
+            cupy.scipy.sparse.linalg.aslinearoperator(A_np)
+
+    @testing.for_dtypes("fd")
+    def test_wrap_csr_matrix(self, dtype):
+        n = 5
+        A_np = _spd_matrix(n, dtype)
+        A_dp = cupy.scipy.sparse.csr_matrix(cupy.asarray(A_np))
+        op = cupy.scipy.sparse.linalg.aslinearoperator(A_dp)
+        x = cupy.asarray(_rhs(n, dtype))
+        y = op.matvec(x)
+        y_ref = A_np @ cupy.asnumpy(x)
+        testing.assert_allclose(cupy.asnumpy(y), y_ref, rtol=1e-5, atol=1e-6)
+
+
+@testing.with_requires("scipy")
+class TestCG(unittest.TestCase):
+
+    @testing.for_dtypes("fd")
+    def test_cg_converges_dense_spd(self, dtype):
+        n = 8
+        A = _spd_matrix(n, dtype)
+        b = _rhs(n, dtype)
+
+        x_ref, info_ref = scipy.sparse.linalg.cg(A, b, rtol=1e-8, atol=0.0)
+        assert info_ref == 0
+
+        A_dp = cupy.asarray(A)
+        b_dp = cupy.asarray(b)
+        x_dp, info_dp = cupy.scipy.sparse.linalg.cg(
+            A_dp,
+            b_dp,
+            rtol=1e-8,
+            atol=0.0,
+        )
+        assert info_dp == 0
+        testing.assert_allclose(
+            cupy.asnumpy(x_dp),
+            x_ref,
+            rtol=1e-4,
+            atol=1e-5,
+        )
+
+    @testing.for_dtypes("fd")
+    def test_cg_warm_start(self, dtype):
+        n = 8
+        A = _spd_matrix(n, dtype)
+        b = _rhs(n, dtype)
+
+        A_dp = cupy.asarray(A)
+        b_dp = cupy.asarray(b)
+        x0_dp, _ = cupy.scipy.sparse.linalg.cg(
+            A_dp,
+            b_dp,
+            rtol=1e-3,
+            atol=0.0,
+        )
+        x_dp, info_dp = cupy.scipy.sparse.linalg.cg(
+            A_dp,
+            b_dp,
+            x0=x0_dp,
+            rtol=1e-8,
+            atol=0.0,
+        )
+        assert info_dp == 0
+        x_ref, _ = scipy.sparse.linalg.cg(A, b, rtol=1e-8, atol=0.0)
+        testing.assert_allclose(
+            cupy.asnumpy(x_dp),
+            x_ref,
+            rtol=1e-4,
+            atol=1e-5,
+        )
+
+    def test_cg_info_contract_unconverged_is_positive(self):
+        n = 32
+        A = _spd_matrix(n, numpy.float64)
+        b = _rhs(n, numpy.float64)
+        A_dp = cupy.asarray(A)
+        b_dp = cupy.asarray(b)
+        _, info = cupy.scipy.sparse.linalg.cg(
+            A_dp,
+            b_dp,
+            maxiter=1,
+            rtol=1e-12,
+            atol=0.0,
+        )
+        assert info > 0
+
+    def test_cg_zero_rhs_returns_zero(self):
+        n = 4
+        A_dp = cupy.asarray(_spd_matrix(n, numpy.float64))
+        b_dp = cupy.zeros(n, dtype=cupy.float64)
+        x, info = cupy.scipy.sparse.linalg.cg(A_dp, b_dp)
+        assert info == 0
+        testing.assert_allclose(cupy.asnumpy(x), numpy.zeros(n))
+
+    def test_cg_inf_breakdown_returns_positive_info(self):
+        n = 8
+        # Rank-deficient: row 0 is zero, A is PSD but not PD.
+        A = numpy.eye(n, dtype=numpy.float64)
+        A[0, 0] = 0.0
+        b = numpy.ones(n, dtype=numpy.float64)
+        A_dp = cupy.asarray(A)
+        b_dp = cupy.asarray(b)
+        _, info = cupy.scipy.sparse.linalg.cg(
+            A_dp,
+            b_dp,
+            maxiter=20,
+            rtol=1e-12,
+            atol=0.0,
+        )
+        assert info > 0
+
+
+@testing.with_requires("scipy")
+class TestGMRES(unittest.TestCase):
+
+    @testing.for_dtypes("fd")
+    def test_gmres_converges_diag_dominant(self, dtype):
+        n = 10
+        A = _diag_dominant(n, dtype)
+        b = _rhs(n, dtype)
+
+        # float32 cannot reliably reach 1e-8 in 10 Arnoldi steps;
+        # the noise floor of classical Gram-Schmidt is O(eps*sqrt(n)).
+        rtol = 1e-5 if numpy.dtype(dtype) == numpy.float32 else 1e-8
+
+        x_ref, info_ref = scipy.sparse.linalg.gmres(
+            A,
+            b,
+            rtol=rtol,
+            atol=0.0,
+        )
+        assert info_ref == 0
+
+        A_dp = cupy.asarray(A)
+        b_dp = cupy.asarray(b)
+        x_dp, info_dp = cupy.scipy.sparse.linalg.gmres(
+            A_dp,
+            b_dp,
+            rtol=rtol,
+            atol=0.0,
+        )
+        assert info_dp == 0
+        cmp_rtol = 5e-4 if numpy.dtype(dtype) == numpy.float32 else 1e-4
+        cmp_atol = 5e-5 if numpy.dtype(dtype) == numpy.float32 else 1e-5
+        testing.assert_allclose(
+            cupy.asnumpy(x_dp),
+            x_ref,
+            rtol=cmp_rtol,
+            atol=cmp_atol,
+        )
+
+    def test_gmres_restart_parameter(self):
+        n = 20
+        A = _diag_dominant(n, numpy.float64)
+        b = _rhs(n, numpy.float64)
+        A_dp = cupy.asarray(A)
+        b_dp = cupy.asarray(b)
+        x_dp, info_dp = cupy.scipy.sparse.linalg.gmres(
+            A_dp,
+            b_dp,
+            restart=5,
+            rtol=1e-8,
+            atol=0.0,
+        )
+        assert info_dp == 0
+        testing.assert_allclose(
+            cupy.asnumpy(A_dp @ x_dp),
+            cupy.asnumpy(b_dp),
+            rtol=1e-4,
+            atol=1e-5,
+        )
+
+    def test_gmres_info_contract_unconverged_is_positive(self):
+        n = 32
+        A = _diag_dominant(n, numpy.float64)
+        b = _rhs(n, numpy.float64)
+        A_dp = cupy.asarray(A)
+        b_dp = cupy.asarray(b)
+        _, info = cupy.scipy.sparse.linalg.gmres(
+            A_dp,
+            b_dp,
+            restart=2,
+            maxiter=1,
+            rtol=1e-12,
+            atol=0.0,
+        )
+        assert info > 0
+
+    @testing.for_dtypes("FD")
+    def test_gmres_complex_arnoldi_fast_path(self, dtype):
+        n = 12
+        A = _diag_dominant(n, dtype)
+        b = _rhs(n, dtype)
+
+        rtol = 1e-5 if numpy.dtype(dtype) == numpy.complex64 else 1e-7
+
+        x_ref, info_ref = scipy.sparse.linalg.gmres(
+            A,
+            b,
+            rtol=rtol,
+            atol=0.0,
+        )
+        assert info_ref == 0
+
+        A_dp = cupy.asarray(A)
+        b_dp = cupy.asarray(b)
+        x_dp, info_dp = cupy.scipy.sparse.linalg.gmres(
+            A_dp,
+            b_dp,
+            rtol=rtol,
+            atol=0.0,
+        )
+        assert info_dp == 0
+        cmp_rtol = 5e-4 if numpy.dtype(dtype) == numpy.complex64 else 1e-4
+        cmp_atol = 5e-5 if numpy.dtype(dtype) == numpy.complex64 else 1e-5
+        testing.assert_allclose(
+            cupy.asnumpy(x_dp),
+            x_ref,
+            rtol=cmp_rtol,
+            atol=cmp_atol,
+        )
+
+
+@testing.with_requires("scipy")
+class TestMINRES(unittest.TestCase):
+
+    def test_minres_converges_symmetric_indefinite(self):
+        n = 12
+        A = _sym_indef(n, numpy.float64)
+        b = _rhs(n, numpy.float64)
+        x_ref, info_ref = scipy.sparse.linalg.minres(A, b, rtol=1e-8)
+        assert info_ref == 0
+
+        A_dp = cupy.asarray(A)
+        b_dp = cupy.asarray(b)
+        x_dp, info_dp = cupy.scipy.sparse.linalg.minres(
+            A_dp,
+            b_dp,
+            rtol=1e-8,
+        )
+        assert info_dp == 0
+        testing.assert_allclose(
+            cupy.asnumpy(x_dp),
+            x_ref,
+            rtol=1e-4,
+            atol=1e-5,
+        )
+
+    def test_minres_shift_parameter(self):
+        n = 10
+        A = _sym_indef(n, numpy.float64)
+        b = _rhs(n, numpy.float64)
+        shift = 0.25
+        x_ref, _ = scipy.sparse.linalg.minres(
+            A,
+            b,
+            shift=shift,
+            rtol=1e-8,
+        )
+        A_dp = cupy.asarray(A)
+        b_dp = cupy.asarray(b)
+        x_dp, _ = cupy.scipy.sparse.linalg.minres(
+            A_dp,
+            b_dp,
+            shift=shift,
+            rtol=1e-8,
+        )
+        testing.assert_allclose(
+            cupy.asnumpy(x_dp),
+            x_ref,
+            rtol=1e-4,
+            atol=1e-5,
+        )
+
+    def test_minres_zero_rhs_returns_zero(self):
+        n = 4
+        A_dp = cupy.asarray(_sym_indef(n, numpy.float64))
+        b_dp = cupy.zeros(n, dtype=cupy.float64)
+        x, info = cupy.scipy.sparse.linalg.minres(A_dp, b_dp)
+        assert info == 0
+        testing.assert_allclose(cupy.asnumpy(x), numpy.zeros(n))
+
+
+class TestModuleSurface(unittest.TestCase):
+
+    def test_public_symbols_match_pr_contract(self):
+        from dpnp.scipy.sparse.linalg import (
+            LinearOperator,
+            aslinearoperator,
+            cg,
+            gmres,
+            minres,
+        )
+
+        assert callable(LinearOperator)
+        assert callable(aslinearoperator)
+        assert callable(cg)
+        assert callable(gmres)
+        assert callable(minres)
diff --git a/setup.py b/setup.py
index 1193b61ac2a8..11b7d5aa0d7d 100644
--- a/setup.py
+++ b/setup.py
@@ -44,6 +44,8 @@
         "dpnp.random",
         "dpnp.scipy",
         "dpnp.scipy.linalg",
+        "dpnp.scipy.sparse",
+        "dpnp.scipy.sparse.linalg",
         "dpnp.scipy.special",
     ],
     package_data={