IntelPython · abagusetty · Apr 2, 2026 · Apr 2, 2026 · Apr 2, 2026 · Apr 2, 2026
@@ -17,6 +17,8 @@ This release is compatible with NumPy 2.5.
 * Added `dpnp-config.cmake` to make `find_package(Dpnp)` work out of the box, and an example which uses it [#2941](https://github.com/IntelPython/dpnp/pull/2941)
 * Added implementation of `dpnp.lib.stride_tricks.as_strided` [#2991](https://github.com/IntelPython/dpnp/pull/2991)
 * Added `dpnp.tensor.broadcast_shapes` to align with the 2025.12 version of the Python array API [#3009](https://github.com/IntelPython/dpnp/pull/3009)
+* Added implementation of `LinearOperator`, `cg`, `gmres`, and `minres` in `dpnp.scipy.sparse.linalg` [#2841](https://github.com/IntelPython/dpnp/pull/2841)
+
 
 ### Changed
 

@@ -232,12 +232,18 @@ if(_use_onemath)
         set(ENABLE_CUBLAS_BACKEND True)
         set(ENABLE_CUSOLVER_BACKEND True)
         set(ENABLE_CUFFT_BACKEND True)
+        set(ENABLE_CUSPARSE_BACKEND True)
         # set(ENABLE_CURAND_BACKEND True)
     endif()
     if(_use_onemath_hip)
         set(ENABLE_ROCBLAS_BACKEND True)
         set(ENABLE_ROCSOLVER_BACKEND True)
         set(ENABLE_ROCFFT_BACKEND True)
+        # rocSPARSE is not enabled: it forbids reusing a sparse matrix
+        # handle across spmv calls (oneMath #647 / ROCm/rocSPARSE #332),
+        # which the cached-handle SpMV design in extensions/sparse relies
+        # on. Enable only once oneMath supports rocSPARSE handle reuse.
+        # set(ENABLE_ROCSPARSE_BACKEND True)
         # set(ENABLE_ROCRAND_BACKEND True)
     endif()
 

@@ -51,6 +51,7 @@ requirements:
       - {{ pin_compatible('onemkl-sycl-lapack', min_pin='x.x', max_pin='x') }}
       - {{ pin_compatible('onemkl-sycl-rng', min_pin='x.x', max_pin='x') }}
       - {{ pin_compatible('onemkl-sycl-vm', min_pin='x.x', max_pin='x') }}
+      - {{ pin_compatible('onemkl-sycl-sparse', min_pin='x.x', max_pin='x') }}
       - numpy
       - intel-gpu-ocl-icd-system
 

@@ -9,4 +9,5 @@ These functions cover a subset of
    :maxdepth: 2
 
    scipy_linalg
+   scipy_sparse
    scipy_special
@@ -0,0 +1,53 @@
+.. currentmodule:: dpnp.scipy.sparse
+
+Sparse arrays (:mod:`dpnp.scipy.sparse`)
+========================================
+
+.. hint:: `SciPy API Reference: Sparse arrays (scipy.sparse) <https://docs.scipy.org/doc/scipy/reference/sparse.html>`_
+
+Sparse matrix classes
+---------------------
+
+.. autosummary::
+   :toctree: generated/
+   :nosignatures:
+
+   csr_matrix
+
+Identifying sparse matrices
+---------------------------
+
+.. autosummary::
+   :toctree: generated/
+   :nosignatures:
+
+   issparse
+
+
+Sparse linear algebra (:mod:`dpnp.scipy.sparse.linalg`)
+=======================================================
+
+.. currentmodule:: dpnp.scipy.sparse.linalg
+
+.. hint:: `SciPy API Reference: Sparse linear algebra (scipy.sparse.linalg) <https://docs.scipy.org/doc/scipy/reference/sparse.linalg.html>`_
+
+Abstract linear operators
+-------------------------
+
+.. autosummary::
+   :toctree: generated/
+   :nosignatures:
+
+   LinearOperator
+   aslinearoperator
+
+Iterative solvers for linear systems
+------------------------------------
+
+.. autosummary::
+   :toctree: generated/
+   :nosignatures:
+
+   cg
+   gmres
+   minres
@@ -196,6 +196,7 @@ add_subdirectory(backend/extensions/statistics)
 add_subdirectory(backend/extensions/ufunc)
 add_subdirectory(backend/extensions/vm)
 add_subdirectory(backend/extensions/window)
+add_subdirectory(backend/extensions/sparse)
 
 add_subdirectory(dpnp_algo)
 add_subdirectory(dpnp_utils)

@@ -105,10 +105,10 @@ if(DPNP_GENERATE_COVERAGE)
     )
 endif()
 
-if(_ues_onemath)
+if(_use_onemath)
     target_link_libraries(${python_module_name} PRIVATE ${ONEMATH_LIB})
     target_compile_options(${python_module_name} PRIVATE -DUSE_ONEMATH)
-    if(_ues_onemath_cuda)
+    if(_use_onemath_cuda)
         target_compile_options(${python_module_name} PRIVATE -DUSE_ONEMATH_CUBLAS)
     endif()
 else()

@@ -145,9 +145,12 @@ PYBIND11_MODULE(_blas_impl, m)
     {
         m.def("_gemv", &blas_ns::gemv,
               "Call `gemv` from oneMKL BLAS library to compute "
-              "the matrix-vector product with a general matrix.",
+              "y = alpha * op(A) * x + beta * y. trans_op picks op(A): "
+              "0=N, 1=T, 2=C (C is F-contiguous only). alpha/beta "
+              "default to 1/0.",
               py::arg("sycl_queue"), py::arg("matrixA"), py::arg("vectorX"),
-              py::arg("vectorY"), py::arg("transpose"),
+              py::arg("vectorY"), py::arg("trans_op") = 0,
+              py::arg("alpha") = 1.0, py::arg("beta") = 0.0,
               py::arg("depends") = py::list());
     }
 

@@ -53,10 +53,12 @@ typedef sycl::event (*gemv_impl_fn_ptr_t)(sycl::queue &,
                                           oneapi::mkl::transpose,
                                           const std::int64_t,
                                           const std::int64_t,
+                                          const double, // alpha
                                           const char *,
                                           const std::int64_t,
                                           const char *,
                                           const std::int64_t,
+                                          const double, // beta
                                           char *,
                                           const std::int64_t,
                                           const bool,
@@ -69,10 +71,12 @@ static sycl::event gemv_impl(sycl::queue &exec_q,
                              oneapi::mkl::transpose transA,
                              const std::int64_t m,
                              const std::int64_t n,
+                             const double alpha_d,
                              const char *matrixA,
                              const std::int64_t lda,
                              const char *vectorX,
                              const std::int64_t incx,
+                             const double beta_d,
                              char *vectorY,
                              const std::int64_t incy,
                              const bool is_row_major,
@@ -84,6 +88,9 @@ static sycl::event gemv_impl(sycl::queue &exec_q,
     const T *x = reinterpret_cast<const T *>(vectorX);
     T *y = reinterpret_cast<T *>(vectorY);
 
+    const T alpha = static_cast<T>(alpha_d);
+    const T beta = static_cast<T>(beta_d);
+
     std::stringstream error_msg;
     bool is_exception_caught = false;
 
@@ -112,13 +119,13 @@ static sycl::event gemv_impl(sycl::queue &exec_q,
                     // or 'C' for a conjugate transpose.
             m,      // Number of rows in matrix A.
             n,      // Number of columns in matrix A.
-            T(1),   // Scaling factor for the matrix-vector product.
+            alpha,  // Scaling factor for the matrix-vector product.
             a,      // Pointer to the input matrix A.
             lda,    // Leading dimension of matrix A, which is the
                     // stride between successive rows (for row major layout).
             x,      // Pointer to the input vector x.
             incx,   // The stride of vector x.
-            T(0),   // Scaling factor for vector y.
+            beta,   // Scaling factor for vector y.
             y,      // Pointer to output vector y, where the result is stored.
             incy,   // The stride of vector y.
             depends);
@@ -141,14 +148,28 @@ static sycl::event gemv_impl(sycl::queue &exec_q,
     return gemv_event;
 }
 
+// Computes y = alpha * op(A) * x + beta * y.
+//
+// ``trans_op`` is a tri-state matching oneapi::mkl::transpose:
+//      0 = N (no transpose),
+//      1 = T (plain transpose),
+//      2 = C (conjugate-transpose, complex only; F-contiguous input).
 std::pair<sycl::event, sycl::event>
     gemv(sycl::queue &exec_q,
          const dpnp::tensor::usm_ndarray &matrixA,
          const dpnp::tensor::usm_ndarray &vectorX,
          const dpnp::tensor::usm_ndarray &vectorY,
-         const bool transpose,
+         const int trans_op,
+         const double alpha,
+         const double beta,
          const std::vector<sycl::event> &depends)
 {
+    if (trans_op < 0 || trans_op > 2) {
+        throw py::value_error("gemv: trans_op must be 0 (N), 1 (T), or 2 (C).");
+    }
+    const bool is_transposed = (trans_op != 0);
+    const bool is_conj_trans = (trans_op == 2);
+
     const int matrixA_nd = matrixA.get_ndim();
     const int vectorX_nd = vectorX.get_ndim();
     const int vectorY_nd = vectorY.get_ndim();
@@ -182,10 +203,21 @@ std::pair<sycl::event, sycl::event>
             "Input matrix is not c-contiguous nor f-contiguous.");
     }
 
+    // Conjugate-transpose is only wired up for column-major (F-contig)
+    // matrices. The row-major remap (treating a C-contig matrix as its
+    // column-major transpose) does not extend cleanly to the C op
+    // because (A^T)^H == conj(A), which oneMKL does not expose as a
+    // gemv mode. Callers needing C-mode on row-major input must
+    // F-contigify first (e.g. via dpnp.asarray(A, order="F")).
+    if (is_conj_trans && !is_matrixA_f_contig) {
+        throw py::value_error("Input matrix is not f-contiguous, but "
+                              "trans_op = 2 (conjugate-transpose) requested.");
+    }
+
     const py::ssize_t *a_shape = matrixA.get_shape_raw();
     const py::ssize_t *x_shape = vectorX.get_shape_raw();
     const py::ssize_t *y_shape = vectorY.get_shape_raw();
-    if (transpose) {
+    if (is_transposed) {
         if (a_shape[0] != x_shape[0]) {
             throw py::value_error("The number of rows in A must be equal to "
                                   "the number of elements in X.");
@@ -209,6 +241,9 @@ std::pair<sycl::event, sycl::event>
     oneapi::mkl::transpose transA;
     std::size_t src_nelems;
 
+    // Resolve the storage layout into the oneMKL transpose mode.
+    // Conjugate-transpose is constrained to F-contig above; the
+    // row-major branch therefore only sees N/T here.
 // cuBLAS supports only column-major storage
 #if defined(USE_ONEMATH_CUBLAS)
     constexpr bool is_row_major = false;
@@ -218,7 +253,11 @@ std::pair<sycl::event, sycl::event>
     if (is_matrixA_f_contig) {
         m = a_shape[0];
         n = a_shape[1];
-        if (transpose) {
+        if (is_conj_trans) {
+            transA = oneapi::mkl::transpose::C;
+            src_nelems = n;
+        }
+        else if (is_transposed) {
             transA = oneapi::mkl::transpose::T;
             src_nelems = n;
         }
@@ -228,9 +267,11 @@ std::pair<sycl::event, sycl::event>
         }
     }
     else {
+        // Row-major-as-column-major swap. is_conj_trans is rejected
+        // above, so only N/T need handling.
         m = a_shape[1];
         n = a_shape[0];
-        if (transpose) {
+        if (is_transposed) {
             transA = oneapi::mkl::transpose::N;
             src_nelems = m;
         }
@@ -248,7 +289,11 @@ std::pair<sycl::event, sycl::event>
     const std::int64_t m = a_shape[0];
     const std::int64_t n = a_shape[1];
 
-    if (transpose) {
+    if (is_conj_trans) {
+        transA = oneapi::mkl::transpose::C;
+        src_nelems = n;
+    }
+    else if (is_transposed) {
         transA = oneapi::mkl::transpose::T;
         src_nelems = n;
     }
@@ -299,9 +344,9 @@ std::pair<sycl::event, sycl::event>
         y_typeless_ptr -= (y_shape[0] - 1) * std::abs(incy) * y_elemsize;
     }
 
-    sycl::event gemv_ev =
-        gemv_fn(exec_q, transA, m, n, a_typeless_ptr, lda, x_typeless_ptr, incx,
-                y_typeless_ptr, incy, is_row_major, depends);
+    sycl::event gemv_ev = gemv_fn(exec_q, transA, m, n, alpha, a_typeless_ptr,
+                                  lda, x_typeless_ptr, incx, beta,
+                                  y_typeless_ptr, incy, is_row_major, depends);
 
     sycl::event args_ev = dpnp::utils::keep_args_alive(
         exec_q, {matrixA, vectorX, vectorY}, {gemv_ev});

@@ -35,12 +35,30 @@
 
 namespace dpnp::extensions::blas
 {
+// y = alpha * op(A) * x + beta * y. alpha/beta are real-valued (double)
+// and are cast to the matrix value type in the impl.
+//
+// ``trans_op`` selects the operation applied to A:
+//      0 = N  (no transpose)         y = alpha * A   @ x + beta * y
+//      1 = T  (transpose)            y = alpha * A^T @ x + beta * y
+//      2 = C  (conjugate-transpose)  y = alpha * A^H @ x + beta * y
+//
+// For real-valued A, T and C are equivalent. For complex A they
+// differ, and C is required for any algorithm that performs a
+// Hermitian inner product through gemv -- the GMRES Arnoldi step
+// (Gram-Schmidt over a complex Krylov basis) being the canonical
+// example. ``trans_op = 2`` is currently only supported for
+// F-contiguous (column-major) matrices; the row-major code path
+// for conjugate-transpose would require an explicit element-wise
+// conjugate pass and is not wired up here.
 extern std::pair<sycl::event, sycl::event>
     gemv(sycl::queue &exec_q,
          const dpnp::tensor::usm_ndarray &matrixA,
          const dpnp::tensor::usm_ndarray &vectorX,
          const dpnp::tensor::usm_ndarray &vectorY,
-         const bool transpose,
+         const int trans_op,
+         const double alpha,
+         const double beta,
          const std::vector<sycl::event> &depends);
 
 extern void init_gemv_dispatch_vector(void);