diff --git a/pylops/optimization/cls_basic.py b/pylops/optimization/cls_basic.py
index 4f47565f..0996a9f8 100644
--- a/pylops/optimization/cls_basic.py
+++ b/pylops/optimization/cls_basic.py
@@ -1248,8 +1248,13 @@ def step(self, x: NDArray, show: bool = False) -> NDArray:
             self.ncp.add(self.v, self.w, out=self.w)
         self.ddnorm = self.ddnorm + self.ncp.linalg.norm(self.dk) ** 2.0
         if self.calc_var:
+            # ``var`` estimates the diagonal of (A^H A)^-1, so it must accumulate
+            # the *element-wise* square of ``dk`` (one variance per unknown), as in
+            # Paige & Saunders (1982) eq. on p.53 and scipy's lsqr. Using
+            # ``dot(dk, dk)`` instead adds the same scalar (sum of squares) to every
+            # entry, yielding identical, wrong variances (issue #639).
             self.var = self.var + to_numpy_conditional(
-                self.var, self.ncp.dot(self.dk, self.dk)
+                self.var, self.ncp.square(self.dk)
             )
 
         # use a plane rotation on the right to eliminate the
diff --git a/pytests/test_solver.py b/pytests/test_solver.py
index 13a2ca0c..1e73cd2b 100644
--- a/pytests/test_solver.py
+++ b/pytests/test_solver.py
@@ -377,6 +377,33 @@ def test_lsqr_pylops_scipy(par):
         assert_array_almost_equal(xinv_sp, x, decimal=4)
 
 
+@pytest.mark.parametrize("par", [(par3), (par4)])
+def test_lsqr_calc_var(par):
+    """LSQR ``var`` estimates the diagonal of (A^H A)^-1 element-wise (issue #639).
+
+    Each unknown must get its own variance; a regression here would make all
+    entries identical (the previous ``dot(dk, dk)`` bug). Compare against scipy's
+    LSQR and the dense ``diag(inv(A^H A))`` on a full-column-rank system.
+    """
+    np.random.seed(10)
+
+    A = np.random.normal(0, 1, (par["ny"], par["nx"]))
+    Aop = MatrixMult(A, dtype=par["dtype"])
+    y = Aop * (np.ones(par["nx"]))
+
+    niter = 10 * par["nx"]
+    var = lsqr(Aop, y, x0=None, niter=niter, atol=1e-8, btol=1e-8)[9]
+    var_sp = sp_lsqr(
+        Aop, y, iter_lim=niter, atol=1e-8, btol=1e-8, calc_var=True
+    )[9]
+    var_dense = np.diag(np.linalg.inv(np.conj(A.T) @ A))
+
+    # not all identical (the bug produced a constant vector)
+    assert not np.allclose(var, var[0])
+    assert_array_almost_equal(var, var_sp, decimal=6)
+    assert_array_almost_equal(var, var_dense, decimal=6)
+
+
 @pytest.mark.parametrize(
     "par", [(par1), (par2), (par3), (par4), (par1j), (par2j), (par3j), (par3j)]
 )