diff --git a/.github/workflows/ci_linux/deps_apt.sh b/.github/workflows/ci_linux/deps_apt.sh index fdc19d6b53..b4fbc23a73 100755 --- a/.github/workflows/ci_linux/deps_apt.sh +++ b/.github/workflows/ci_linux/deps_apt.sh @@ -1,6 +1,9 @@ #!/usr/bin/env bash sudo apt-get -qq install \ gcc \ + g++ \ + gfortran \ + liblapack-dev \ libblas-dev \ cmake \ curl diff --git a/.github/workflows/ci_linux/python_deps.sh b/.github/workflows/ci_linux/python_deps.sh index 1f5e400f2d..bc4dd8b246 100755 --- a/.github/workflows/ci_linux/python_deps.sh +++ b/.github/workflows/ci_linux/python_deps.sh @@ -1,17 +1,16 @@ #!/usr/bin/env bash python -m pip install --upgrade pip pip install "numpy!=1.16,!=1.17" "scipy!=1.5" h5py pytest pytest-cov pytest-timer -pip install pyberny +pip install git+https://github.com/jhrmnn/pyberny.git@36a4be9 pip install --no-deps pyscf-dispersion==1.3.0 +pip install geometric version=$(python -c 'import sys; version=sys.version_info[:2]; print("{0}.{1}".format(*version))') -if [ $version != '3.12' ]; then - pip install geometric - pip install spglib -fi -if [ $version != '3.8' ]; then +if [ $version == '3.12' ]; then + pip install spglib pip install pytblis + pip install git+https://github.com/sunqm/zquatev fi #cppe diff --git a/.github/workflows/run_tests.sh b/.github/workflows/run_tests.sh index 886e8c78c8..b4a6396778 100755 --- a/.github/workflows/run_tests.sh +++ b/.github/workflows/run_tests.sh @@ -13,6 +13,7 @@ version=$(python -c 'import sys; print("{0}.{1}".format(*sys.version_info[:2]))' # pytest-cov on Python 3.12 consumes huge memory if [ "$RUNNER_OS" == "Linux" ] && [ $version != "3.12" ]; then pytest pyscf/ -s -c pytest.ini \ + --durations=20 \ --cov-report xml --cov-report term --cov-config .coveragerc --cov pyscf else pytest pyscf/ -s -c pytest.ini pyscf diff --git a/CHANGELOG b/CHANGELOG index ddd2c09500..03bcbd0f87 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,12 @@ +PySCF 2.13.1 (2026-06-01) +------------------------- +* Fixes + - Missing 
CP2K basis set data in wheel distributions + - Small-rotor error. + - Corrected SG1 grid radii handling for ghost atoms. + - Fixed ECP loading to correctly fall back to Basis Set Exchange when local data is unavailable. + + PySCF 2.13.0 (2026-04-20) ------------------------- * Added diff --git a/MANIFEST.in b/MANIFEST.in index 469d803cf3..e62e98ac34 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -11,12 +11,16 @@ include pyscf/lib/deps/lib*/libcint.[4-9].dylib include pyscf/lib/deps/lib*/libxc.*.dylib include pyscf/lib/deps/lib*/libxcfun.[2-9].dylib +# windows dynamic libraries +include pyscf/lib/*.dll +include pyscf/lib/deps/bin/*.dll + include pyscf/geomopt/log.ini include pyscf/gto/basis/bse_meta.json # CP2K basis set -include pyscf/lib/pbc/gto/basis/*BASIS* -include pyscf/lib/pbc/gto/pseudo/*POTENTIAL* +include pyscf/pbc/gto/basis/*BASIS* +include pyscf/pbc/gto/pseudo/*POTENTIAL* # source code recursive-include pyscf/lib *.c *.h CMakeLists.txt diff --git a/README.md b/README.md index 82544d08cb..9632434deb 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,9 @@ Python-based Simulations of Chemistry Framework [![Build Status](https://github.com/pyscf/pyscf/workflows/CI/badge.svg)](https://github.com/pyscf/pyscf/actions?query=workflow%3ACI) [![codecov](https://codecov.io/gh/pyscf/pyscf/branch/master/graph/badge.svg)](https://codecov.io/gh/pyscf/pyscf) -2026-04-20 +2026-06-01 -* [Stable release 2.13.0](https://github.com/pyscf/pyscf/releases/tag/v2.13.0) +* [Stable release 2.13.1](https://github.com/pyscf/pyscf/releases/tag/v2.13.1) * [Changelog](../master/CHANGELOG) * [Documentation](http://www.pyscf.org) * [Installation](#installation) diff --git a/examples/2-benchmark/benchmarking_utils.py b/examples/2-benchmark/benchmarking_utils.py index 78e3c37eaf..ddefe615a3 100644 --- a/examples/2-benchmark/benchmarking_utils.py +++ b/examples/2-benchmark/benchmarking_utils.py @@ -4,13 +4,24 @@ def setup_logger(): log = pyscf.lib.logger.Logger(verbose=5) - with 
open('/proc/cpuinfo') as f: - for line in f: - if 'model name' in line: - log.note(line[:-1]) - break - with open('/proc/meminfo') as f: - log.note(f.readline()[:-1]) + try: + with open('/proc/cpuinfo') as f: + for line in f: + if 'model name' in line: + log.note(line[:-1]) + break + except FileNotFoundError: + pass + try: + with open('/proc/meminfo') as f: + log.note(f.readline()[:-1]) + except FileNotFoundError: + try: + import psutil + mem = psutil.virtual_memory() + log.note(f'MemTotal: {mem.total // 1024} kB') + except ImportError: + pass log.note('OMP_NUM_THREADS=%s\n', os.environ.get('OMP_NUM_THREADS', None)) return log diff --git a/examples/ao2mo/01-outcore.py b/examples/ao2mo/01-outcore.py index a175441e84..70fd796d31 100644 --- a/examples/ao2mo/01-outcore.py +++ b/examples/ao2mo/01-outcore.py @@ -3,9 +3,9 @@ # Author: Qiming Sun # -import tempfile import h5py from pyscf import gto, scf, ao2mo +from pyscf import lib ''' Save the transformed integrals in the given file in HDF5 format @@ -22,7 +22,7 @@ myhf.kernel() orb = myhf.mo_coeff -ftmp = tempfile.NamedTemporaryFile() +ftmp = lib.NamedTemporaryFile() print('MO integrals are saved in file %s under dataset "eri_mo"' % ftmp.name) ao2mo.kernel(mol, orb, ftmp.name) diff --git a/examples/ao2mo/10-diff_orbs_for_ijkl.py b/examples/ao2mo/10-diff_orbs_for_ijkl.py index e2ac7240ca..6af4a3fb7a 100644 --- a/examples/ao2mo/10-diff_orbs_for_ijkl.py +++ b/examples/ao2mo/10-diff_orbs_for_ijkl.py @@ -3,10 +3,10 @@ # Author: Qiming Sun # -import tempfile import numpy import h5py from pyscf import gto, scf, ao2mo +from pyscf import lib ''' Integral transformation for four different orbitals @@ -39,7 +39,7 @@ # # Given four MOs, compute the MO-integrals and saved in dataset "mp2_bz" # -eritmp = tempfile.NamedTemporaryFile() +eritmp = lib.NamedTemporaryFile() nocc = mol.nelectron // 2 nvir = len(mf.mo_energy) - nocc co = mf.mo_coeff[:,:nocc] diff --git a/examples/ao2mo/11-ump2.py b/examples/ao2mo/11-ump2.py index 
cbb66405d9..b1642b2156 100644 --- a/examples/ao2mo/11-ump2.py +++ b/examples/ao2mo/11-ump2.py @@ -3,7 +3,6 @@ # Author: Qiming Sun # -import tempfile import numpy import h5py from pyscf import gto, scf, ao2mo diff --git a/examples/ao2mo/20-eri_grad_hess.py b/examples/ao2mo/20-eri_grad_hess.py index aefd32f851..22e739280c 100644 --- a/examples/ao2mo/20-eri_grad_hess.py +++ b/examples/ao2mo/20-eri_grad_hess.py @@ -3,10 +3,10 @@ # Author: Qiming Sun # -import tempfile import numpy import h5py from pyscf import gto, scf, ao2mo +from pyscf import lib ''' Integral transformation for irregular operators @@ -28,7 +28,7 @@ # # Given four MOs, compute the MO-integral gradients # -gradtmp = tempfile.NamedTemporaryFile() +gradtmp = lib.NamedTemporaryFile() nocc = mol.nelectron // 2 nvir = len(mf.mo_energy) - nocc co = mf.mo_coeff[:,:nocc] @@ -56,7 +56,7 @@ # 9 d/dZ d/dZ # orb = mf.mo_coeff -hesstmp = tempfile.NamedTemporaryFile() +hesstmp = lib.NamedTemporaryFile() ao2mo.kernel(mol, orb, hesstmp.name, intor='cint2e_ipvip1_sph', dataname='hessints1', aosym='s4') with ao2mo.load(hesstmp, 'hessints1') as eri: diff --git a/examples/ao2mo/22-rkb_no_pair_ints.py b/examples/ao2mo/22-rkb_no_pair_ints.py index e0c0ee2216..319c676eb1 100644 --- a/examples/ao2mo/22-rkb_no_pair_ints.py +++ b/examples/ao2mo/22-rkb_no_pair_ints.py @@ -10,7 +10,6 @@ from pyscf import scf from pyscf import lib from pyscf.ao2mo import r_outcore -import tempfile import os mol = gto.M( @@ -53,7 +52,7 @@ def no_pair_ovov(mol, mo_coeff, erifile): def run_and_add(mol, mos, erifile, dataname_main, intor): # Use a temporary file for the intermediate integrals - with tempfile.NamedTemporaryFile(suffix=".h5", delete=False) as tmpfile: + with lib.NamedTemporaryFile(suffix=".h5", delete=False) as tmpfile: tmp_erifile = tmpfile.name try: diff --git a/examples/cc/03-gccsd.py b/examples/cc/03-gccsd.py new file mode 100644 index 0000000000..4f5f798e06 --- /dev/null +++ b/examples/cc/03-gccsd.py @@ -0,0 +1,44 @@ 
+#!/usr/bin/env python + +''' +GCCSD: CCSD based on the GHF reference. + +The cluster amplitudes of GCCSD are represented in the spin-orbital basis and +are solved without assuming spin symmetry. GCCSD can be applied to spin-orbit +coupled systems. + +For non-relativistic calculations, GCCSD is typically equivalent to the +corresponding UHF-CCSD calculation. When spin-orbit coupling (SOC) is included, +for example via the X2C Hamiltonian (see examples/x2c/03-x2c_ghf.py) or SOC-ECP +(see examples/scf/44-soc_ecp.py), the GHF orbitals become complex-valued, and +the resulting GCCSD amplitudes are also complex-valued. +''' + +import pyscf + +mol = pyscf.M(atom=''' +O 0. 0. 0. +H 0. -0.757 0.587 +H 0. 0.757 0.587''', +basis='cc-pvdz') +# +# Non-relativistic calculation. The CCSD object returned by mf.CCSD() is an +# instance of the GCCSD class. The cluster amplitudes are represented in the +# spin-orbital basis. +# +mf = mol.GHF().run() +mycc = mf.CCSD().run() + +# +# Enable SOC via the X2C Hamiltonian. GCCSD amplitudes are complex-valued. +# +mf = mol.GHF().x2c().run() +mycc = mf.CCSD().run() + +# +# For calculations using ECPs, SOC can be enabled with the setting +# mf.with_soc = True +# +# Running mf.CCSD() on such a reference will perform a GCCSD calculation +# with complex-valued amplitudes. 
+# diff --git a/examples/cc/61-rccsdtq.py b/examples/cc/61-rccsdtq.py index ce1aa962b8..4c385a1c7a 100644 --- a/examples/cc/61-rccsdtq.py +++ b/examples/cc/61-rccsdtq.py @@ -52,7 +52,7 @@ mycc2.verbose = 5 mycc2.incore_complete = True mycc2.kernel() -print('Full-T4 RCCSDQ e_corr % .12f Ref % .12f Diff % .12e' % ( +print('Full-T4 RCCSDTQ e_corr % .12f Ref % .12f Diff % .12e' % ( mycc2.e_corr, ref_e_corr, mycc2.e_corr - ref_e_corr)) # diff --git a/examples/cc/63-check_rccsdt_uccsdt_consistency.py b/examples/cc/63-check_rccsdt_uccsdt_consistency.py index a04ab655e8..5af5e4f00a 100644 --- a/examples/cc/63-check_rccsdt_uccsdt_consistency.py +++ b/examples/cc/63-check_rccsdt_uccsdt_consistency.py @@ -63,7 +63,7 @@ # Restart UCCSDT using amplitudes converted from RCCSDT tamps_init_uhf = [t1_rhf2uhf, t2_rhf2uhf, t3_rhf2uhf] -myucc2 = cc.UCCSDT(mf, compact_tamps=False).set(conv_tol=1e-10, conv_tol_normt=1e-8, verbose=5) +myucc2 = cc.UCCSDT(mf_uhf, compact_tamps=False).set(conv_tol=1e-10, conv_tol_normt=1e-8, verbose=5) myucc2.kernel(tamps=tamps_init_uhf) print('UCCSDT correlation energy % .12f Ref % .12f Diff % .12e' % ( myucc2.e_corr, -0.2188784727114157, myucc2.e_corr - -0.2188784727114157)) diff --git a/examples/cc/64-chained_rccsd_rccsdt_rccsdtq.py b/examples/cc/64-chained_rccsd_rccsdt_rccsdtq.py index 162be81458..3389325cbe 100644 --- a/examples/cc/64-chained_rccsd_rccsdt_rccsdtq.py +++ b/examples/cc/64-chained_rccsd_rccsdt_rccsdtq.py @@ -11,7 +11,6 @@ - Examine the influence of DIIS acceleration on convergence. 
''' -import numpy as np from pyscf import gto, scf, cc def run_rccsd_rccsdt_rccsdtq(do_diis=False, do_diis_max_t=False, verbose=0): diff --git a/examples/cc/65-chained_uccsd_uccsdt.py b/examples/cc/65-chained_uccsd_uccsdt.py index bbfc3a8777..15a5a782c0 100644 --- a/examples/cc/65-chained_uccsd_uccsdt.py +++ b/examples/cc/65-chained_uccsd_uccsdt.py @@ -11,7 +11,6 @@ - Understand and handle the difference in T2 amplitude conventions between UCCSD and UCCSDT implementations. ''' -import numpy as np from pyscf import gto, scf, cc def run_uccsd_uccsdt(do_diis=False, do_diis_max_t=False, verbose=0): @@ -32,7 +31,7 @@ def run_uccsd_uccsdt(do_diis=False, do_diis_max_t=False, verbose=0): myccsd.verbose = verbose myccsd.diis = do_diis myccsd.kernel() - print('RCCSD e_corr % .12f Ref % .12f Diff % .12e' % ( + print('UCCSD e_corr % .12f Ref % .12f Diff % .12e' % ( myccsd.e_corr, ref_ccsd_e_corr, myccsd.e_corr - ref_ccsd_e_corr)) # UCCSDT @@ -99,7 +98,7 @@ def run_uccsd_uccsdt(do_diis=False, do_diis_max_t=False, verbose=0): do_diis_max_t = False run_uccsd_uccsdt(do_diis=do_diis, do_diis_max_t=do_diis_max_t) - print('=== UCCSD / UCCSDT with DIIS (including T3 amplitudes) ===') + print('=== UCCSD -> UCCSDT with DIIS (including T3 amplitudes) ===') do_diis = True do_diis_max_t = True run_uccsd_uccsdt(do_diis=do_diis, do_diis_max_t=do_diis_max_t) diff --git a/examples/cc/66-rccsdt_q.py b/examples/cc/66-rccsdt_q.py new file mode 100644 index 0000000000..28e51177fa --- /dev/null +++ b/examples/cc/66-rccsdt_q.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# +# Author: Yu Jin +# + +''' +Examples of RCCSDT(Q) calculations. + +This script demonstrates: + - Consistency of the [Q] and (Q) energy corrections on top of RCCSDT between calculations + using full and compact T3 storage. 
+''' + +from pyscf import gto, scf, cc + +mol = gto.M(atom='H 0 0 0; F 0 0 1.1', basis='ccpvdz') +mf = scf.RHF(mol) +mf.conv_tol = 1e-14 +mf.kernel() + +# Reference CCSDT correlation energy, and [Q] and (Q) energy correction +ref_e_corr = -0.2188784733230733 +ref_e_q_bracket = -0.0005026220700017348 +ref_e_q_paren = -0.0005490746450078632 + +mycc1 = cc.RCCSDT(mf, compact_tamps=True) +mycc1.conv_tol = 1e-10 +mycc1.conv_tol_normt = 1e-8 +mycc1.verbose = 5 +# einsum_backend: numpy (default) | pyscf | pytblis (recommended) +# pytblis can be installed via `pip install pytblis==0.05` (See https://github.com/chillenb/pytblis) +mycc1.set_einsum_backend('pyscf') +mycc1.incore_complete = True +mycc1.kernel() +e_q_bracket, e_q_paren = mycc1.ccsdt_q() +print('Triangular RCCSDT e_corr % .12f Ref % .12f Diff % .12e' % ( + mycc1.e_corr, ref_e_corr, mycc1.e_corr - ref_e_corr)) +print('Triangular RCCSDT [Q] % .12f Ref % .12f Diff % .12e' % ( + e_q_bracket, ref_e_q_bracket, e_q_bracket - ref_e_q_bracket)) +print('Triangular RCCSDT (Q) % .12f Ref % .12f Diff % .12e' % ( + e_q_paren, ref_e_q_paren, e_q_paren - ref_e_q_paren)) + +# +# RCCSDT with full T3 storage +# Same as cc.rccsdt_highm.RCCSDT +# +mycc2 = cc.RCCSDT(mf, compact_tamps=False) +mycc2.conv_tol = 1e-10 +mycc2.conv_tol_normt = 1e-8 +mycc2.verbose = 5 +mycc2.incore_complete = True +mycc2.kernel() +q_bracket2, q_paren2 = mycc2.ccsdt_q() +print('Full-T3 RCCSDT e_corr % .12f Ref % .12f Diff % .12e' % ( + mycc2.e_corr, ref_e_corr, mycc2.e_corr - ref_e_corr)) +print('Full-T3 RCCSDT [Q] % .12f Ref % .12f Diff % .12e' % ( + q_bracket2, ref_e_q_bracket, q_bracket2 - ref_e_q_bracket)) +print('Full-T3 RCCSDT (Q) % .12f Ref % .12f Diff % .12e' % ( + q_paren2, ref_e_q_paren, q_paren2 - ref_e_q_paren)) diff --git a/examples/df/01-auxbasis.py b/examples/df/01-auxbasis.py index 523b9e3012..df01368000 100644 --- a/examples/df/01-auxbasis.py +++ b/examples/df/01-auxbasis.py @@ -10,7 +10,6 @@ See also examples/gto/04-input_basis.py ''' 
-import tempfile from pyscf import gto, scf, df # diff --git a/examples/df/40-precompute_df_integrals.py b/examples/df/40-precompute_df_integrals.py index 38b36245a5..13cddcc35d 100644 --- a/examples/df/40-precompute_df_integrals.py +++ b/examples/df/40-precompute_df_integrals.py @@ -10,12 +10,12 @@ reused many times. ''' -import tempfile from pyscf import gto, scf, df +from pyscf import lib from pyscf.pbc import gto as pgto from pyscf.pbc import dft as pdft -tmpf = tempfile.NamedTemporaryFile() +tmpf = lib.NamedTemporaryFile() file_to_save_df_ints = tmpf.name print('DF integral is saved in %s' % file_to_save_df_ints) diff --git a/examples/dft/33-custom_disp.py b/examples/dft/33-custom_disp.py new file mode 100644 index 0000000000..f3d01b4cbe --- /dev/null +++ b/examples/dft/33-custom_disp.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python +# Copyright 2021-2026 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +########################################################### +# Example of DFT with custom dispersion correction (dftd3/dftd4) +########################################################### + +""" +This example demonstrates the updated dispersion convention (mf.disp) in PySCF. + +To run the D3 and D4 examples, install the optional dispersion dependencies: + + pip install 'pyscf-dispersion>1.5.0' + +Key knobs +1) mf.xc + The XC functional for the underlying DFT calculation (e.g. 'b3lyp', 'wb97x-v'). 
+ +2) mf.disp + The dispersion correction to apply (e.g. D3BJ or D4). + + Two common forms are supported: + a) Version only: 'd3bj', 'd3zero', 'd3bjm', 'd3zerom', 'd3op', 'd4' + The code will infer the dispersion parameter "method keyword" from mf.xc. + + b) Explicit version:method: 'd4:wb97x' / 'd4:wb97x-rev' / 'd4:wb97x-3c' + - version: dispersion engine/version tag (d3bj, d3zero, d4, ...) + - method: the keyword used by dftd3 (https://github.com/dftd3/simple-dftd3/blob/main/assets/parameters.toml) + or dftd4 (https://github.com/dftd4/dftd4/blob/main/assets/parameters.toml) to select parameters + +3) mf.nlc + Non-local correlation (e.g. VV10). + You do not need to set this if you use a *-V functional, since + such functionals invoke VV10 by default. If you want the wB97X-V/wB97M-V XC form, but + without VV10, and with D3/D4 instead (e.g. wB97X-3c, wB97M-D4), explicitly disable VV10 via: + mf.nlc = 0 + +Below we run six minimal single-point examples for H2O. Each block creates an +SCF object, then sets mf.xc / mf.disp / mf.nlc explicitly. 
+""" + +import pyscf +from pyscf import dft + +atom = ''' +O 0.0000000000 -0.0000000000 0.1174000000 +H -0.7570000000 -0.0000000000 -0.4696000000 +H 0.7570000000 0.0000000000 -0.4696000000 +''' + +mol = pyscf.M(atom=atom, basis='def2-svp') + +print('Dispersion convention examples (tutorial)') +print('------------------------------------------------') + +print() +print('Example 1: B3LYP + D3BJ') +mf = dft.RKS(mol) +mf.xc = 'b3lyp' +mf.disp = 'd3bj' +mf.grids.level = 5 +mf.direct_scf_tol = 1e-14 +mf.conv_tol = 1e-12 +mf.max_cycle = 50 +e_tot = mf.kernel() +print(f' mf.xc = {mf.xc}') +print(f' mf.disp = {mf.disp}') +print(f' e_tot = {e_tot}') + +print() +print("Example 2: B3LYP + D3BJ (explicit version:method)") +print(" 'd3bj:b3lyp' means: use D3BJ, and force the D3BJ parameters of method='b3lyp'") +mf = dft.RKS(mol) +mf.xc = 'b3lyp' +mf.disp = 'd3bj:b3lyp' +mf.grids.level = 5 +mf.direct_scf_tol = 1e-14 +mf.conv_tol = 1e-12 +mf.max_cycle = 50 +e_tot = mf.kernel() +print(f' mf.xc = {mf.xc}') +print(f' mf.disp = {mf.disp}') +print(f' e_tot = {e_tot}') + +print() +print('Example 3: wB97X-V (VV10 nonlocal correlation)') +print(" Here we demonstrate mf.nlc='vv10' (and no extra dispersion via mf.disp)") +mf = dft.RKS(mol) +mf.xc = 'wb97x-v' +mf.nlc = 'vv10' +mf.disp = None +mf.grids.level = 5 +mf.direct_scf_tol = 1e-14 +mf.conv_tol = 1e-12 +mf.max_cycle = 50 +e_tot = mf.kernel() +print(f' mf.xc = {mf.xc}') +print(f' mf.nlc = {mf.nlc}') +print(f' mf.disp = {mf.disp}') +print(f' e_tot = {e_tot}') + +print() +print('Example 4: wB97X-D4 (explicit D4 parameters for method=wb97x, VV10 disabled)') +print(" Key point: mf.xc='wb97x-v' + mf.nlc=0 + mf.disp='d4:wb97x'") +mf = dft.RKS(mol) +mf.xc = 'wb97x-v' +mf.nlc = 0 +mf.disp = 'd4:wb97x' +mf.grids.level = 5 +mf.direct_scf_tol = 1e-14 +mf.conv_tol = 1e-12 +mf.max_cycle = 50 +e_tot = mf.kernel() +print(f' mf.xc = {mf.xc}') +print(f' mf.nlc = {mf.nlc}') +print(f' mf.disp = {mf.disp}') +print(f' e_tot = {e_tot}') + +print() 
+print('Example 5: wB97X-D4rev (explicit D4 parameters for method=wb97x-rev, VV10 disabled)') +print(" Key point: mf.xc='wb97x-v' + mf.nlc=0 + mf.disp='d4:wb97x-rev'") +mf = dft.RKS(mol) +mf.xc = 'wb97x-v' +mf.nlc = 0 +mf.disp = 'd4:wb97x-rev' +mf.grids.level = 5 +mf.direct_scf_tol = 1e-14 +mf.conv_tol = 1e-12 +mf.max_cycle = 50 +e_tot = mf.kernel() +print(f' mf.xc = {mf.xc}') +print(f' mf.nlc = {mf.nlc}') +print(f' mf.disp = {mf.disp}') +print(f' e_tot = {e_tot}') + +print() +print('Example 6: wB97X-3c (use wB97X-V form but disable VV10, then add D4 parameters for wb97x-3c)') +print(" Key point: mf.xc='wb97x-v' + mf.nlc=0 + mf.disp='d4:wb97x-3c'") +print(" basis = 'Grimme vDZP'") +print(" ecp = 'Grimme vDZP', please specify it for each element that needs ecp") +print(" To load the Grimme vDZP basis/ECP, install basis-set-exchange:") +print(" pip install basis-set-exchange") + +mol_3c = pyscf.M( + atom=atom, + basis='Grimme vDZP', + ecp={'O': 'Grimme vDZP'}, # H does not have ecp in Grimme vDZP. +) +mf = dft.RKS(mol_3c) +mf.xc = 'wb97x-v' +mf.nlc = 0 +mf.disp = 'd4:wb97x-3c' +mf.grids.level = 5 +mf.direct_scf_tol = 1e-14 +mf.conv_tol = 1e-12 +mf.max_cycle = 50 +e_tot = mf.kernel() +print(f' mf.xc = {mf.xc}') +print(f' mf.nlc = {mf.nlc}') +print(f' mf.disp = {mf.disp}') +print(f' e_tot = {e_tot}') diff --git a/examples/gto/01-input_geometry.py b/examples/gto/01-input_geometry.py index 96a4334601..6fba4e8dbc 100644 --- a/examples/gto/01-input_geometry.py +++ b/examples/gto/01-input_geometry.py @@ -14,6 +14,7 @@ import numpy from pyscf import gto +from pyscf import lib # # Input Cartesian coordinates @@ -127,8 +128,7 @@ # Read geometry from a file. 
If the file name is assigned to mol.atom, the # build method will guess the file format and parse the contents accordingly # -import tempfile -with tempfile.NamedTemporaryFile(mode='w', suffix='.xyz') as f: +with lib.NamedTemporaryFile(mode='w', suffix='.xyz') as f: f.write('''3 O 0 0 0 diff --git a/examples/gw/04-bse.py b/examples/gw/04-bse.py new file mode 100644 index 0000000000..9da63f0c7e --- /dev/null +++ b/examples/gw/04-bse.py @@ -0,0 +1,99 @@ +""" +Example for Bethe-Salpeter equation. + +######## +Reference results for acetone / B3LYP / def2-SVP +acetone geometry from: J. Phys. Chem. Lett. 2016, 7, 3, 586-591 + +* GW step (fully analytic GW, quasiparticle equation solved iteratively) + HOMO (eV) LUMO (eV) +Turbomole -8.78 2.75 +PySCF -8.79 2.75 + +* First three singlet excitations (eV) for BSE + S1 S2 S3 +Turbomole 3.41 7.35 8.56 +PySCF 3.41 7.36 8.58 + +* First three triplet excitations (eV) for BSE + T1 T2 T3 +Turbomole 2.68 4.67 7.14 +PySCF 2.67 4.67 7.14 + +""" +import numpy as np +from pyscf import gto, dft +from pyscf.gw.gw_ac import GWAC +from pyscf.gw.ugw_ac import UGWAC +from pyscf.gw.bse import BSE, bse_lanczos, lanczos_estimate_spectrum + +# restricted +mol = gto.Mole() +mol.verbose = 5 +mol.atom = [[8, (0.0, 0.0, 0.0)], [1, (0.7571, 0.0, 0.5861)], [1, (-0.7571, 0.0, 0.5861)]] +mol.basis = 'def2-svp' +mol.build() +mf = dft.RKS(mol, xc='pbe') +mf.kernel() + +# GW-AC/BSE +gw = GWAC(mf) +gw.kernel() +bse = BSE(gw) +# Davidson algorithm for singlet excitation +bse.TDA = False +bse.kernel('s') +bse.analyze() +# Davidson algorithm for triplet excitation, turn on TDA +bse.TDA = True +bse.kernel('t') +bse.analyze() +# full diagonalization for triplet excitation +bse.full_diagonalization('t') +bse.analyze() + +eta = 0.01 # spectrum broadening in eV +omega = np.linspace(0.0, 1.0, 1000)[:, None] + 1j * eta # (nω, 1) + +ao_dip = mol.intor('int1e_r', comp=3) +nocc = mol.nelectron // 2 +mo_dip = np.einsum('xij,ia,jb->xab', ao_dip, mf.mo_coeff[:, :nocc], 
mf.mo_coeff[:, nocc:]) + +bse.TDA = False +lanczos_spectra = [] +for j in range(3): + alphas, betas = bse_lanczos(bse, multi='s', u1=mo_dip[j].flatten(), nsteps=500) + freqs, density = lanczos_estimate_spectrum(alphas, betas, (0, 1), eta, 1000) + lanczos_spectra.append(density) +mean_spectrum = np.mean(lanczos_spectra, axis=0) * 4 * np.pi +print("spectrum from Lanczos algorithm:") +for i in range(len(freqs)): + print(f"{freqs[i]:.6f} {mean_spectrum[i]:.6f}") + +# Energy-specific BSE, target excitations above 0.4 AU +gw = GWAC(mf) +gw.kernel() +bse = BSE(gw) +bse.kernel('s', e_min=0.4) +bse.analyze() +bse.kernel('t', e_min=0.4) +bse.analyze() + +# unrestricted +mol = gto.Mole() +mol.verbose = 5 +mol.atom = [[8, (0.0, 0.0, 0.0)], [1, (0.7571, 0.0, 0.5861)], [1, (-0.7571, 0.0, 0.5861)]] +mol.charge = 1 +mol.spin = 1 +mol.basis = 'def2-svp' +mol.build() +mf = dft.UKS(mol, xc='pbe') +mf.kernel() + +# UGWAC/BSE +gw = UGWAC(mf) +gw.kernel() + +bse = BSE(gw) +bse.kernel('u') +bse.analyze() diff --git a/examples/mcscf/13-load_chkfile.py b/examples/mcscf/13-load_chkfile.py index e25c68259b..096d9c2a81 100644 --- a/examples/mcscf/13-load_chkfile.py +++ b/examples/mcscf/13-load_chkfile.py @@ -3,7 +3,7 @@ # Author: Qiming Sun # -import tempfile +import os import h5py from pyscf import gto, scf, mcscf from pyscf import lib @@ -14,7 +14,8 @@ MCSCF objects. 
''' -tmpchk = tempfile.NamedTemporaryFile() + + mol = gto.Mole() mol.atom = 'C 0 0 0; C 0 0 1.2' @@ -22,11 +23,12 @@ mol.build() mf = scf.RHF(mol) -mf.chkfile = tmpchk.name +chkname = os.path.join(lib.param.TMPDIR, '13-load_chkfile.chk') +mf.chkfile = chkname mf.kernel() mc = mcscf.CASSCF(mf, 6, 6) -mc.chkfile = tmpchk.name +mc.chkfile = chkname mc.max_cycle_macro = 1 mc.kernel() @@ -35,7 +37,7 @@ # Scenario 1: Using h5py to read quantities in chkfile # -with h5py.File(tmpchk.name) as f: +with h5py.File(chkname) as f: print('Keys in chkfile', f.keys) print('Keys in mcscf group', f['mcscf'].keys) mcscf_orb = f['mcscf/mo_coeff'].value @@ -44,12 +46,12 @@ # # Scenario 2: Using lib.chkfile module # -mol = lib.chkfile.load_mol(tmpchk.name) -mcscf_orb = lib.chkfile.load(tmpchk.name, 'mcscf/mo_coeff') +mol = lib.chkfile.load_mol(chkname) +mcscf_orb = lib.chkfile.load(chkname, 'mcscf/mo_coeff') # # Scenario 3: Using Python trick to quickly load scf/mcscf # intermediates/results # mc = mcscf.CASSCF(mf, 6, 6) -mc.__dict__.update(lib.chkfile.load(tmpchk.name, 'mcscf')) +mc.__dict__.update(lib.chkfile.load(chkname, 'mcscf')) diff --git a/examples/mcscf/13-restart.py b/examples/mcscf/13-restart.py index a6846d17fa..5aea906fc7 100644 --- a/examples/mcscf/13-restart.py +++ b/examples/mcscf/13-restart.py @@ -3,7 +3,7 @@ # Author: Qiming Sun # -import tempfile +import os from pyscf import gto, scf, mcscf from pyscf import lib @@ -19,25 +19,25 @@ intermediate results. ''' -tmpchk = tempfile.NamedTemporaryFile() - mol = gto.Mole() mol.atom = 'C 0 0 0; C 0 0 1.2' mol.basis = 'ccpvdz' mol.build() +chkname = os.path.join(lib.param.TMPDIR, '13-restart.chk') mf = scf.RHF(mol) +mf.chkfile = chkname mf.kernel() mc = mcscf.CASSCF(mf, 6, 6) -mc.chkfile = tmpchk.name +mc.chkfile = chkname mc.max_cycle_macro = 1 mc.kernel() ####################################################################### # # Assuming the CASSCF was interrupted. Intermediate data were saved in -# tmpchk file. 
Here we read the chkfile to restart the previous calculation. +# chkname file. Here we read the chkfile to restart the previous calculation. # ####################################################################### mol = gto.Mole() @@ -46,11 +46,11 @@ mol.build() mc = mcscf.CASSCF(scf.RHF(mol), 6, 6) -mo = lib.chkfile.load(tmpchk.name, 'mcscf/mo_coeff') +mo = lib.chkfile.load(chkname, 'mcscf/mo_coeff') mc.kernel(mo) # Assuming you lose all memory about the previous calculation. # Restart the calculation with chkfile only. -mol, mcdata = mcscf.chkfile.load_mcscf(tmpchk.name) -mc = mcscf.CASSCF(mol, mcdata['ncas'], mcdata['nelecas']).update_from_chk(tmpchk.name) +mol, mcdata = mcscf.chkfile.load_mcscf(chkname) +mc = mcscf.CASSCF(mol, mcdata['ncas'], mcdata['nelecas']).update_from_chk(chkname) mc.kernel() diff --git a/examples/mcscf/41-mcscf_custom_df_hamiltonian.py b/examples/mcscf/41-mcscf_custom_df_hamiltonian.py index 8ef9465796..8ba2a00c4c 100644 --- a/examples/mcscf/41-mcscf_custom_df_hamiltonian.py +++ b/examples/mcscf/41-mcscf_custom_df_hamiltonian.py @@ -3,9 +3,9 @@ # Author: Qiming Sun # -import tempfile import h5py from pyscf import gto, df, scf, mcscf +from pyscf import lib ''' Using the Cholesky decomposed 2-electron integrals to define the Hamiltonian in CASSCF @@ -33,7 +33,7 @@ # # Integrals on disk # -ftmp = tempfile.NamedTemporaryFile() +ftmp = lib.NamedTemporaryFile() df.outcore.cholesky_eri(mol, ftmp.name, auxbasis='ccpvdz-fit') with h5py.File(ftmp.name, 'r') as file1: diff --git a/examples/mp/13-mp2_cabs.py b/examples/mp/13-mp2_cabs.py new file mode 100644 index 0000000000..393158ed2a --- /dev/null +++ b/examples/mp/13-mp2_cabs.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python +# +# Author: Igor S. Gerasimov +# + +""" +A simple example to run MP2 calculation with CABS correction. 
+""" + +import pyscf + +mol = pyscf.M(atom='H 0 0 0; F 0 0 1.1', basis='ccpvdz') + +mf = mol.RHF().run() + +mf.MP2().run() + +pyscf.mp.cabs.energy_singles(mf, auxbasis='ccpvdzri') diff --git a/examples/pbc/22-k_points_gw.py b/examples/pbc/22-k_points_gw.py index 2c1604bc28..854807600a 100644 --- a/examples/pbc/22-k_points_gw.py +++ b/examples/pbc/22-k_points_gw.py @@ -3,10 +3,8 @@ ''' G0W0 with k-points sampling ''' - -from functools import reduce -import numpy -from pyscf.pbc import gto, scf, gw +import numpy as np +from pyscf.pbc import df, gto, scf, gw cell = gto.Cell() cell.atom=''' @@ -23,21 +21,59 @@ cell.verbose = 5 cell.build() -# -# KDFT and KGW with 2x2x2 k-points -# -kpts = cell.make_kpts([2,2,2]) -kmf = scf.KRKS(cell).density_fit() -kmf.kpts = kpts -emf = kmf.kernel() +kpts = cell.make_kpts([2, 2, 2]) +gdf = df.RSDF(cell, kpts) +gdf.build() + +# restricted KGW +kmf = scf.KRKS(cell, kpts).rs_density_fit() +kmf.with_df = gdf +kmf.kernel() + +# KRGWAC using analytical continuation +mygw = gw.krgw_ac.KRGWAC(kmf) +mygw.kernel() + +# KRGWAC low-memory routine +# finite-size correction is not implemented for outcore routine +mygw = gw.krgw_ac.KRGWAC(kmf) +mygw.outcore = True +mygw.fc = False +mygw.kernel() -# Default is AC frequency integration -mygw = gw.KRGW(kmf) +# KRGWAC full self-energy and density of states +mygw = gw.krgw_ac.KRGWAC(kmf) +mygw.fullsigma = True mygw.kernel() -print("KRGW energies =", mygw.mo_energy) +omega = np.linspace(-1, 1, 201) +# gf: GW Green's function; gf0: DFT Green's function; sigma: self-energy +gf, gf0, sigma = mygw.make_gf(omega, eta=1e-2) +print("k=0 density of states") +for i in range(len(omega)): + print(omega[i], -np.trace(gf[0, :, :, i].imag) / np.pi) # With CD frequency integration #mygw = gw.KRGW(kmf, freq_int='cd') #mygw.kernel() #print("KRGW-CD energies =", mygw.mo_energy) +# unrestricted KGW +kmf = scf.KUKS(cell, kpts).rs_density_fit() +kmf.with_df = gdf +kmf.kernel() + +# KUGWAC using analytical continuation with 
finite-size correction +mygw = gw.kugw_ac.KUGWAC(kmf) +mygw.fc = True +mygw.kernel() + +# KUGWAC full self-energy and density of states +mygw = gw.kugw_ac.KUGWAC(kmf) +mygw.fullsigma = True +mygw.kernel() +omega = np.linspace(-1, 1, 201) +# gf: GW Green's function; gf0: DFT Green's function; sigma: self-energy +gf, gf0, sigma = mygw.make_gf(omega, eta=1e-2) +print("k=0 density of states: alpha beta") +for i in range(len(omega)): + print(omega[i], -np.trace(gf[0, 0, :, :, i].imag) / np.pi, -np.trace(gf[0, 1, :, :, i].imag) / np.pi) diff --git a/examples/pbc/22-k_points_rpa.py b/examples/pbc/22-k_points_rpa.py new file mode 100644 index 0000000000..b21b02106d --- /dev/null +++ b/examples/pbc/22-k_points_rpa.py @@ -0,0 +1,114 @@ +''' +RPA with k-points sampling +''' + +from pyscf.pbc import gto, df, dft, scf +from pyscf.pbc.gw.krpa import KRPA +from pyscf.pbc.gw.kurpa import KURPA + +# spin-restricted RPA +cell = gto.Cell() +cell.build( + unit='angstrom', + a=""" + 0.000000 1.783500 1.783500 + 1.783500 0.000000 1.783500 + 1.783500 1.783500 0.000000 + """, + atom='C 1.337625 1.337625 1.337625; C 2.229375 2.229375 2.229375', + dimension=3, + max_memory=12000, + verbose=5, + pseudo='gth-pbe', + basis='gth-dzv', + precision=1e-12, +) + +kpts = cell.make_kpts([3, 1, 1], scaled_center=[0, 0, 0]) +gdf = df.RSGDF(cell, kpts) +gdf.build() +kmf = scf.KRHF(cell, kpts).rs_density_fit() +kmf.with_df = gdf +kmf.kernel() + +# RPA with finite-size correction +rpa = KRPA(kmf) +rpa.fc = True +rpa.kernel() +# RPA without finite-size correction +rpa = KRPA(kmf) +rpa.fc = False +rpa.kernel() +# low-memory routine +rpa = KRPA(kmf) +rpa.outcore = True +rpa.segsize = 2 +rpa.kernel() + +# Na (metallic) +cell = gto.Cell() +cell.build( + unit='angstrom', + a=""" + -2.11250000000000 2.11250000000000 2.11250000000000 + 2.11250000000000 -2.11250000000000 2.11250000000000 + 2.11250000000000 2.11250000000000 -2.11250000000000 + """, + atom="""Na 0.00000 0.00000 0.00000""", + dimension=3, + 
max_memory=126000, + verbose=5, + pseudo='gth-pade', + basis='gth-dzvp-molopt-sr', + precision=1e-10, +) + +kpts = cell.make_kpts([2, 2, 1], scaled_center=[0, 0, 0]) +gdf = df.RSGDF(cell, kpts) +gdf.build() + +kmf = dft.KRKS(cell, kpts).rs_density_fit() +kmf = scf.addons.smearing_(kmf, sigma=5e-3, method='fermi') +kmf.xc = 'lda' +kmf.with_df = gdf +kmf.kernel() + +rpa = KRPA(kmf) +rpa.kernel() +# use ACFDT exchange energy +rpa = KRPA(kmf) +rpa.acfd_exx = True +rpa.kernel() + +# spin-unrestricted RPA +cell = gto.Cell() +cell.build( + unit='B', + a=[[0.0, 6.74027466, 6.74027466], [6.74027466, 0.0, 6.74027466], [6.74027466, 6.74027466, 0.0]], + atom="""H 0 0 0 + H 1.68506866 1.68506866 1.68506866 + H 3.37013733 3.37013733 3.37013733""", + basis='gth-dzvp', + pseudo='gth-pade', + verbose=5, + charge=0, + spin=1, +) + +cell.spin = cell.spin * 3 +kpts = cell.make_kpts([3, 1, 1], scaled_center=[0, 0, 0]) +gdf = df.RSDF(cell, kpts) +gdf.build() + +kmf = scf.KUHF(cell, kpts, exxdiv='ewald').rs_density_fit() +kmf = scf.addons.smearing_(kmf, sigma=5e-3, method='fermi') +kmf.xc = 'lda' +kmf.with_df = gdf +kmf.kernel() + +rpa = KURPA(kmf) +rpa.kernel() +# use ACFDT exchange energy +rpa = KURPA(kmf) +rpa.acfd_exx = True +rpa.kernel() diff --git a/examples/scf/02-ghf.py b/examples/scf/02-ghf.py index 6a6963c5c0..b32be1c68d 100644 --- a/examples/scf/02-ghf.py +++ b/examples/scf/02-ghf.py @@ -4,14 +4,23 @@ # ''' -scf.GHF, real, complex. +Examples of generalized Hartree–Fock (GHF) calculations. + +Each molecular orbital in GHF is represented in a two-component basis (alpha and +beta components). Typically, the GHF orbital coefficient matrix (mo_coeff) has +dimension 2N x 2N, where N is the number of AOs (mol.nao). The alpha +components are stored in the upper block (mo_coeff[:N]) and the beta components +are stored in the lower block (mo_coeff[N:]). + +This example demonstrates + +1. Real-valued GHF calculations. +2. Complex-valued GHF calculations. +3. 
Breaking the Sz spin symmetry in GHF. ''' from pyscf import gto, scf -# -# 1. real GHF -# mol = gto.M( atom = ''' O 0 0 0 @@ -22,14 +31,51 @@ spin = 1 # = 2S = spin_up - spin_down ) -mf = scf.GHF(mol) +# +# 1. Real-valued GHF +# +# For a non-relativistic Hamiltonian with only real-valued integrals, the GHF +# solution is normally real. In this case, the converged GHF solution is usually +# equivalent to the corresponding UHF solution. Although the Hamiltonian itself +# does not couple the alpha and beta spin channels, degeneracy can lead to the +# rotation within the alpha and beta orbitals, leading to mixed spin +# components in the GHF orbitals. +# +mf = mol.GHF() mf.kernel() # -# 2. complex GHF +# 2. Complex-valued GHF +# +# GHF can also optimize complex-valued orbitals. One way to obtain such a +# solution is to start the SCF procedure from a complex density matrix. +# +mf = mol.GHF() +dm = mf.get_init_guess() + 0j +dm[0,0] += .05j +dm[1,1] -= .05j +mf.kernel(dm0=dm) + +# +# 3. Breaking the Sz spin symmetry +# +# Spin-orbit coupling (SOC) operator can mix alpha and beta components. The SOC +# term can be enabled by the X2C relativistic calculations with GHF (see also +# examples/x2c/03-x2c_ghf.py) or the configuration mf.with_soc in the case of +# ECP-SOC calculations (see also examples/scf/44-soc_ecp.py). +# +mf = mol.GHF().x2c() +mf.run() + +# +# A non-zero alpha-beta block in the density matrix explicitly couples the two +# spin sectors. Such initial guesses can drive the SCF procedure toward a +# solution that breaks the Sz symmetry, even without an explicit SOC term in the +# Hamiltonian. 
# -mf = scf.GHF(mol) +mf = mol.GHF() dm = mf.get_init_guess() + 0j -dm[0,:] += .05j -dm[:,0] -= .05j +nao = mol.nao +dm[:nao,nao:] = 0.05j +dm[nao:,:nao] = -0.05j mf.kernel(dm0=dm) diff --git a/examples/scf/15-initial_guess.py b/examples/scf/15-initial_guess.py index 98807bd96e..03f9480f0e 100644 --- a/examples/scf/15-initial_guess.py +++ b/examples/scf/15-initial_guess.py @@ -11,7 +11,6 @@ initial guess. ''' -import tempfile from pyscf import gto from pyscf import scf @@ -38,10 +37,7 @@ basis = 'cc-pVDZ', ) -tmp_chkfile = tempfile.NamedTemporaryFile() -chkfile_name = tmp_chkfile.name mf = scf.RHF(mol) -mf.chkfile = chkfile_name mf.kernel(dm_init_guess) # If a numpy array is assigned to the attribute .init_guess, it will be used diff --git a/examples/scf/21-x2c.py b/examples/scf/21-x2c.py index 0a55200396..1b8762a7ba 100644 --- a/examples/scf/21-x2c.py +++ b/examples/scf/21-x2c.py @@ -1,7 +1,4 @@ #!/usr/bin/env python -# -# Author: Qiming Sun -# ''' Applying scalar relativistic effects by decorating the scf object with diff --git a/examples/scf/32-break_spin_symm.py b/examples/scf/32-break_spin_symm.py index 2511f757f3..6d28e3c703 100644 --- a/examples/scf/32-break_spin_symm.py +++ b/examples/scf/32-break_spin_symm.py @@ -7,6 +7,7 @@ Break spin symmetry for UHF/UKS by initial guess. See also examples/dft/32-broken_symmetry_dft.py + and examples/scf/56-h2_symm_breaking.py ''' import numpy @@ -38,3 +39,17 @@ dm_beta[:2,:2] = 0 dm = (dm_alpha,dm_beta) mf.kernel(dm) + +# +# Alternative: use the built-in HOMO-LUMO rotation (breaksym='mix'). +# Instead of zeroing atom blocks, this rotates the alpha and beta HOMOs +# by +/-45 degrees into the LUMO: +# alpha HOMO -> (HOMO + LUMO) / sqrt(2) +# beta HOMO -> (HOMO - LUMO) / sqrt(2) +# The orbitals remain delocalized over the full molecule, giving a smoother +# symmetry break that is less likely to collapse back to the RHF solution. +# This option also works for UKS. 
+# +mf2 = scf.UHF(mol) +mf2.init_guess_breaksym = 'mix' +mf2.kernel() diff --git a/examples/scf/41-hf_with_given_densityfit_ints.py b/examples/scf/41-hf_with_given_densityfit_ints.py index 364b4804f3..5d772b2700 100644 --- a/examples/scf/41-hf_with_given_densityfit_ints.py +++ b/examples/scf/41-hf_with_given_densityfit_ints.py @@ -10,9 +10,9 @@ examples/df/40-precompute_df_ints.py ''' -import tempfile import h5py from pyscf import gto, df, scf +from pyscf import lib mol = gto.M(atom='H 0 0 0; F 0 0 1', basis='ccpvdz') @@ -20,7 +20,7 @@ int3c = df.incore.cholesky_eri(mol, auxbasis='ccpvdz-fit') # Integrals on disk -ftmp = tempfile.NamedTemporaryFile() +ftmp = lib.NamedTemporaryFile() df.outcore.cholesky_eri(mol, ftmp.name, auxbasis='ccpvdz-fit') diff --git a/examples/scf/56-h2_symm_breaking.py b/examples/scf/56-h2_symm_breaking.py index 0d7d2c9830..29f529b46e 100644 --- a/examples/scf/56-h2_symm_breaking.py +++ b/examples/scf/56-h2_symm_breaking.py @@ -1,12 +1,21 @@ #!/usr/bin/env python # Author: James D Whitfield ''' -Scan H2 molecule dissociation curve comparing UHF and RHF solutions per the -example of Szabo and Ostlund section 3.8.7 +Scan H2 molecule dissociation curve comparing UHF and RHF solutions per the +example of Szabo and Ostlund section 3.8.7. The initial guess is obtained by mixing the HOMO and LUMO and is implemented as a function that can be used in other applications. +NOTE: The HOMO-LUMO mixing strategy used here is now available as a built-in +option via init_guess_breaksym='mix', which also works for UKS. The manual +init_guess_mixed function below is kept for educational purposes. 
To use the +built-in version replace uhf.kernel(init_guess_mixed(mol)) with: + + uhf = scf.UHF(mol) + uhf.init_guess_breaksym = 'mix' + uhf.kernel() + See also 16-h2_scan.py, 30-scan_pes.py, 32-break_spin_symm.py ''' @@ -23,18 +32,18 @@ def init_guess_mixed(mol,mixing_parameter=numpy.pi/4): ''' Generate density matrix with broken spatial and spin symmetry by mixing HOMO and LUMO orbitals following ansatz in Szabo and Ostlund, Sec 3.8.7. - + psi_1a = numpy.cos(q)*psi_homo + numpy.sin(q)*psi_lumo psi_1b = numpy.cos(q)*psi_homo - numpy.sin(q)*psi_lumo - + psi_2a = -numpy.sin(q)*psi_homo + numpy.cos(q)*psi_lumo psi_2b = numpy.sin(q)*psi_homo + numpy.cos(q)*psi_lumo - Returns: + Returns: Density matrices, a list of 2D ndarrays for alpha and beta spins ''' # opt: q, mixing parameter 0 < q < 2 pi - + #based on init_guess_by_1e h1e = scf.hf.get_hcore(mol) s1e = scf.hf.get_ovlp(mol) @@ -51,7 +60,7 @@ def init_guess_mixed(mol,mixing_parameter=numpy.pi/4): psi_homo=mo_coeff[:, homo_idx] psi_lumo=mo_coeff[:, lumo_idx] - + Ca=numpy.zeros_like(mo_coeff) Cb=numpy.zeros_like(mo_coeff) @@ -72,7 +81,7 @@ def init_guess_mixed(mol,mixing_parameter=numpy.pi/4): Cb[:,k]=mo_coeff[:,k] dm =scf.UHF(mol).make_rdm1( (Ca,Cb), (mo_occ,mo_occ) ) - return dm + return dm for b in numpy.arange(0.7, 4.01, 0.1): diff --git a/pyproject.toml b/pyproject.toml index 7518049d46..ff6a9d3a41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ dependencies = [ 'scipy>=1.6.0', 'h5py>=2.7', 'setuptools', + "psutil; sys_platform == 'win32'" ] [project.urls] diff --git a/pyscf/__init__.py b/pyscf/__init__.py index 571b5fad3e..37fd9a907b 100644 --- a/pyscf/__init__.py +++ b/pyscf/__init__.py @@ -35,7 +35,7 @@ ''' -__version__ = '2.13.0' +__version__ = '2.13.1' import os import sys diff --git a/pyscf/adc/radc_amplitudes.py b/pyscf/adc/radc_amplitudes.py index 02f978c8d6..5996c52d67 100644 --- a/pyscf/adc/radc_amplitudes.py +++ b/pyscf/adc/radc_amplitudes.py @@ -565,5 +565,5 @@ def 
_create_t2_h5cache(): as a temporary workaround before figuring out a better solution to handle big t2 amplitudes. ''' - tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + tmpfile = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) return h5py.File(tmpfile.name, 'w') diff --git a/pyscf/adc/test/test_radc/test_ee_df_N2.py b/pyscf/adc/test/test_radc/test_ee_df_N2.py index 55bcd677dd..c44605edc5 100644 --- a/pyscf/adc/test/test_radc/test_ee_df_N2.py +++ b/pyscf/adc/test/test_radc/test_ee_df_N2.py @@ -20,9 +20,7 @@ import unittest import numpy as np import math -from pyscf import gto -from pyscf import scf -from pyscf import adc +from pyscf import gto, scf, adc, lib def setUpModule(): global mol, mf, myadc, myadc_fr @@ -49,8 +47,8 @@ def tearDownModule(): def rdms_test(dm): r2_int = mol.intor('int1e_r2') - dm_ao = np.einsum('pi,ij,qj->pq', mf.mo_coeff, dm, mf.mo_coeff.conj()) - r2 = np.einsum('pq,pq->',r2_int,dm_ao) + dm_ao = lib.einsum('pi,ij,qj->pq', mf.mo_coeff, dm, mf.mo_coeff.conj()) + r2 = lib.einsum('pq,pq->',r2_int,dm_ao) return r2 class KnownValues(unittest.TestCase): @@ -72,10 +70,10 @@ def test_ee_adc2(self): self.assertAlmostEqual(p[3], 6.481812538202186e-30, 6) dm1_exc = np.array(myadc.make_rdm1()) - self.assertAlmostEqual(rdms_test(dm1_exc[0]), 39.97509426976306, 4) - self.assertAlmostEqual(rdms_test(dm1_exc[1]), 39.97509426976296, 4) - self.assertAlmostEqual(rdms_test(dm1_exc[2]), 40.69394840350379, 4) - self.assertAlmostEqual(rdms_test(dm1_exc[3]), 40.99987050864409, 4) + self.assertAlmostEqual(rdms_test(dm1_exc[0]) - 39.97509426976306, 0, 3) + self.assertAlmostEqual(rdms_test(dm1_exc[1]) - 39.97509426976296, 0, 3) + self.assertAlmostEqual(rdms_test(dm1_exc[2]) - 40.69394840350379, 0, 3) + self.assertAlmostEqual(rdms_test(dm1_exc[3]) - 40.99987050864409, 0, 3) def test_ee_adc2x(self): @@ -124,7 +122,7 @@ def test_ee_adc2x_cis(self): self.assertAlmostEqual(rdms_test(dm1_exc[3]), 40.91091417592432, 4) - def test_ee_adc3(self): + def 
test_ee_adc3_high_cost(self): myadc.method = "adc(3)" e, t_amp1, t_amp2 = myadc.kernel_gs() diff --git a/pyscf/adc/test/test_uadc/test_ee_df_F2.py b/pyscf/adc/test/test_uadc/test_ee_df_F2.py index 323427ffad..fae700bef9 100644 --- a/pyscf/adc/test/test_uadc/test_ee_df_F2.py +++ b/pyscf/adc/test/test_uadc/test_ee_df_F2.py @@ -120,7 +120,7 @@ def test_ee_adc2x_cis(self): self.assertAlmostEqual(rdms_test(dm1_exc[0][2],dm1_exc[1][2]), 40.49491598756553, 6) self.assertAlmostEqual(rdms_test(dm1_exc[0][3],dm1_exc[1][3]), 40.49491598756554, 6) - def test_ee_adc3(self): + def test_ee_adc3_high_cost(self): myadc.method = "adc(3)" e,v,p,x = myadc.kernel(nroots=4) diff --git a/pyscf/adc/test/test_uadc/test_ee_rohf_CN.py b/pyscf/adc/test/test_uadc/test_ee_rohf_CN.py index 09471cb7fb..c9e94391ab 100644 --- a/pyscf/adc/test/test_uadc/test_ee_rohf_CN.py +++ b/pyscf/adc/test/test_uadc/test_ee_rohf_CN.py @@ -122,7 +122,7 @@ def test_ee_adc2x(self): self.assertAlmostEqual(rdms_test(dm1_exc[0][2],dm1_exc[1][2]), 40.27044834802643, 4) self.assertAlmostEqual(rdms_test(dm1_exc[0][3],dm1_exc[1][3]), 40.64183214575419, 4) - def test_ee_adc3(self): + def test_ee_adc3_high_cost(self): myadc.method = "adc(3)" e,v,p,x = myadc.kernel(nroots=4) diff --git a/pyscf/agf2/test/test_ragf2_h2o.py b/pyscf/agf2/test/test_ragf2_h2o.py index 6f890638d7..d395309a4a 100644 --- a/pyscf/agf2/test/test_ragf2_h2o.py +++ b/pyscf/agf2/test/test_ragf2_h2o.py @@ -17,7 +17,6 @@ # import unittest -import tempfile import numpy as np import h5py from pyscf import gto, scf, agf2, lib @@ -29,8 +28,8 @@ class KnownValues(unittest.TestCase): def setUpClass(self): self.mol = gto.M(atom='O 0 0 0; H 0 0 1; H 0 1 0', basis='cc-pvdz', verbose=0) self.mf = scf.RHF(self.mol) - self.mf.chkfile = tempfile.NamedTemporaryFile().name self.mf.conv_tol = 1e-12 + self.mf.chkfile = lib.NamedTemporaryFile().name self.mf.run() self.gf2 = agf2.RAGF2(self.mf) self.gf2.conv_tol = 1e-7 diff --git a/pyscf/agf2/test/test_uagf2_beh.py 
b/pyscf/agf2/test/test_uagf2_beh.py index 081e06b4e5..68857d1279 100644 --- a/pyscf/agf2/test/test_uagf2_beh.py +++ b/pyscf/agf2/test/test_uagf2_beh.py @@ -17,7 +17,6 @@ # import unittest -import tempfile import numpy as np from pyscf import gto, scf, agf2, lib @@ -28,8 +27,8 @@ class KnownValues(unittest.TestCase): def setUpClass(self): self.mol = gto.M(atom='Be 0 0 0; H 0 0 1', basis='cc-pvdz', spin=1, verbose=0) self.mf = scf.UHF(self.mol) - self.mf.chkfile = tempfile.NamedTemporaryFile().name self.mf.conv_tol = 1e-12 + self.mf.chkfile = lib.NamedTemporaryFile().name self.mf.run() self.gf2 = agf2.UAGF2(self.mf) self.gf2.conv_tol = 1e-7 @@ -72,9 +71,9 @@ def test_uagf2_beh_ea(self): def test_uagf2_outcore(self): # tests the out-of-core and chkfile support for AGF2 for BeH/cc-pvdz gf2 = agf2.UAGF2(self.mf) - gf2.chkfile = tempfile.NamedTemporaryFile().name gf2.max_memory = 1 gf2.conv_tol = 1e-7 + gf2.chkfile = lib.NamedTemporaryFile().name gf2.run() e_ip, v_ip = self.gf2.ipagf2(nroots=1) e_ea, v_ea = self.gf2.eaagf2(nroots=1) diff --git a/pyscf/ao2mo/__init__.py b/pyscf/ao2mo/__init__.py index fbe646eb56..02b62949dc 100644 --- a/pyscf/ao2mo/__init__.py +++ b/pyscf/ao2mo/__init__.py @@ -35,6 +35,7 @@ from pyscf.ao2mo import incore from pyscf.ao2mo import outcore from pyscf.ao2mo import r_outcore +from pyscf.ao2mo import nrr_outcore from pyscf.ao2mo.addons import load, restore def full(eri_or_mol, mo_coeff, erifile=None, dataname='eri_mo', intor='int2e', @@ -147,6 +148,8 @@ def full(eri_or_mol, mo_coeff, erifile=None, dataname='eri_mo', intor='int2e', elif isinstance(eri_or_mol, gto.MoleBase): if '_spinor' in intor: mod = r_outcore + elif numpy.result_type(mo_coeff) == numpy.complex128: + mod = nrr_outcore else: mod = outcore @@ -302,6 +305,8 @@ def general(eri_or_mol, mo_coeffs, erifile=None, dataname='eri_mo', intor='int2e elif isinstance(eri_or_mol, gto.MoleBase): if '_spinor' in intor: mod = r_outcore + elif numpy.result_type(*mo_coeffs) == numpy.complex128: + 
mod = nrr_outcore else: mod = outcore diff --git a/pyscf/ao2mo/_ao2mo.py b/pyscf/ao2mo/_ao2mo.py index a2b04b458d..5f06314e60 100644 --- a/pyscf/ao2mo/_ao2mo.py +++ b/pyscf/ao2mo/_ao2mo.py @@ -14,15 +14,15 @@ # limitations under the License. import ctypes -import _ctypes import numpy from pyscf import lib from pyscf.gto.moleintor import make_cintopt, make_loc, ascint3 from pyscf.scf import _vhf libao2mo = lib.load_library('libao2mo') + def _fpointer(name): - return ctypes.c_void_p(_ctypes.dlsym(libao2mo._handle, name)) + return ctypes.cast(getattr(libao2mo, name), ctypes.c_void_p) class AO2MOpt: def __init__(self, mol, intor, prescreen='CVHFnoscreen', qcondname=None): diff --git a/pyscf/ao2mo/nrr_outcore.py b/pyscf/ao2mo/nrr_outcore.py index 39556e07ad..51d44ec086 100644 --- a/pyscf/ao2mo/nrr_outcore.py +++ b/pyscf/ao2mo/nrr_outcore.py @@ -18,11 +18,9 @@ ''' import time -import tempfile import numpy import h5py import ctypes -import _ctypes from pyscf import lib from pyscf import gto from pyscf.lib import logger @@ -33,7 +31,7 @@ libao2mo = lib.load_library('libao2mo') def _fpointer(name): - return ctypes.c_void_p(_ctypes.dlsym(libao2mo._handle, name)) + return ctypes.cast(getattr(libao2mo, name), ctypes.c_void_p) IOBLK_SIZE = getattr(__config__, 'ao2mo_outcore_ioblk_size', 256) # 256 MB IOBUF_WORDS = getattr(__config__, 'ao2mo_outcore_iobuf_words', 1e8) # 1.6 GB @@ -124,7 +122,7 @@ def general(mol, mo_coeffs, erifile, dataname='eri_mo', float(nij_pair)*nkl_pair*comp, nij_pair*nkl_pair*comp*16/1e6) # transform e1 - swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + swapfile = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) half_e1(mol, (mo_alph, mo_beta), swapfile.name, intor, aosym, comp, max_memory, ioblk_size, log) time_1pass = log.timer('AO->MO transformation for %s 1 pass'%intor, @@ -189,7 +187,7 @@ def general(mol, mo_coeffs, erifile, dataname='eri_mo', def full_iofree(mol, mo_coeff, dataname='eri_mo', intor='int2e_sph', motype='ghf', aosym='s1', 
comp=None, verbose=logger.debug, **kwargs): - erifile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + erifile = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) general(mol, (mo_coeff,)*4, erifile.name, dataname='eri_mo', intor=intor, motype=motype, aosym=aosym, comp=comp, verbose=verbose) @@ -199,7 +197,7 @@ def full_iofree(mol, mo_coeff, dataname='eri_mo', intor='int2e_sph', def general_iofree(mol, mo_coeffs, dataname='eri_mo', intor='int2e_sph', motype='ghf', aosym='s1', comp=None, verbose=logger.debug, **kwargs): - erifile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + erifile = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) general(mol, mo_coeffs, erifile.name, dataname='eri_mo', intor=intor, motype=motype, aosym=aosym, comp=comp, verbose=verbose) diff --git a/pyscf/ao2mo/r_outcore.py b/pyscf/ao2mo/r_outcore.py index 2c77b5f89a..66b4ce7ae6 100644 --- a/pyscf/ao2mo/r_outcore.py +++ b/pyscf/ao2mo/r_outcore.py @@ -14,7 +14,6 @@ # limitations under the License. -import tempfile import numpy import h5py from pyscf import lib @@ -103,7 +102,7 @@ def general(mol, mo_coeffs, erifile, dataname='eri_mo', float(nij_pair)*nkl_pair*comp, nij_pair*nkl_pair*comp*16/1e6) # transform e1 - swapfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + swapfile = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) half_e1(mol, mo_coeffs, swapfile.name, intor, aosym, comp, max_memory, ioblk_size, log) @@ -253,7 +252,7 @@ def half_e1(mol, mo_coeffs, swapfile, def full_iofree(mol, mo_coeff, intor='int2e_spinor', aosym='s4', comp=None, verbose=logger.WARN, **kwargs): - erifile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + erifile = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) general(mol, (mo_coeff,)*4, erifile.name, dataname='eri_mo', intor=intor, aosym=aosym, comp=comp, verbose=verbose) @@ -262,7 +261,7 @@ def full_iofree(mol, mo_coeff, intor='int2e_spinor', aosym='s4', comp=None, def general_iofree(mol, mo_coeffs, intor='int2e_spinor', aosym='s4', comp=None, 
verbose=logger.WARN, **kwargs): - erifile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + erifile = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) general(mol, mo_coeffs, erifile.name, dataname='eri_mo', intor=intor, aosym=aosym, comp=comp, verbose=verbose) diff --git a/pyscf/ao2mo/semi_incore.py b/pyscf/ao2mo/semi_incore.py index f5a488b838..11de1fda6c 100644 --- a/pyscf/ao2mo/semi_incore.py +++ b/pyscf/ao2mo/semi_incore.py @@ -290,7 +290,7 @@ def save(start, stop, buf): onnn2 = ao2mo.incore.general(mf._eri, (orbo,mo_coeff,mo_coeff,mo_coeff)) print(' Time elapsed (s): ',logger.perf_counter() - start_time) - tmpfile2 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + tmpfile2 = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) print('\n\nCustom outcore transformation ...') orbo = mo_coeff[:,:nocc] diff --git a/pyscf/ao2mo/test/test_incore.py b/pyscf/ao2mo/test/test_incore.py index befa0a484c..430c7e00c0 100644 --- a/pyscf/ao2mo/test/test_incore.py +++ b/pyscf/ao2mo/test/test_incore.py @@ -16,7 +16,6 @@ import ctypes import unittest from functools import reduce -import tempfile import numpy import h5py from pyscf import lib diff --git a/pyscf/ao2mo/test/test_init.py b/pyscf/ao2mo/test/test_init.py index 21641746cd..66322ef869 100644 --- a/pyscf/ao2mo/test/test_init.py +++ b/pyscf/ao2mo/test/test_init.py @@ -16,7 +16,6 @@ import ctypes import unittest from functools import reduce -import tempfile import numpy import h5py from pyscf import lib @@ -80,7 +79,7 @@ def test_full(self): with ao2mo.load(h5file, 'eri') as eri: self.assertEqual(eri.shape, (10,10)) - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() ao2mo.kernel(mol, mo, ftmp, intor='int2e', dataname='eri') with ao2mo.load(ftmp, 'eri') as eri: self.assertEqual(eri.shape, (10,10)) @@ -97,7 +96,7 @@ def test_general(self): ao2mo.kernel(mol, [mo]*4, erifile=h5file, intor='int2e', dataname='eri') self.assertEqual(h5file['eri'].shape, (10,10)) - ftmp = tempfile.NamedTemporaryFile() + ftmp = 
lib.NamedTemporaryFile() ao2mo.kernel(mol, [mo]*4, ftmp, intor='int2e', dataname='eri') with ao2mo.load(ftmp.name, 'eri') as eri: self.assertEqual(eri.shape, (10,10)) diff --git a/pyscf/ao2mo/test/test_nrr_outcore.py b/pyscf/ao2mo/test/test_nrr_outcore.py index a81e275b34..2f2c16218a 100644 --- a/pyscf/ao2mo/test/test_nrr_outcore.py +++ b/pyscf/ao2mo/test/test_nrr_outcore.py @@ -14,7 +14,6 @@ # limitations under the License. import unittest -import tempfile import numpy as np import h5py from pyscf import lib diff --git a/pyscf/ao2mo/test/test_outcore.py b/pyscf/ao2mo/test/test_outcore.py index 6dcd76644a..dc6e3a62da 100644 --- a/pyscf/ao2mo/test/test_outcore.py +++ b/pyscf/ao2mo/test/test_outcore.py @@ -16,7 +16,6 @@ import ctypes import unittest from functools import reduce -import tempfile import numpy import h5py from pyscf import lib @@ -47,7 +46,7 @@ def tearDownModule(): class KnownValues(unittest.TestCase): def test_nroutcore_grad(self): - ftmp = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + ftmp = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) erifile = ftmp.name eri_ao = mol.intor('int2e_ip1', aosym='s1').reshape(3,nao,nao,nao,nao) eriref = numpy.einsum('npjkl,pi->nijkl', eri_ao, mo) @@ -64,7 +63,7 @@ def test_nroutcore_grad(self): self.assertTrue(numpy.allclose(eri1, eriref)) def test_nroutcore_eri(self): - ftmp = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + ftmp = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) erifile = ftmp.name eri_ao = ao2mo.restore(1, mol.intor('int2e', aosym='s2kl'), nao) eriref = numpy.einsum('pjkl,pi->ijkl', eri_ao, mo) diff --git a/pyscf/ao2mo/test/test_r_outcore.py b/pyscf/ao2mo/test/test_r_outcore.py index 271968f950..66d9263da2 100644 --- a/pyscf/ao2mo/test/test_r_outcore.py +++ b/pyscf/ao2mo/test/test_r_outcore.py @@ -14,7 +14,6 @@ # limitations under the License. 
import unittest -import tempfile import numpy from pyscf import lib from pyscf import gto @@ -51,7 +50,7 @@ def test_r_outcore_eri(self): numpy.random.seed(1) mo = numpy.random.random((n2c,n2c)) + numpy.random.random((n2c,n2c))*1j eriref = trans(eri0, [mo]*4) - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() ao2mo.kernel(mol, mo, erifile=ftmp.name, intor='int2e_spinor', max_memory=10, ioblk_size=5) with ao2mo.load(ftmp) as eri1: diff --git a/pyscf/ao2mo/test/test_semi_incore.py b/pyscf/ao2mo/test/test_semi_incore.py index 8271c7b051..0fc95fbd01 100644 --- a/pyscf/ao2mo/test/test_semi_incore.py +++ b/pyscf/ao2mo/test/test_semi_incore.py @@ -16,7 +16,6 @@ import ctypes import unittest from functools import reduce -import tempfile import numpy import h5py from pyscf import lib @@ -46,7 +45,7 @@ def test_general(self): mo = numpy.random.random((nao,nmo)) eriref = ao2mo.incore.full(eri, mo) - tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + tmpfile = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) io_size = nao**2*4e-5 semi_incore.general(eri, [mo]*4, tmpfile.name, ioblk_size=io_size) @@ -67,7 +66,7 @@ def test_general_complex(self): mo.conj(), mo, mo.conj(), mo) eriref = eriref.reshape(12**2,12**2) - tmpfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + tmpfile = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) io_size = nao**2*4e-5 semi_incore.general(eri, [mo]*4, tmpfile.name, ioblk_size=io_size) diff --git a/pyscf/cc/gccsd.py b/pyscf/cc/gccsd.py index 303b387ae3..934ebaadf0 100644 --- a/pyscf/cc/gccsd.py +++ b/pyscf/cc/gccsd.py @@ -127,7 +127,7 @@ def init_amps(self, eris=None): eia = mo_e[:nocc,None] - mo_e[None,nocc:] eijab = lib.direct_sum('ia,jb->ijab', eia, eia) t1 = eris.fock[:nocc,nocc:] / eia - eris_oovv = np.array(eris.oovv) + eris_oovv = np.asarray(eris.oovv) t2 = eris_oovv.conj() / eijab self.emp2 = 0.25*einsum('ijab,ijab', t2, eris_oovv).real logger.info(self, 'Init t2, MP2 energy = %.15g', self.emp2) @@ -346,20 
+346,23 @@ def _make_eris_incore(mycc, mo_coeff=None, ao2mofn=None): eris._common_init_(mycc, mo_coeff) nocc = eris.nocc nao, nmo = eris.mo_coeff.shape + nao = nao // 2 if callable(ao2mofn): eri = ao2mofn(eris.mo_coeff).reshape([nmo]*4) - else: - assert (eris.mo_coeff.dtype == np.double) - mo_a = eris.mo_coeff[:nao//2] - mo_b = eris.mo_coeff[nao//2:] + elif eris.mo_coeff.dtype == np.float64: + mo_a = eris.mo_coeff[:nao] + mo_b = eris.mo_coeff[nao:] orbspin = eris.orbspin if orbspin is None: eri = ao2mo.kernel(mycc._scf._eri, mo_a) eri += ao2mo.kernel(mycc._scf._eri, mo_b) eri1 = ao2mo.kernel(mycc._scf._eri, (mo_a,mo_a,mo_b,mo_b)) eri += eri1 - eri += eri1.T + if eri1.ndim == 2: + eri += eri1.T + else: + eri += eri1.transpose(2,3,0,1) else: mo = mo_a + mo_b eri = ao2mo.kernel(mycc._scf._eri, mo) @@ -373,6 +376,16 @@ def _make_eris_incore(mycc, mo_coeff=None, ao2mofn=None): if eri.dtype == np.double: eri = ao2mo.restore(1, eri, nmo) + else: + assert eris.mo_coeff.dtype == np.complex128 + mo_a = eris.mo_coeff[:nao] + mo_b = eris.mo_coeff[nao:] + eri_ao = ao2mo.restore(1, mycc._scf._eri, nao) + eri = ao2mo.kernel(eri_ao, mo_a) + eri += ao2mo.kernel(eri_ao, mo_b) + eri1 = ao2mo.kernel(eri_ao, (mo_a,mo_a,mo_b,mo_b)) + eri += eri1 + eri += eri1.transpose(2,3,0,1) eri = eri.reshape(nmo,nmo,nmo,nmo) eri = eri.transpose(0,2,1,3) - eri.transpose(0,2,3,1) @@ -395,30 +408,72 @@ def _make_eris_outcore(mycc, mo_coeff=None): eris = _PhysicistsERIs() eris._common_init_(mycc, mo_coeff) nocc = eris.nocc - nao, nmo = eris.mo_coeff.shape + mo = eris.mo_coeff + nao, nmo = mo.shape + nao = nao // 2 nvir = nmo - nocc - assert (eris.mo_coeff.dtype == np.double) - mo_a = eris.mo_coeff[:nao//2] - mo_b = eris.mo_coeff[nao//2:] + mo_a = mo[:nao] + mo_b = mo[nao:] orbspin = eris.orbspin feri = eris.feri = lib.H5TmpFile() - dtype = np.result_type(eris.mo_coeff).char - eris.oooo = feri.create_dataset('oooo', (nocc,nocc,nocc,nocc), dtype) - eris.ooov = feri.create_dataset('ooov', 
(nocc,nocc,nocc,nvir), dtype) - eris.oovv = feri.create_dataset('oovv', (nocc,nocc,nvir,nvir), dtype) - eris.ovov = feri.create_dataset('ovov', (nocc,nvir,nocc,nvir), dtype) - eris.ovvo = feri.create_dataset('ovvo', (nocc,nvir,nvir,nocc), dtype) - eris.ovvv = feri.create_dataset('ovvv', (nocc,nvir,nvir,nvir), dtype) - - if orbspin is None: + dtype = np.result_type(eris.mo_coeff) + eris.oooo = feri.create_dataset('oooo', (nocc,nocc,nocc,nocc), dtype.char) + eris.ooov = feri.create_dataset('ooov', (nocc,nocc,nocc,nvir), dtype.char) + eris.oovv = feri.create_dataset('oovv', (nocc,nocc,nvir,nvir), dtype.char) + eris.ovov = feri.create_dataset('ovov', (nocc,nvir,nocc,nvir), dtype.char) + eris.ovvo = feri.create_dataset('ovvo', (nocc,nvir,nvir,nocc), dtype.char) + eris.ovvv = feri.create_dataset('ovvv', (nocc,nvir,nvir,nvir), dtype.char) + + if mo.dtype == np.complex128: + max_memory = mycc.max_memory-lib.current_memory()[0] + blksize = min(nocc, max(2, int(max_memory*1e6/dtype.itemsize/(nmo**3*2)))) + max_memory = max(MEMORYMIN, max_memory) + + orbo = mo[:,:nocc] + orbv = mo[:,nocc:] + fswap = lib.H5TmpFile() + ao2mo.kernel(mycc.mol, (orbo,mo,mo,mo), fswap, 'eri_mo', + max_memory=max_memory, verbose=log) + + for p0, p1 in lib.prange(0, nocc, blksize): + tmp = np.asarray(fswap['eri_mo'][p0*nmo:p1*nmo]) + tmp = tmp.reshape(p1-p0, nmo, nmo, nmo) + eris.oooo[p0:p1] = (tmp[:,:nocc,:nocc,:nocc].transpose(0,2,1,3) - + tmp[:,:nocc,:nocc,:nocc].transpose(0,2,3,1)) + eris.ooov[p0:p1] = (tmp[:,:nocc,:nocc,nocc:].transpose(0,2,1,3) - + tmp[:,nocc:,:nocc,:nocc].transpose(0,2,3,1)) + eris.ovvv[p0:p1] = (tmp[:,nocc:,nocc:,nocc:].transpose(0,2,1,3) - + tmp[:,nocc:,nocc:,nocc:].transpose(0,2,3,1)) + eris.oovv[p0:p1] = (tmp[:,nocc:,:nocc,nocc:].transpose(0,2,1,3) - + tmp[:,nocc:,:nocc,nocc:].transpose(0,2,3,1)) + eris.ovov[p0:p1] = (tmp[:,:nocc,nocc:,nocc:].transpose(0,2,1,3) - + tmp[:,nocc:,nocc:,:nocc].transpose(0,2,3,1)) + eris.ovvo[p0:p1] = 
(tmp[:,nocc:,nocc:,:nocc].transpose(0,2,1,3) - + tmp[:,:nocc,nocc:,nocc:].transpose(0,2,3,1)) + tmp = None + fswap = None + cput0 = log.timer_debug1('transforming ovvv', *cput0) + + eris.vvvv = feri.create_dataset('vvvv', (nvir,nvir,nvir,nvir), dtype.char) + fswap = lib.H5TmpFile() + ao2mo.kernel(mycc.mol, orbv, fswap, 'vvvv', + max_memory=max_memory, verbose=log) + for p0, p1 in lib.prange(0, nvir, blksize): + tmp = np.asarray(fswap['vvvv'][p0*nvir:p1*nvir]) + tmp = tmp.reshape(p1-p0, nvir, nvir, nvir) + eris.vvvv[p0:p1] = tmp.transpose(0,2,1,3) - tmp.transpose(0,2,3,1) + tmp = None + cput0 = log.timer_debug1('transforming vvvv', *cput0) + + elif orbspin is None: orbo_a = mo_a[:,:nocc] orbv_a = mo_a[:,nocc:] orbo_b = mo_b[:,:nocc] orbv_b = mo_b[:,nocc:] max_memory = mycc.max_memory-lib.current_memory()[0] - blksize = min(nocc, max(2, int(max_memory*1e6/8/(nmo**3*2)))) + blksize = min(nocc, max(2, int(max_memory*1e6/dtype.itemsize/(nmo**3*2)))) max_memory = max(MEMORYMIN, max_memory) fswap = lib.H5TmpFile() @@ -452,7 +507,7 @@ def _make_eris_outcore(mycc, mo_coeff=None): tmp = None cput0 = log.timer_debug1('transforming ovvv', *cput0) - eris.vvvv = feri.create_dataset('vvvv', (nvir,nvir,nvir,nvir), dtype) + eris.vvvv = feri.create_dataset('vvvv', (nvir,nvir,nvir,nvir), dtype.char) tril2sq = lib.square_mat_in_trilu_indices(nvir) fswap = lib.H5TmpFile() ao2mo.kernel(mycc.mol, (orbv_a,orbv_a,orbv_a,orbv_a), fswap, 'aaaa', @@ -491,7 +546,7 @@ def _make_eris_outcore(mycc, mo_coeff=None): orbv = mo[:,nocc:] max_memory = mycc.max_memory-lib.current_memory()[0] - blksize = min(nocc, max(2, int(max_memory*1e6/8/(nmo**3*2)))) + blksize = min(nocc, max(2, int(max_memory*1e6/dtype.itemsize/(nmo**3*2)))) max_memory = max(MEMORYMIN, max_memory) fswap = lib.H5TmpFile() @@ -520,7 +575,7 @@ def _make_eris_outcore(mycc, mo_coeff=None): tmp = None cput0 = log.timer_debug1('transforming ovvv', *cput0) - eris.vvvv = feri.create_dataset('vvvv', (nvir,nvir,nvir,nvir), dtype) + eris.vvvv 
= feri.create_dataset('vvvv', (nvir,nvir,nvir,nvir), dtype.char) sym_forbid = (orbspin[nocc:,None]!=orbspin[nocc:])[np.tril_indices(nvir)] tril2sq = lib.square_mat_in_trilu_indices(nvir) diff --git a/pyscf/cc/rccsdt.py b/pyscf/cc/rccsdt.py index 7fe8935828..a1204ba066 100644 --- a/pyscf/cc/rccsdt.py +++ b/pyscf/cc/rccsdt.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2025 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -37,7 +37,7 @@ from pyscf.ao2mo import _ao2mo from pyscf.lib import logger from pyscf.mp.mp2 import get_nocc, get_nmo, get_frozen_mask, get_e_hf, _mo_without_core -from pyscf.cc import ccsd, _ccsd +from pyscf.cc import ccsd from pyscf import __config__ @@ -121,23 +121,6 @@ def unpack_t3_tri2block_(t3, t3_blk, map_, mask, i0, i1, j0, j1, k0, k1, nocc, n ) return t3_blk -def unpack_t3_tri2single_pair_(t3, t3_blk, map_, mask, i0, j0, k0, nocc, nvir): - assert t3.dtype == np.float64 and t3_blk.dtype == np.float64 - assert map_.dtype == np.int64 and mask.dtype == np.bool_ - t3 = np.ascontiguousarray(t3) - t3_blk = np.ascontiguousarray(t3_blk) - map_ = np.ascontiguousarray(map_) - mask = np.ascontiguousarray(mask) - _libccsdt.unpack_t3_tri2single_pair_( - t3.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), - t3_blk.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), - map_.ctypes.data_as(ctypes.POINTER(ctypes.c_int64)), - mask.ctypes.data_as(ctypes.POINTER(ctypes.c_bool)), - ctypes.c_int64(i0), ctypes.c_int64(j0), ctypes.c_int64(k0), - ctypes.c_int64(nocc), ctypes.c_int64(nvir), - ) - return t3_blk - def unpack_t3_tri2block_pair_(t3, t3_blk, map_, mask, i0, i1, j0, j1, k0, k1, nocc, nvir, blk_i, blk_j, blk_k): assert t3.dtype == np.float64 and t3_blk.dtype == np.float64 assert map_.dtype == np.int64 and mask.dtype == np.bool_ @@ -177,22 +160,6 @@ def 
accumulate_t3_block2tri_(t3, t3_blk, map_, i0, i1, j0, j1, k0, k1, nocc, nvi ) return t3 -def accumulate_t3_single2tri_(t3, t3_blk, map_, i0, j0, k0, nocc, nvir, alpha, beta): - assert t3.dtype == np.float64 and t3_blk.dtype == np.float64 - assert map_.dtype == np.int64 - t3 = np.ascontiguousarray(t3) - t3_blk = np.ascontiguousarray(t3_blk) - map_ = np.ascontiguousarray(map_) - _libccsdt.accumulate_t3_single2tri_( - t3.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), - t3_blk.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), - map_.ctypes.data_as(ctypes.POINTER(ctypes.c_int64)), - ctypes.c_int64(i0), ctypes.c_int64(j0), ctypes.c_int64(k0), - ctypes.c_int64(nocc), ctypes.c_int64(nvir), - ctypes.c_double(alpha), ctypes.c_double(beta) - ) - return t3 - def _unpack_t3_(mycc, t3, t3_blk, i0, i1, j0, j1, k0, k1, blksize0=None, blksize1=None, blksize2=None): r'''Unpack triangular-stored T3 amplitudes into the block `t3_full[i0:i1, j0:j1, k0:k1, :, :, :]`''' if blksize0 is None: blksize0 = mycc.blksize @@ -202,17 +169,9 @@ def _unpack_t3_(mycc, t3, t3_blk, i0, i1, j0, j1, k0, k1, blksize0=None, blksize i0, i1, j0, j1, k0, k1, mycc.nocc, mycc.nmo - mycc.nocc, blksize0, blksize1, blksize2) return t3_blk -def _unpack_t3_s_pair_(mycc, t3, t3_blk, i0, j0, k0): - r'''Unpack triangular-stored T3 amplitudes into the block - `t3_full[i0, j0, k0, :, :, :] + t3_full[j0, i0, k0, :, :, :].transpose(1, 0, 2)` - ''' - unpack_t3_tri2single_pair_(t3, t3_blk, mycc.tri2block_map, mycc.tri2block_mask, - i0, j0, k0, mycc.nocc, mycc.nmo - mycc.nocc) - return t3_blk - def _unpack_t3_pair_(mycc, t3, t3_blk, i0, i1, j0, j1, k0, k1, blksize0=None, blksize1=None, blksize2=None): r'''Unpack triangular-stored T3 amplitudes into the block - `t3_full[i0:i1, j0:j1, k0:k1, :, :, :] + t3_full[k0:k1, j0:j1, i0:i1, :, :, :].transpose(0, 1, 2, 3, 5, 4)` + `t3_full[i0:i1, j0:j1, k0:k1, :, :, :] + t3_full[i0:i1, j0:j1, k0:k1, :, :, :].transpose(0, 1, 2, 4, 5, 3)` ''' if blksize0 is None: blksize0 = 
mycc.blksize_oovv if blksize1 is None: blksize1 = mycc.nocc @@ -230,11 +189,6 @@ def _accumulate_t3_(mycc, t3, t3_blk, i0, i1, j0, j1, k0, k1, mycc.nocc, mycc.nmo - mycc.nocc, blksize0, blksize1, blksize2, alpha=alpha, beta=beta) return t3 -def _accumulate_t3_s_(mycc, t3, t3_blk, i0, j0, k0, alpha=1.0, beta=0.0): - accumulate_t3_single2tri_(t3, t3_blk, mycc.tri2block_map, i0, j0, k0, - mycc.nocc, mycc.nmo - mycc.nocc, alpha=alpha, beta=beta) - return t3 - def setup_tri2block_rhf(mycc): '''Build the map used to unpack and accumulate between the triangular-stored T3 and the block of full T3 tensor.''' nx = lambda n, order: prod(n + i for i in range(order)) // factorial(order) @@ -463,12 +417,13 @@ def intermediates_t1t2(mycc, imds, t2): einsum('lkdc,ljcd->kj', t1_eris[:nocc, :nocc, nocc:, nocc:], t2, out=F_oo, alpha=-1.0, beta=1.0) W_oooo = t1_eris[:nocc, :nocc, :nocc, :nocc].copy() einsum('klcd,ijcd->klij', t1_eris[:nocc, :nocc, nocc:, nocc:], t2, out=W_oooo, alpha=1.0, beta=1.0) - W_ovvo = - t1_eris[:nocc, nocc:, nocc:, :nocc] - einsum('klcd,ilad->kaci', t1_eris[:nocc, :nocc, nocc:, nocc:], t2, out=W_ovvo, alpha=-1.0, beta=1.0) - einsum('kldc,ilad->kaci', t1_eris[:nocc, :nocc, nocc:, nocc:], t2, out=W_ovvo, alpha=0.5, beta=1.0) - einsum('klcd,ilda->kaci', t1_eris[:nocc, :nocc, nocc:, nocc:], t2, out=W_ovvo, alpha=0.5, beta=1.0) - W_ovov = - t1_eris[:nocc, nocc:, :nocc, nocc:] - einsum('kldc,liad->kaic', t1_eris[:nocc, :nocc, nocc:, nocc:], t2, out=W_ovov, alpha=0.5, beta=1.0) + c_t2 = 2.0 * t2 - t2.transpose(0, 1, 3, 2) + W_ovvo = 2.0 * t1_eris[:nocc, nocc:, nocc:, :nocc] - t1_eris[:nocc, nocc:, :nocc, nocc:].transpose(0, 1, 3, 2) + einsum('mled,miea->ladi', t1_eris[:nocc, :nocc, nocc:, nocc:], c_t2, out=W_ovvo, alpha=1.0, beta=1.0) + einsum('mlde,miea->ladi', t1_eris[:nocc, :nocc, nocc:, nocc:], c_t2, out=W_ovvo, alpha=-0.5, beta=1.0) + c_t2 = None + W_ovov = t1_eris[:nocc, nocc:, :nocc, nocc:].copy() + einsum('mlde,imea->laid', t1_eris[:nocc, :nocc, nocc:, 
nocc:], t2, out=W_ovov, alpha=-0.5, beta=1.0) imds.F_vv, imds.F_oo, imds.W_oooo, imds.W_ovvo, imds.W_ovov = F_vv, F_oo, W_oooo, W_ovvo, W_ovov return imds @@ -495,12 +450,10 @@ def compute_r1r2(mycc, imds, t2): einsum("kj,ikab->ijab", F_oo, t2, out=r2, alpha=-1.0, beta=1.0) einsum("abcd,ijcd->ijab", t1_eris[nocc:, nocc:, nocc:, nocc:], t2, out=r2, alpha=0.5, beta=1.0) einsum("klij,klab->ijab", W_oooo, t2, out=r2, alpha=0.5, beta=1.0) - einsum("kajc,ikcb->ijab", W_ovov, t2, out=r2, alpha=1.0, beta=1.0) - einsum("kaci,kjcb->ijab", W_ovvo, t2, out=r2, alpha=-2.0, beta=1.0) - einsum("kaic,kjcb->ijab", W_ovov, t2, out=r2, alpha=1.0, beta=1.0) - einsum("kaci,jkcb->ijab", W_ovvo, t2, out=r2, alpha=1.0, beta=1.0) - W_ovvo = imds.W_ovvo = None - W_ovov = imds.W_ovov = None + einsum("kaci,kjcb->ijab", W_ovvo, c_t2, out=r2, alpha=0.5, beta=1.0) + einsum("kaic,jkcb->ijab", W_ovov, t2, out=r2, alpha=-0.5, beta=1.0) + einsum("kbic,jkca->ijab", W_ovov, t2, out=r2, alpha=-1.0, beta=1.0) + c_t2 = None return r1, r2 def r1r2_add_t3_tri_(mycc, imds, r1, r2, t3): @@ -523,9 +476,7 @@ def r1r2_add_t3_tri_(mycc, imds, r1, r2, t3): t3_spin_summation_inplace_(t3_tmp, blksize**3, nvir, "P3_422", 1.0, 0.0) einsum('jkbc,ijkabc->ia', t1_eris[j0:j1, k0:k1, nocc:, nocc:], t3_tmp[:bi, :bj, :bk], out=r1[i0:i1, :], alpha=0.5, beta=1.0) - t3_tmp = None - t3_tmp = np.empty((blksize,) * 3 + (nvir,) * 3, dtype=t3.dtype) for k0, k1 in lib.prange(0, nocc, blksize): bk = k1 - k0 for j0, j1 in lib.prange(0, nocc, blksize): @@ -535,7 +486,7 @@ def r1r2_add_t3_tri_(mycc, imds, r1, r2, t3): _unpack_t3_(mycc, t3, t3_tmp, k0, k1, i0, i1, j0, j1) t3_spin_summation_inplace_(t3_tmp, blksize**3, nvir, "P3_201", 1.0, 0.0) einsum("kc,kijcab->ijab", t1_fock[k0:k1, nocc:], t3_tmp[:bk, :bi, :bj], - out=r2[i0:i1, j0:j1, :, :], alpha=0.5, beta=1.0) + out=r2[i0:i1, j0:j1, :, :], alpha=0.5, beta=1.0) einsum("bkcd,kijdac->ijab", t1_eris[nocc:, k0:k1, nocc:, nocc:], t3_tmp[:bk, :bi, :bj], out=r2[i0:i1, j0:j1, :, :], 
alpha=1.0, beta=1.0) einsum("jklc,kijcab->ilab", t1_eris[j0:j1, k0:k1, :nocc, nocc:], @@ -578,14 +529,12 @@ def intermediates_t3(mycc, imds, t2): einsum('lbde,jlea->abdj', t1_eris[:nocc, nocc:, nocc:, nocc:], t2, out=W_vvvo, alpha=-1.0, beta=1.0) einsum('lmdj,lmab->abdj', t1_eris[:nocc, :nocc, nocc:, :nocc], t2, out=W_vvvo, alpha=1.0, beta=1.0) - W_ovvo = (2.0 * t1_eris[:nocc, nocc:, nocc:, :nocc] - t1_eris[:nocc, nocc:, :nocc, nocc:].transpose(0, 1, 3, 2)) - einsum('mled,miea->ladi', t1_eris[:nocc, :nocc, nocc:, nocc:], c_t2, out=W_ovvo, alpha=2.0, beta=1.0) - einsum('mlde,miea->ladi', t1_eris[:nocc, :nocc, nocc:, nocc:], c_t2, out=W_ovvo, alpha=-1.0, beta=1.0) + einsum('mled,miea->ladi', t1_eris[:nocc, :nocc, nocc:, nocc:], c_t2, out=imds.W_ovvo, alpha=1.0, beta=1.0) + einsum('mlde,miea->ladi', t1_eris[:nocc, :nocc, nocc:, nocc:], c_t2, out=imds.W_ovvo, alpha=-0.5, beta=1.0) c_t2 = None - W_ovov = t1_eris[:nocc, nocc:, :nocc, nocc:].copy() - einsum('mlde,imea->laid', t1_eris[:nocc, :nocc, nocc:, nocc:], t2, out=W_ovov, alpha=-1.0, beta=1.0) - imds.W_vooo, imds.W_ovvo, imds.W_ovov, imds.W_vvvo, imds.W_vvvv = W_vooo, W_ovvo, W_ovov, W_vvvo, W_vvvv + einsum('mlde,imea->laid', t1_eris[:nocc, :nocc, nocc:, nocc:], t2, out=imds.W_ovov, alpha=-0.5, beta=1.0) + imds.W_vooo, imds.W_vvvo, imds.W_vvvv = W_vooo, W_vvvo, W_vvvv return imds def intermediates_t3_add_t3_tri(mycc, imds, t3): @@ -673,10 +622,8 @@ def compute_r3_tri(mycc, imds, t2, t3): _unpack_t3_(mycc, t3, t3_tmp, i0, i1, j0, j1, k0, k1) einsum('ad,ijkdbc->ijkabc', F_vv, t3_tmp[:bi, :bj, :bk], out=r3_tmp[:bi, :bj, :bk], alpha=1.0, beta=1.0) - _unpack_t3_(mycc, t3, t3_tmp, j0, j1, i0, i1, k0, k1) - einsum('bd,jikdac->ijkabc', F_vv, t3_tmp[:bj, :bi, :bk], out=r3_tmp[:bi, :bj, :bk], alpha=1.0, beta=1.0) - _unpack_t3_(mycc, t3, t3_tmp, k0, k1, j0, j1, i0, i1) - einsum('cd,kjidba->ijkabc', F_vv, t3_tmp[:bk, :bj, :bi], out=r3_tmp[:bi, :bj, :bk], alpha=1.0, beta=1.0) + einsum('bd,ijkadc->ijkabc', F_vv, t3_tmp[:bi, :bj, 
:bk], out=r3_tmp[:bi, :bj, :bk], alpha=1.0, beta=1.0) + einsum('cd,ijkabd->ijkabc', F_vv, t3_tmp[:bi, :bj, :bk], out=r3_tmp[:bi, :bj, :bk], alpha=1.0, beta=1.0) _accumulate_t3_(mycc, r3, r3_tmp, i0, i1, j0, j1, k0, k1, alpha=1.0, beta=1.0) time2 = log.timer_debug1('t3: iter: W_vvvo, W_vooo, F_vv [%3d, %3d]:'%(k0, k1), *time2) @@ -740,8 +687,7 @@ def compute_r3_tri(mycc, imds, t2, t3): _unpack_t3_(mycc, t3, t3_tmp, j0, j1, 0, nocc, k0, k1, blksize_oovv, nocc, blksize_oovv) einsum('lbid,jlkdac->ijkabc', W_ovov[:, :, i0:i1, :], t3_tmp[:bj, :, :bk], out=r3_tmp[:bi, :bj, :bk], alpha=-1.0, beta=0.0) - _unpack_t3_(mycc, t3, t3_tmp, k0, k1, 0, nocc, j0, j1, blksize_oovv, nocc, blksize_oovv) - einsum('lcid,kljdab->ijkabc', W_ovov[:, :, i0:i1, :], t3_tmp[:bk, :, :bj], + einsum('lcid,jlkbad->ijkabc', W_ovov[:, :, i0:i1, :], t3_tmp[:bj, :, :bk], out=r3_tmp[:bi, :bj, :bk], alpha=-1.0, beta=1.0) _unpack_t3_pair_(mycc, t3, t3_tmp, j0, j1, 0, nocc, k0, k1) einsum('laid,jlkdbc->ijkabc', W_ovov[:, :, i0:i1, :], t3_tmp[:bj, :, :bk], @@ -750,8 +696,7 @@ def compute_r3_tri(mycc, imds, t2, t3): _unpack_t3_(mycc, t3, t3_tmp, i0, i1, 0, nocc, k0, k1, blksize_oovv, nocc, blksize_oovv) einsum('lajd,ilkdbc->ijkabc', W_ovov[:, :, j0:j1, :], t3_tmp[:bi, :, :bk], out=r3_tmp[:bi, :bj, :bk], alpha=-1.0, beta=1.0) - _unpack_t3_(mycc, t3, t3_tmp, k0, k1, 0, nocc, i0, i1, blksize_oovv, nocc, blksize_oovv) - einsum('lcjd,klidba->ijkabc', W_ovov[:, :, j0:j1, :], t3_tmp[:bk, :, :bi], + einsum('lcjd,ilkabd->ijkabc', W_ovov[:, :, j0:j1, :], t3_tmp[:bi, :, :bk], out=r3_tmp[:bi, :bj, :bk], alpha=-1.0, beta=1.0) _unpack_t3_pair_(mycc, t3, t3_tmp, i0, i1, 0, nocc, k0, k1) einsum('lbjd,ilkdac->ijkabc', W_ovov[:, :, j0:j1, :], t3_tmp[:bi, :, :bk], @@ -760,8 +705,7 @@ def compute_r3_tri(mycc, imds, t2, t3): _unpack_t3_(mycc, t3, t3_tmp, i0, i1, 0, nocc, j0, j1, blksize_oovv, nocc, blksize_oovv) einsum('lakd,iljdcb->ijkabc', W_ovov[:, :, k0:k1, :], t3_tmp[:bi, :, :bj], out=r3_tmp[:bi, :bj, :bk], alpha=-1.0, 
beta=1.0) - _unpack_t3_(mycc, t3, t3_tmp, j0, j1, 0, nocc, i0, i1, blksize_oovv, nocc, blksize_oovv) - einsum('lbkd,jlidca->ijkabc', W_ovov[:, :, k0:k1, :], t3_tmp[:bj, :, :bi], + einsum('lbkd,iljacd->ijkabc', W_ovov[:, :, k0:k1, :], t3_tmp[:bi, :, :bj], out=r3_tmp[:bi, :bj, :bk], alpha=-1.0, beta=1.0) _unpack_t3_pair_(mycc, t3, t3_tmp, i0, i1, 0, nocc, j0, j1) einsum('lckd,iljdab->ijkabc', W_ovov[:, :, k0:k1, :], t3_tmp[:bi, :, :bj], @@ -803,22 +747,16 @@ def compute_r3_tri(mycc, imds, t2, t3): W_oooo = imds.W_oooo = None time1 = log.timer_debug1('t3: W_oooo * t3', *time1) - t3_tmp_s = np.empty((nvir, nvir, nvir), dtype=t3.dtype) - r3_tmp_s = np.empty((nvir, nvir, nvir), dtype=t3.dtype) time2 = logger.process_clock(), logger.perf_counter() - for k0 in range(nocc): - for j0 in range(k0 + 1): - for i0 in range(j0 + 1): - _unpack_t3_s_pair_(mycc, t3, t3_tmp_s, i0, j0, k0) - einsum('abde,dec->abc', W_vvvv, t3_tmp_s, out=r3_tmp_s, alpha=0.5, beta=0.0) - _unpack_t3_s_pair_(mycc, t3, t3_tmp_s, i0, k0, j0) - einsum('acde,deb->abc', W_vvvv, t3_tmp_s, out=r3_tmp_s, alpha=0.5, beta=1.0) - _unpack_t3_s_pair_(mycc, t3, t3_tmp_s, j0, k0, i0) - einsum('bcde,dea->abc', W_vvvv, t3_tmp_s, out=r3_tmp_s, alpha=0.5, beta=1.0) - _accumulate_t3_s_(mycc, r3, r3_tmp_s, i0, j0, k0, alpha=1.0, beta=1.0) - time2 = log.timer_debug1('t3: iter: W_vvvv %3d:'%k0, *time2) - t3_tmp_s = None - r3_tmp_s = None + index = 0 + for i0 in range(nocc): + for j0 in range(i0, nocc): + for k0 in range(j0, nocc): + einsum('abde,dec->abc', W_vvvv, t3[index], out=r3[index], alpha=1.0, beta=1.0) + einsum('acde,dbe->abc', W_vvvv, t3[index], out=r3[index], alpha=1.0, beta=1.0) + einsum('bcde,ade->abc', W_vvvv, t3[index], out=r3[index], alpha=1.0, beta=1.0) + index += 1 + time2 = log.timer_debug1('t3: iter: W_vvvv %3d:'%i0, *time2) W_vvvv = imds.W_vvvv = None time1 = log.timer_debug1('t3: W_vvvv * t3', *time1) return r3 @@ -869,15 +807,9 @@ def update_amps_rccsdt_tri_(mycc, tamps, eris): # symmetrization r2 += 
r2.transpose(1, 0, 3, 2) time1 = log.timer_debug1('t1t2: symmetrize r2', *time1) - # divide by eijkabc + # divide by eijab r1r2_divide_e_(mycc, r1, r2, mo_energy) time1 = log.timer_debug1('t1t2: divide r1 & r2 by eia & eijab', *time1) - - res_norm = [np.linalg.norm(r1), np.linalg.norm(r2)] - - t1 += r1 - t2 += r2 - time1 = log.timer_debug1('t1t2: update t1 & t2', *time1) time0 = log.timer_debug1('t1t2 total', *time0) # t3 @@ -897,11 +829,13 @@ def update_amps_rccsdt_tri_(mycc, tamps, eris): r3_tri_divide_e_(mycc, r3, mo_energy) time1 = log.timer_debug1('t3: divide r3 by eijkabc', *time1) - res_norm.append(np.linalg.norm(r3)) + res_norm = [np.linalg.norm(r1), np.linalg.norm(r2), np.linalg.norm(r3)] + t1 += r1 + t2 += r2 t3 += r3 - r3 = None - time1 = log.timer_debug1('t3: update t3', *time1) + r1, r2, r3 = None, None, None + time1 = log.timer_debug1('t3: update t1, t2, t3', *time1) time0 = log.timer_debug1('t3 total', *time0) return res_norm @@ -1087,9 +1021,9 @@ def restore_from_diis_(mycc, diis_file, inplace=True): else: mycc.tamps[:cc_order - 1] = tamps if mycc.do_tri_max_t: - mycc.tamp[-1] = np.zeros((nx(nocc, cc_order),) + (nvir,) * cc_order, dtype=ccvec.dtype) + mycc.tamps[-1] = np.zeros((nx(nocc, cc_order),) + (nvir,) * cc_order, dtype=ccvec.dtype) else: - mycc.tamp[-1] = np.zeros((nocc,) * cc_order + (nvir,) * cc_order, dtype=ccvec.dtype) + mycc.tamps[-1] = np.zeros((nocc,) * cc_order + (nvir,) * cc_order, dtype=ccvec.dtype) if inplace: mycc.diis = adiis return mycc @@ -1337,8 +1271,8 @@ class RCCSDT(ccsd.CCSDBase): the same way as in CCSD. Additional attributes are: do_diis_max_t : bool - Whether to use DIIS to accelerate convergence. Note that enabling DIIS - will increase memory consumption. + Whether to use DIIS for the highest-order amplitudes to accelerate convergence. + Note that enabling DIIS will increase memory consumption. blksize, blksize_oovv, blksize_oooo : Batch sizes used to reduce the memory footprint during tensor contractions. 
einsum_backend : string @@ -1359,7 +1293,7 @@ class RCCSDT(ccsd.CCSDBase): T amplitudes t1[i,a], t2[i,j,a,b] (i,j in occ, a,b in virt) t3 : An array of shape (compressed_occ, nvir, nvir, nvir) for T3 amplitudes. - The occupied-oribtal dimension is stored in a compressed form for the + The occupied-orbital dimension is stored in a compressed form for the i <= j <= k index combinations. The compressed tensor can be expanded to the full tensor by self.tamps_tri2full(t3) tamps : @@ -1490,8 +1424,11 @@ def ccsdt(self, tamps=None, eris=None): self._finalize() return self.e_corr, self.tamps - def ccsdt_q(self, tamps, eris=None): - raise NotImplementedError + def ccsdt_q(self, tamps=None, eris=None): + from pyscf.cc import rccsdt_q + if tamps is None: tamps = self.tamps + if eris is None: eris = self.ao2mo(self.mo_coeff) + return rccsdt_q.kernel(self, eris, tamps, self.verbose) class _IMDS: @@ -1634,3 +1571,13 @@ def _make_df_eris_incore_rcc(mycc, mo_coeff=None): print('max(abs(t2 difference)) % .10e' % np.max(np.abs(mycc.t2 - mycc2.t2))) print('max(abs(t3_tri - t3_tri_from_t3_full)) % .10e' % np.max(np.abs(t3_tri - t3_tri_from_t3_full))) print('max(abs(t3_full - t3_full_from_t3_tri)) % .10e' % np.max(np.abs(t3_full - t3_full_from_t3_tri))) + + # ccsdt_q + # [Q] and (Q) energy correction + e_q_bracket, e_q_paren = mycc.ccsdt_q() + e_q_bracket2, e_q_paren2 = mycc2.ccsdt_q() + ref_e_q_bracket, ref_e_q_paren = -0.001412978902990858, -0.0017003938319959389 + print('[Q] difference % .10e' % (e_q_bracket - e_q_bracket2)) + print('(Q) difference % .10e' % (e_q_paren - e_q_paren2)) + print('[Q] difference from reference % .10e' % (e_q_bracket - ref_e_q_bracket)) + print('(Q) difference from reference % .10e' % (e_q_paren - ref_e_q_paren)) diff --git a/pyscf/cc/rccsdt_highm.py b/pyscf/cc/rccsdt_highm.py index 7ab9bb5e1a..32fc128970 100644 --- a/pyscf/cc/rccsdt_highm.py +++ b/pyscf/cc/rccsdt_highm.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2025 The PySCF Developers. 
All Rights Reserved. +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -27,13 +27,11 @@ ''' import numpy as np -import numpy import functools import ctypes from pyscf import lib from pyscf.lib import logger -from pyscf.mp.mp2 import get_nocc, get_nmo, get_frozen_mask, get_e_hf, _mo_without_core -from pyscf.cc import _ccsd, rccsdt +from pyscf.cc import rccsdt from pyscf.cc.rccsdt import (_einsum, t3_spin_summation_inplace_, update_t1_fock_eris, intermediates_t1t2, compute_r1r2, r1r2_divide_e_, intermediates_t3, _PhysicistsERIs, _IMDS) from pyscf import __config__ @@ -187,15 +185,9 @@ def update_amps_rccsdt_(mycc, tamps, eris): # symmetrization r2 += r2.transpose(1, 0, 3, 2) time1 = log.timer_debug1('t1t2: symmetrize r2', *time1) - # divide by eijkabc + # divide by eijab r1r2_divide_e_(mycc, r1, r2, mo_energy) time1 = log.timer_debug1('t1t2: divide r1 & r2 by eia & eijab', *time1) - - res_norm = [np.linalg.norm(r1), np.linalg.norm(r2)] - - t1 += r1 - t2 += r2 - time1 = log.timer_debug1('t1t2: update t1 & t2', *time1) time0 = log.timer_debug1('t1t2 total', *time0) intermediates_t3(mycc, imds, t2) @@ -215,11 +207,13 @@ def update_amps_rccsdt_(mycc, tamps, eris): r3_divide_e_(mycc, r3, mo_energy) time1 = log.timer_debug1('t3: divide r3 by eijkabc', *time1) - res_norm.append(np.linalg.norm(r3)) + res_norm = [np.linalg.norm(r1), np.linalg.norm(r2), np.linalg.norm(r3)] + t1 += r1 + t2 += r2 t3 += r3 - r3 = None - time1 = log.timer_debug1('t3: update t3', *time1) + r1, r2, r3 = None, None, None + time1 = log.timer_debug1('t3: update t1, t2, t3', *time1) time0 = log.timer_debug1('t3 total', *time0) return res_norm diff --git a/pyscf/cc/rccsdt_q.py b/pyscf/cc/rccsdt_q.py new file mode 100644 index 0000000000..5ee1dd44f9 --- /dev/null +++ b/pyscf/cc/rccsdt_q.py @@ -0,0 +1,312 @@ +#!/usr/bin/env python +# Copyright 2014-2026 
The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Yu Jin +# Huanchen Zhai +# + +''' +RHF-CCSDT(Q) for real integrals +''' + +import functools +import numpy as np +import ctypes +from pyscf import lib +from pyscf.lib import logger +from pyscf.cc.rccsdt import _einsum, _unpack_t3_, setup_tri2block_rhf +from pyscf.cc.rccsdtq import t4_add_ + + +_libccsdt = lib.load_library('libccsdt') + +def eijkl_division_single_(A, eocc, evir, i, j, k, l, nvir): + assert A.dtype == np.float64 and A.flags['C_CONTIGUOUS'], "A must be a contiguous float64 array" + assert eocc.dtype == np.float64 and eocc.flags['C_CONTIGUOUS'], "eocc must be a contiguous float64 array" + assert evir.dtype == np.float64 and evir.flags['C_CONTIGUOUS'], "evir must be a contiguous float64 array" + _libccsdt.eijkl_division_single_( + A.ctypes.data_as(ctypes.c_void_p), eocc.ctypes.data_as(ctypes.c_void_p), evir.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int64(i), ctypes.c_int64(j), ctypes.c_int64(k), ctypes.c_int64(l), ctypes.c_int64(nvir) + ) + return A + +def t4_spin_summation_single_inplace_(A, nvir, pattern, alpha=1.0, beta=0.0): + assert A.dtype == np.float64 and A.flags['C_CONTIGUOUS'], "A must be a contiguous float64 array" + pattern_c = pattern.encode('utf-8') + _libccsdt.t4_spin_summation_single_inplace_( + A.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int64(nvir), ctypes.c_char_p(pattern_c), + ctypes.c_double(alpha), ctypes.c_double(beta) + ) + 
return A + +def kernel(mycc, eris=None, tamps=None, verbose=logger.NOTE): + + time0 = logger.process_clock(), logger.perf_counter() + log = logger.new_logger(mycc, verbose) + + if tamps is not None: + if len(tamps) != 3: + raise ValueError("tamps should be a list of length 3, containing T1, T2, and T3 amplitudes.") + if mycc.do_tri_max_t and len(tamps[2].shape) != 4: + raise ValueError("CC object uses compact T3 amplitudes but the input T3 is full.") + if not mycc.do_tri_max_t and len(tamps[2].shape) == 4: + raise ValueError("CC object uses full T3 amplitudes but the input T3 is compact.") + else: + tamps = mycc.tamps + + if eris is None: + eris = mycc.ao2mo(mycc.mo_coeff) + + if mycc.do_tri_max_t and (not hasattr(mycc, "tri2block_map") or mycc.tri2block_map is None): + mycc.tri2block_map, mycc.tri2block_mask, mycc.tri2block_tp = setup_tri2block_rhf(mycc) + + name = mycc.__class__.__name__ + + backend = mycc.einsum_backend + einsum = functools.partial(_einsum, backend) + + t1 = tamps[0] + nocc, nvir = t1.shape[0], t1.shape[1] + + t2, t3 = tamps[1:3] + mo_energy = eris.mo_energy + e_occ = mo_energy[:nocc] + e_occ = np.ascontiguousarray(e_occ) + e_vir = mo_energy[nocc:] + e_vir = np.ascontiguousarray(e_vir) + + eris_ovvv = eris.pppp[:nocc, nocc:, nocc:, nocc:].copy() + eris_oovo = eris.pppp[:nocc, :nocc, nocc:, :nocc].copy() + eris_oovv = eris.pppp[:nocc, :nocc, nocc:, nocc:].copy() + eris_ovvo = eris.pppp[:nocc, nocc:, nocc:, :nocc].copy() + eris_ovov = eris.pppp[:nocc, nocc:, :nocc, nocc:].copy() + eris_vvvv = eris.pppp[nocc:, nocc:, nocc:, nocc:].copy() + eris_oooo = eris.pppp[:nocc, :nocc, :nocc, :nocc].copy() + + eris = None + + def get_t3_slice(t3_blk, i, j): + if mycc.do_tri_max_t: + _unpack_t3_(mycc, t3, t3_blk, i, i + 1, j, j + 1, 0, nocc, 1, 1, nocc) + else: + t3_blk[0, 0, :nocc] = t3[i, j, :nocc] + return t3_blk + + def compute_W_vvvvoo(W_vvvvoo_slice, j, k): + einsum('abef,fc->abce', eris_vvvv, t2[j, k], out=W_vvvvoo_slice, alpha=0.5, beta=0.0) + 
einsum('acef,fb->abce', eris_vvvv, t2[k, j], out=W_vvvvoo_slice, alpha=0.5, beta=1.0) + return W_vvvvoo_slice + + def compute_W_vvoooo(W_vvoooo_slice, i, j, k): + einsum('eam,be->abm', eris_ovvo[i], t2[j, k], out=W_vvoooo_slice, alpha=1.0, beta=0.0) + einsum('ebm,ae->abm', eris_ovvo[j], t2[i, k], out=W_vvoooo_slice, alpha=1.0, beta=1.0) + einsum('ema,be->abm', eris_ovov[k], t2[j, i], out=W_vvoooo_slice, alpha=1.0, beta=1.0) + einsum('emb,ae->abm', eris_ovov[k], t2[i, j], out=W_vvoooo_slice, alpha=1.0, beta=1.0) + einsum('mn,nab->abm', eris_oooo[k, i], t2[:, j], out=W_vvoooo_slice, alpha=-0.5, beta=1.0) + einsum('mn,nba->abm', eris_oooo[k, j], t2[:, i], out=W_vvoooo_slice, alpha=-0.5, beta=1.0) + return W_vvoooo_slice + + time1 = logger.process_clock(), logger.perf_counter() + t4_blk = np.empty((nvir,) * 4, dtype=t2.dtype) + z4_blk = np.empty_like(t4_blk) + t3_blk = np.empty((1,) * 2 + (nocc,) + (nvir,) * 3, dtype=t3.dtype) + W_vvoooo_slice = np.empty((nvir, nvir, nocc), dtype=t2.dtype) + W_vvvvoo_slice = np.empty((nvir, nvir, nvir, nvir), dtype=t2.dtype) + e_q_bracket = 0.0 + e_q_paren = 0.0 + for l in range(nocc): + for k in range(l + 1): + for j in range(k + 1): + for i in range(j + 1): + + if (i == j == k == l) or (i == j and j == k) or (j == k and k == l): + continue + elif i < j and j < k and k < l: + factor = 24.0 + elif (i == j and j < k and k < l) or (i < j and j == k and k < l) or (i < j and j < k and k == l): + factor = 12.0 + elif (i == j and j < k and k == l): + factor = 6.0 + + # z for (Q) + get_t3_slice(t3_blk, k, l) + einsum('am,mcdb->abcd', eris_oovo[i, j], t3_blk[0, 0], out=z4_blk, alpha=-1.0, beta=0.0) + einsum('bm,mcda->abcd', eris_oovo[j, i], t3_blk[0, 0], out=z4_blk, alpha=-1.0, beta=1.0) + einsum('eba,cde->abcd', eris_ovvv[j], t3_blk[0, 0, i], out=z4_blk, alpha=1.0, beta=1.0) + einsum('eab,cde->abcd', eris_ovvv[i], t3_blk[0, 0, j], out=z4_blk, alpha=1.0, beta=1.0) + + get_t3_slice(t3_blk, j, l) + einsum('am,mbdc->abcd', eris_oovo[i, k], 
t3_blk[0, 0], out=z4_blk, alpha=-1.0, beta=1.0) + einsum('cm,mbda->abcd', eris_oovo[k, i], t3_blk[0, 0], out=z4_blk, alpha=-1.0, beta=1.0) + einsum('eca,bde->abcd', eris_ovvv[k], t3_blk[0, 0, i], out=z4_blk, alpha=1.0, beta=1.0) + einsum('eac,bde->abcd', eris_ovvv[i], t3_blk[0, 0, k], out=z4_blk, alpha=1.0, beta=1.0) + + get_t3_slice(t3_blk, j, k) + einsum('am,mbcd->abcd', eris_oovo[i, l], t3_blk[0, 0], out=z4_blk, alpha=-1.0, beta=1.0) + einsum('dm,mbca->abcd', eris_oovo[l, i], t3_blk[0, 0], out=z4_blk, alpha=-1.0, beta=1.0) + einsum('eda,bce->abcd', eris_ovvv[l], t3_blk[0, 0, i], out=z4_blk, alpha=1.0, beta=1.0) + einsum('ead,bce->abcd', eris_ovvv[i], t3_blk[0, 0, l], out=z4_blk, alpha=1.0, beta=1.0) + + get_t3_slice(t3_blk, i, l) + einsum('bm,madc->abcd', eris_oovo[j, k], t3_blk[0, 0], out=z4_blk, alpha=-1.0, beta=1.0) + einsum('cm,madb->abcd', eris_oovo[k, j], t3_blk[0, 0], out=z4_blk, alpha=-1.0, beta=1.0) + einsum('ecb,ade->abcd', eris_ovvv[k], t3_blk[0, 0, j], out=z4_blk, alpha=1.0, beta=1.0) + einsum('ebc,ade->abcd', eris_ovvv[j], t3_blk[0, 0, k], out=z4_blk, alpha=1.0, beta=1.0) + + get_t3_slice(t3_blk, i, k) + einsum('bm,macd->abcd', eris_oovo[j, l], t3_blk[0, 0], out=z4_blk, alpha=-1.0, beta=1.0) + einsum('dm,macb->abcd', eris_oovo[l, j], t3_blk[0, 0], out=z4_blk, alpha=-1.0, beta=1.0) + einsum('edb,ace->abcd', eris_ovvv[l], t3_blk[0, 0, j], out=z4_blk, alpha=1.0, beta=1.0) + einsum('ebd,ace->abcd', eris_ovvv[j], t3_blk[0, 0, l], out=z4_blk, alpha=1.0, beta=1.0) + + get_t3_slice(t3_blk, i, j) + einsum('cm,mabd->abcd', eris_oovo[k, l], t3_blk[0, 0], out=z4_blk, alpha=-1.0, beta=1.0) + einsum('dm,mabc->abcd', eris_oovo[l, k], t3_blk[0, 0], out=z4_blk, alpha=-1.0, beta=1.0) + einsum('edc,abe->abcd', eris_ovvv[l], t3_blk[0, 0, k], out=z4_blk, alpha=1.0, beta=1.0) + einsum('ecd,abe->abcd', eris_ovvv[k], t3_blk[0, 0, l], out=z4_blk, alpha=1.0, beta=1.0) + + # t4 + compute_W_vvoooo(W_vvoooo_slice, i, j, k) + einsum('abm,mdc->abcd', W_vvoooo_slice, t2[l], 
out=t4_blk, alpha=-1.0, beta=0.0) + compute_W_vvoooo(W_vvoooo_slice, i, j, l) + einsum('abm,mcd->abcd', W_vvoooo_slice, t2[k], out=t4_blk, alpha=-1.0, beta=1.0) + compute_W_vvoooo(W_vvoooo_slice, i, k, j) + einsum('acm,mdb->abcd', W_vvoooo_slice, t2[l], out=t4_blk, alpha=-1.0, beta=1.0) + compute_W_vvoooo(W_vvoooo_slice, i, k, l) + einsum('acm,mbd->abcd', W_vvoooo_slice, t2[j], out=t4_blk, alpha=-1.0, beta=1.0) + compute_W_vvoooo(W_vvoooo_slice, i, l, j) + einsum('adm,mcb->abcd', W_vvoooo_slice, t2[k], out=t4_blk, alpha=-1.0, beta=1.0) + compute_W_vvoooo(W_vvoooo_slice, i, l, k) + einsum('adm,mbc->abcd', W_vvoooo_slice, t2[j], out=t4_blk, alpha=-1.0, beta=1.0) + compute_W_vvoooo(W_vvoooo_slice, j, k, i) + einsum('bcm,mda->abcd', W_vvoooo_slice, t2[l], out=t4_blk, alpha=-1.0, beta=1.0) + compute_W_vvoooo(W_vvoooo_slice, j, k, l) + einsum('bcm,mad->abcd', W_vvoooo_slice, t2[i], out=t4_blk, alpha=-1.0, beta=1.0) + compute_W_vvoooo(W_vvoooo_slice, j, l, i) + einsum('bdm,mca->abcd', W_vvoooo_slice, t2[k], out=t4_blk, alpha=-1.0, beta=1.0) + compute_W_vvoooo(W_vvoooo_slice, j, l, k) + einsum('bdm,mac->abcd', W_vvoooo_slice, t2[i], out=t4_blk, alpha=-1.0, beta=1.0) + compute_W_vvoooo(W_vvoooo_slice, k, l, i) + einsum('cdm,mba->abcd', W_vvoooo_slice, t2[j], out=t4_blk, alpha=-1.0, beta=1.0) + compute_W_vvoooo(W_vvoooo_slice, k, l, j) + einsum('cdm,mab->abcd', W_vvoooo_slice, t2[i], out=t4_blk, alpha=-1.0, beta=1.0) + + compute_W_vvvvoo(W_vvvvoo_slice, j, k) + einsum('abce,ed->abcd', W_vvvvoo_slice, t2[i, l], out=t4_blk, alpha=1.0, beta=1.0) + einsum('dbce,ea->abcd', W_vvvvoo_slice, t2[l, i], out=t4_blk, alpha=1.0, beta=1.0) + compute_W_vvvvoo(W_vvvvoo_slice, j, l) + einsum('abde,ec->abcd', W_vvvvoo_slice, t2[i, k], out=t4_blk, alpha=1.0, beta=1.0) + einsum('cbde,ea->abcd', W_vvvvoo_slice, t2[k, i], out=t4_blk, alpha=1.0, beta=1.0) + compute_W_vvvvoo(W_vvvvoo_slice, k, l) + einsum('acde,eb->abcd', W_vvvvoo_slice, t2[i, j], out=t4_blk, alpha=1.0, beta=1.0) + 
einsum('bcde,ea->abcd', W_vvvvoo_slice, t2[j, i], out=t4_blk, alpha=1.0, beta=1.0) + compute_W_vvvvoo(W_vvvvoo_slice, i, k) + einsum('bace,ed->abcd', W_vvvvoo_slice, t2[j, l], out=t4_blk, alpha=1.0, beta=1.0) + einsum('dace,eb->abcd', W_vvvvoo_slice, t2[l, j], out=t4_blk, alpha=1.0, beta=1.0) + compute_W_vvvvoo(W_vvvvoo_slice, i, l) + einsum('bade,ec->abcd', W_vvvvoo_slice, t2[j, k], out=t4_blk, alpha=1.0, beta=1.0) + einsum('cade,eb->abcd', W_vvvvoo_slice, t2[k, j], out=t4_blk, alpha=1.0, beta=1.0) + compute_W_vvvvoo(W_vvvvoo_slice, i, j) + einsum('cabe,ed->abcd', W_vvvvoo_slice, t2[k, l], out=t4_blk, alpha=1.0, beta=1.0) + einsum('dabe,ec->abcd', W_vvvvoo_slice, t2[l, k], out=t4_blk, alpha=1.0, beta=1.0) + + t4_add_(t4_blk, z4_blk, 1, nvir) + eijkl_division_single_(t4_blk, e_occ, e_vir, i, j, k, l, nvir) + t4_spin_summation_single_inplace_(t4_blk, nvir, 'P4_444', alpha=1.0, beta=0.0) + + e_q_paren += np.dot(z4_blk.ravel(), t4_blk.ravel()) * factor + + # z for [Q] + einsum('ab,cd->abcd', eris_oovv[i, j], t2[k, l], out=z4_blk, alpha=1.0, beta=0.0) + einsum('ac,bd->abcd', eris_oovv[i, k], t2[j, l], out=z4_blk, alpha=1.0, beta=1.0) + einsum('ad,bc->abcd', eris_oovv[i, l], t2[j, k], out=z4_blk, alpha=1.0, beta=1.0) + einsum('bc,ad->abcd', eris_oovv[j, k], t2[i, l], out=z4_blk, alpha=1.0, beta=1.0) + einsum('bd,ac->abcd', eris_oovv[j, l], t2[i, k], out=z4_blk, alpha=1.0, beta=1.0) + einsum('cd,ab->abcd', eris_oovv[k, l], t2[i, j], out=z4_blk, alpha=1.0, beta=1.0) + + e_q_bracket += np.dot(z4_blk.ravel(), t4_blk.ravel()) * factor + + time1 = log.timer_debug1('%s(Q): iter %3d:' % (name, l), *time1) + + e_q_paren += e_q_bracket + e_q_bracket /= 12.0 + e_q_paren /= 12.0 + + log.timer('%s(Q)' % name, *time0) + log.info("[Q] correction = % .12e (Q) correction = % .12e" % (e_q_bracket, e_q_paren)) + return e_q_bracket, e_q_paren + + +if __name__ == '__main__': + + from pyscf import gto, scf, lib + from pyscf.data.elements import chemcore + from pyscf.cc.rccsdt import RCCSDT + 
from pyscf.cc.rccsdt_highm import RCCSDT as RCCSDT_highm + + atom = ''' + O 1.416468653903 0.111264435953 0.000000000000 + H 1.746241653903 -0.373945564047 -0.758561000000 + H 2.102765241 -0.898304829 1.578786622 + ''' + basis = 'cc-pvdz' + + mol = gto.M(atom=atom, basis=basis) + mol.verbose = 1 + mol.max_memory = 10000 + frozen = chemcore(mol) + + mf = scf.RHF(mol).density_fit() + mf.conv_tol = 1e-12 + mf.kernel() + + mycc = RCCSDT(mf, frozen=frozen) + mycc.set_einsum_backend('numpy') + mycc.conv_tol = 1e-10 + mycc.conv_tol_normt = 1e-8 + mycc.max_cycle = 100 + mycc.verbose = 3 + mycc.blksize = 2 + mycc.blksize_oovv = 2 + mycc.blksize_oooo = 2 + mycc.do_diis_max_t = False + mycc.incore_complete = True + ecorr, tamps = mycc.kernel() + + ref_e_q_bracket = -0.001462052703 + ref_e_q_paren = -0.001620887567 + + mycc.verbose = 8 + e_q_bracket, e_q_paren = kernel(mycc) + print('[Q] corr: % .12f Ref: % .12f Diff: % .12e'%( + e_q_bracket, ref_e_q_bracket, e_q_bracket - ref_e_q_bracket)) + print('(Q) corr: % .12f Ref: % .12f Diff: % .12e'%( + e_q_paren, ref_e_q_paren, e_q_paren - ref_e_q_paren)) + + mycc2 = RCCSDT_highm(mf, frozen=frozen) + mycc2.set_einsum_backend('numpy') + mycc2.conv_tol = 1e-10 + mycc2.conv_tol_normt = 1e-8 + mycc2.max_cycle = 100 + mycc2.verbose = 3 + mycc2.do_diis_max_t = False + mycc2.incore_complete = True + ecorr, tamps = mycc2.kernel() + e_q_bracket, e_q_paren = mycc2.ccsdt_q() + print('[Q] corr: % .12f Ref: % .12f Diff: % .12e'%( + e_q_bracket, ref_e_q_bracket, e_q_bracket - ref_e_q_bracket)) + print('(Q) corr: % .12f Ref: % .12f Diff: % .12e'%( + e_q_paren, ref_e_q_paren, e_q_paren - ref_e_q_paren)) diff --git a/pyscf/cc/rccsdtq.py b/pyscf/cc/rccsdtq.py index e9745191e5..eb899ae4ac 100644 --- a/pyscf/cc/rccsdtq.py +++ b/pyscf/cc/rccsdtq.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2025 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -27,13 +27,11 @@ ''' import numpy as np -import numpy import functools import ctypes from pyscf import lib from pyscf.lib import logger -from pyscf.mp.mp2 import get_nocc, get_nmo, get_frozen_mask, get_e_hf, _mo_without_core -from pyscf.cc import ccsd, _ccsd, rccsdt +from pyscf.cc import rccsdt from pyscf.cc.rccsdt import (_einsum, t3_spin_summation_inplace_, symmetrize_tamps_tri_, purify_tamps_tri_, update_t1_fock_eris, intermediates_t1t2, compute_r1r2, r1r2_divide_e_, intermediates_t3, kernel, _PhysicistsERIs, format_size) @@ -55,6 +53,15 @@ def t4_spin_summation_inplace_(A, nocc4, nvir, pattern, alpha=1.0, beta=0.0): ) return A +def t4_project_1_minus_p4_p31_inplace_(A, nocc4, nvir, alpha=1.0, beta=0.0): + assert A.dtype == np.float64 and A.flags['C_CONTIGUOUS'], "A must be a contiguous float64 array" + _libccsdt.t4_project_1_minus_p4_p31_inplace_( + A.ctypes.data_as(ctypes.c_void_p), + ctypes.c_int64(nocc4), ctypes.c_int64(nvir), + ctypes.c_double(alpha), ctypes.c_double(beta) + ) + return A + def t4_add_(t4, r4, nocc4, nvir): assert t4.dtype == np.float64 and t4.flags['C_CONTIGUOUS'], "t4 must be a contiguous float64 array" assert r4.dtype == np.float64 and r4.flags['C_CONTIGUOUS'], "r4 must be a contiguous float64 array" @@ -86,6 +93,28 @@ def unpack_t4_tri2block_(t4, t4_blk, map_, mask, i0, i1, j0, j1, k0, k1, l0, l1, ) return t4_blk +def unpack_t4_tri2block_triples_(t4, t4_blk, map_, mask, i0, i1, j0, j1, k0, k1, l0, l1, + nocc, nvir, blk_i, blk_j, blk_k, blk_l): + assert t4.dtype == np.float64 and t4_blk.dtype == np.float64 + assert map_.dtype == np.int64 and mask.dtype == np.bool_ + t4 = np.ascontiguousarray(t4) + t4_blk = np.ascontiguousarray(t4_blk) + map_ = np.ascontiguousarray(map_) + mask = np.ascontiguousarray(mask) + _libccsdt.unpack_t4_tri2block_triples_( + t4.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), + 
t4_blk.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), + map_.ctypes.data_as(ctypes.POINTER(ctypes.c_int64)), + mask.ctypes.data_as(ctypes.POINTER(ctypes.c_bool)), + ctypes.c_int64(i0), ctypes.c_int64(i1), + ctypes.c_int64(j0), ctypes.c_int64(j1), + ctypes.c_int64(k0), ctypes.c_int64(k1), + ctypes.c_int64(l0), ctypes.c_int64(l1), + ctypes.c_int64(nocc), ctypes.c_int64(nvir), + ctypes.c_int64(blk_i), ctypes.c_int64(blk_j), ctypes.c_int64(blk_k), ctypes.c_int64(blk_l) + ) + return t4_blk + def accumulate_t4_block2tri_(t4, t4_blk, map_, i0, i1, j0, j1, k0, k1, l0, l1, nocc, nvir, blk_i, blk_j, blk_k, blk_l, alpha, beta): assert t4.dtype == np.float64 and t4_blk.dtype == np.float64 @@ -107,6 +136,18 @@ def accumulate_t4_block2tri_(t4, t4_blk, map_, i0, i1, j0, j1, k0, k1, l0, l1, ) return t4 +def r4_tri_divide_e_(mycc, r4, mo_energy): + nocc, nmo = mycc.nocc, mycc.nmo + nvir = nmo - nocc + assert r4.dtype == np.float64 and r4.flags['C_CONTIGUOUS'], "r4 must be a contiguous float64 array" + eia = np.ascontiguousarray(mo_energy[:nocc, None] - mo_energy[None, nocc:] - mycc.level_shift, dtype=np.float64) + _libccsdt.r4_tri_divide_e_( + r4.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), + eia.ctypes.data_as(ctypes.POINTER(ctypes.c_double)), + ctypes.c_int64(nocc), ctypes.c_int64(nvir) + ) + return r4 + def _unpack_t4_(mycc, t4, t4_blk, i0, i1, j0, j1, k0, k1, l0, l1, blksize0=None, blksize1=None, blksize2=None, blksize3=None): if blksize0 is None: blksize0 = mycc.blksize @@ -117,6 +158,16 @@ def _unpack_t4_(mycc, t4, t4_blk, i0, i1, j0, j1, k0, k1, l0, l1, mycc.nocc, mycc.nmo - mycc.nocc, blksize0, blksize1, blksize2, blksize3) return t4_blk +def _unpack_t4_triples_(mycc, t4, t4_blk, i0, i1, j0, j1, k0, k1, l0, l1, + blksize0=None, blksize1=None, blksize2=None, blksize3=None): + if blksize0 is None: blksize0 = mycc.blksize + if blksize1 is None: blksize1 = mycc.blksize + if blksize2 is None: blksize2 = mycc.blksize + if blksize3 is None: blksize3 = mycc.blksize + 
unpack_t4_tri2block_triples_(t4, t4_blk, mycc.tri2block_map, mycc.tri2block_mask, i0, i1, j0, j1, k0, k1, l0, l1, + mycc.nocc, mycc.nmo - mycc.nocc, blksize0, blksize1, blksize2, blksize3) + return t4_blk + def _accumulate_t4_(mycc, t4, t4_blk, i0, i1, j0, j1, k0, k1, l0, l1, blksize0=None, blksize1=None, blksize2=None, blksize3=None, alpha=1.0, beta=0.0): if blksize0 is None: blksize0 = mycc.blksize @@ -198,26 +249,26 @@ def intermediates_t4_tri(mycc, imds, t2, t3, t4): einsum('me,mjab->abej', t1_fock[:nocc, nocc:], t2, out=W_vvvo, alpha=-1.0, beta=1.0) - W_ovvvoo = np.empty((nocc,) + (nvir,) * 3 + (nocc,) * 2) - einsum('maef,jibf->mabeij', t1_eris[:nocc, nocc:, nocc:, nocc:], t2, out=W_ovvvoo, alpha=2.0, beta=0.0) - einsum('mafe,jibf->mabeij', t1_eris[:nocc, nocc:, nocc:, nocc:], t2, out=W_ovvvoo, alpha=-1.0, beta=1.0) - einsum('mnei,njab->mabeij', t1_eris[:nocc, :nocc, nocc:, :nocc], t2, out=W_ovvvoo, alpha=-2.0, beta=1.0) - einsum('nmei,njab->mabeij', t1_eris[:nocc, :nocc, nocc:, :nocc], t2, out=W_ovvvoo, alpha=1.0, beta=1.0) + W_oovvvo = np.empty((nocc,) * 2 + (nvir,) * 3 + (nocc,)) + einsum('maef,jibf->ijeabm', t1_eris[:nocc, nocc:, nocc:, nocc:], t2, out=W_oovvvo, alpha=2.0, beta=0.0) + einsum('mafe,jibf->ijeabm', t1_eris[:nocc, nocc:, nocc:, nocc:], t2, out=W_oovvvo, alpha=-1.0, beta=1.0) + einsum('mnei,njab->ijeabm', t1_eris[:nocc, :nocc, nocc:, :nocc], t2, out=W_oovvvo, alpha=-2.0, beta=1.0) + einsum('nmei,njab->ijeabm', t1_eris[:nocc, :nocc, nocc:, :nocc], t2, out=W_oovvvo, alpha=1.0, beta=1.0) c_t3 = np.empty_like(t3) t3_spin_summation(t3, c_t3, nocc**3, nvir, "P3_201", 1.0, 0.0) - einsum('nmfe,nijfab->mabeij', t1_eris[:nocc, :nocc, nocc:, nocc:], c_t3, out=W_ovvvoo, alpha=0.5, beta=1.0) - einsum('mnfe,nijfab->mabeij', t1_eris[:nocc, :nocc, nocc:, nocc:], c_t3, out=W_ovvvoo, alpha=-0.25, beta=1.0) + einsum('nmfe,nijfab->ijeabm', t1_eris[:nocc, :nocc, nocc:, nocc:], c_t3, out=W_oovvvo, alpha=0.5, beta=1.0) + einsum('mnfe,nijfab->ijeabm', t1_eris[:nocc, 
:nocc, nocc:, nocc:], c_t3, out=W_oovvvo, alpha=-0.25, beta=1.0) c_t3 = None - W_ovvovo = np.empty((nocc, nvir, nvir, nocc, nvir, nocc)) - einsum('mafe,jibf->mabiej', t1_eris[:nocc, nocc:, nocc:, nocc:], t2, out=W_ovvovo, alpha=1.0, beta=0.0) - einsum('mnie,njab->mabiej', t1_eris[:nocc, :nocc, :nocc, nocc:], t2, out=W_ovvovo, alpha=-1.0, beta=1.0) - einsum('nmef,injfab->mabiej', t1_eris[:nocc, :nocc, nocc:, nocc:], t3, out=W_ovvovo, alpha=-0.5, beta=1.0) + W_ovovvo = np.empty((nocc, nvir, nocc, nvir, nvir, nocc)) + einsum('mafe,jibf->iejabm', t1_eris[:nocc, nocc:, nocc:, nocc:], t2, out=W_ovovvo, alpha=1.0, beta=0.0) + einsum('mnie,njab->iejabm', t1_eris[:nocc, :nocc, :nocc, nocc:], t2, out=W_ovovvo, alpha=-1.0, beta=1.0) + einsum('nmef,injfab->iejabm', t1_eris[:nocc, :nocc, nocc:, nocc:], t3, out=W_ovovvo, alpha=-0.5, beta=1.0) - W_vooooo = np.empty((nvir,) + (nocc,) * 5) - einsum('mnek,ijae->amnijk', t1_eris[:nocc, :nocc, nocc:, :nocc], t2, out=W_vooooo, alpha=1.0, beta=0.0) - einsum('mnef,ijkaef->amnijk', t1_eris[:nocc, :nocc, nocc:, nocc:], t3, out=W_vooooo, alpha=0.5, beta=1.0) - W_vooooo += W_vooooo.transpose(0, 2, 1, 3, 5, 4) + W_ooooov = np.empty((nocc,) * 5 + (nvir,)) + einsum('mnek,ijae->kjinma', t1_eris[:nocc, :nocc, nocc:, :nocc], t2, out=W_ooooov, alpha=1.0, beta=0.0) + einsum('mnef,ijkaef->kjinma', t1_eris[:nocc, :nocc, nocc:, nocc:], t3, out=W_ooooov, alpha=0.5, beta=1.0) + W_ooooov += W_ooooov.transpose(1, 0, 2, 4, 3, 5) W_vvoooo = np.empty((nvir,) * 2 + (nocc,) * 4) einsum('amef,ijkebf->abmijk', t1_eris[nocc:, :nocc, nocc:, nocc:], t3, out=W_vvoooo, alpha=1.0, beta=0.0) @@ -252,10 +303,10 @@ def intermediates_t4_tri(mycc, imds, t2, t3, t4): t4_tmp[:bn, :bi, :bj, :bk], out=W_vvvvoo[..., j0:j1, k0:k1], alpha=-0.5, beta=1.0) t4_tmp = None - W_ovvvoo += W_ovvvoo.transpose(0, 2, 1, 3, 5, 4) + W_oovvvo += W_oovvvo.transpose(1, 0, 2, 4, 3, 5) W_vvoooo += W_vvoooo.transpose(1, 0, 2, 4, 3, 5) W_vvvvoo += W_vvvvoo.transpose(0, 2, 1, 3, 5, 4) - imds.W_ovvvoo, 
imds.W_ovvovo, imds.W_vooooo = W_ovvvoo, W_ovvovo, W_vooooo + imds.W_oovvvo, imds.W_ovovvo, imds.W_ooooov = W_oovvvo, W_ovovvo, W_ooooov imds.W_vvoooo, imds.W_vvvvoo = W_vvoooo, W_vvvvoo return imds @@ -274,17 +325,17 @@ def compute_r4_tri(mycc, imds, t2, t3, t4): F_oo, F_vv = imds.F_oo, imds.F_vv W_oooo, W_ovvo, W_ovov = imds.W_oooo, imds.W_ovvo, imds.W_ovov W_vvvo, W_vooo, W_vvvv = imds.W_vvvo, imds.W_vooo, imds.W_vvvv - W_ovvvoo, W_ovvovo, W_vooooo = imds.W_ovvvoo, imds.W_ovvovo, imds.W_vooooo + W_oovvvo, W_ovovvo, W_ooooov = imds.W_oovvvo, imds.W_ovovvo, imds.W_ooooov W_vvoooo, W_vvvvoo = imds.W_vvoooo, imds.W_vvvvoo + W_voov = np.ascontiguousarray(W_ovvo.transpose(1, 0, 3, 2)) + c_t3 = np.empty_like(t3) t3_spin_summation(t3, c_t3, nocc**3, nvir, "P3_201", 1.0, 0.0) # r4 = np.empty_like(t4) r4 = np.zeros_like(t4) - time2 = logger.process_clock(), logger.perf_counter() - t4_tmp = np.empty((blksize,) * 4 + (nvir,) * 4, dtype=t4.dtype) r4_tmp = np.empty((blksize,) * 4 + (nvir,) * 4, dtype=t4.dtype) for l0, l1 in lib.prange(0, nocc, blksize): bl = l1 - l0 @@ -345,152 +396,128 @@ def compute_r4_tri(mycc, imds, t2, t3, t4): einsum("dmlk,mijcab->ijklabcd", W_vooo[:, :, l0:l1, k0:k1], t3[:, i0:i1, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mabeij,mklecd->ijklabcd", W_ovvvoo[..., i0:i1, j0:j1], - c_t3[:, k0:k1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.25, beta=1.0) - einsum("maceik,mjlebd->ijklabcd", W_ovvvoo[..., i0:i1, k0:k1], - c_t3[:, j0:j1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.25, beta=1.0) - einsum("madeil,mjkebc->ijklabcd", W_ovvvoo[..., i0:i1, l0:l1], - c_t3[:, j0:j1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.25, beta=1.0) - einsum("mbaeji,mklecd->ijklabcd", W_ovvvoo[..., j0:j1, i0:i1], - c_t3[:, k0:k1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.25, beta=1.0) - einsum("mcaeki,mjlebd->ijklabcd", W_ovvvoo[..., k0:k1, i0:i1], - c_t3[:, j0:j1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.25, beta=1.0) - 
einsum("mdaeli,mjkebc->ijklabcd", W_ovvvoo[..., l0:l1, i0:i1], - c_t3[:, j0:j1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.25, beta=1.0) - einsum("mbcejk,milead->ijklabcd", W_ovvvoo[..., j0:j1, k0:k1], - c_t3[:, i0:i1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.25, beta=1.0) - einsum("mbdejl,mikeac->ijklabcd", W_ovvvoo[..., j0:j1, l0:l1], - c_t3[:, i0:i1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.25, beta=1.0) - einsum("mcbekj,milead->ijklabcd", W_ovvvoo[..., k0:k1, j0:j1], - c_t3[:, i0:i1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.25, beta=1.0) - einsum("mdbelj,mikeac->ijklabcd", W_ovvvoo[..., l0:l1, j0:j1], - c_t3[:, i0:i1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.25, beta=1.0) - einsum("mcdekl,mijeab->ijklabcd", W_ovvvoo[..., k0:k1, l0:l1], - c_t3[:, i0:i1, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.25, beta=1.0) - einsum("mdcelk,mijeab->ijklabcd", W_ovvvoo[..., l0:l1, k0:k1], - c_t3[:, i0:i1, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.25, beta=1.0) - - einsum("mabiej,mklced->ijklabcd", W_ovvovo[..., i0:i1, :, j0:j1], - t3[:, k0:k1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mabiej,mlkdec->ijklabcd", W_ovvovo[..., i0:i1, :, j0:j1], - t3[:, l0:l1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("maciek,mjlbed->ijklabcd", W_ovvovo[..., i0:i1, :, k0:k1], - t3[:, j0:j1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("madiel,mjkbec->ijklabcd", W_ovvovo[..., i0:i1, :, l0:l1], - t3[:, j0:j1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("maciek,mljdeb->ijklabcd", W_ovvovo[..., i0:i1, :, k0:k1], - t3[:, l0:l1, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("madiel,mkjceb->ijklabcd", W_ovvovo[..., i0:i1, :, l0:l1], - t3[:, k0:k1, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mbajei,mklced->ijklabcd", W_ovvovo[..., j0:j1, :, i0:i1], - t3[:, k0:k1, l0:l1], out=r4_tmp[:bi, :bj, :bk, 
:bl], alpha=-0.5, beta=1.0) - einsum("mbajei,mlkdec->ijklabcd", W_ovvovo[..., j0:j1, :, i0:i1], - t3[:, l0:l1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mcakei,mjlbed->ijklabcd", W_ovvovo[..., k0:k1, :, i0:i1], - t3[:, j0:j1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mdalei,mjkbec->ijklabcd", W_ovvovo[..., l0:l1, :, i0:i1], - t3[:, j0:j1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mcakei,mljdeb->ijklabcd", W_ovvovo[..., k0:k1, :, i0:i1], - t3[:, l0:l1, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mdalei,mkjceb->ijklabcd", W_ovvovo[..., l0:l1, :, i0:i1], - t3[:, k0:k1, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mbcjek,milaed->ijklabcd", W_ovvovo[..., j0:j1, :, k0:k1], - t3[:, i0:i1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mbdjel,mikaec->ijklabcd", W_ovvovo[..., j0:j1, :, l0:l1], - t3[:, i0:i1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mcbkej,milaed->ijklabcd", W_ovvovo[..., k0:k1, :, j0:j1], - t3[:, i0:i1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mdblej,mikaec->ijklabcd", W_ovvovo[..., l0:l1, :, j0:j1], - t3[:, i0:i1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mcdkel,mijaeb->ijklabcd", W_ovvovo[..., k0:k1, :, l0:l1], - t3[:, i0:i1, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mdclek,mijaeb->ijklabcd", W_ovvovo[..., l0:l1, :, k0:k1], - t3[:, i0:i1, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mbcjek,mlidea->ijklabcd", W_ovvovo[..., j0:j1, :, k0:k1], - t3[:, l0:l1, i0:i1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mbdjel,mkicea->ijklabcd", W_ovvovo[..., j0:j1, :, l0:l1], - t3[:, k0:k1, i0:i1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mcbkej,mlidea->ijklabcd", W_ovvovo[..., k0:k1, :, j0:j1], - t3[:, l0:l1, 
i0:i1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mdblej,mkicea->ijklabcd", W_ovvovo[..., l0:l1, :, j0:j1], - t3[:, k0:k1, i0:i1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mcdkel,mjibea->ijklabcd", W_ovvovo[..., k0:k1, :, l0:l1], - t3[:, j0:j1, i0:i1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mdclek,mjibea->ijklabcd", W_ovvovo[..., l0:l1, :, k0:k1], - t3[:, j0:j1, i0:i1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - - einsum("mcbiej,mklaed->ijklabcd", W_ovvovo[..., i0:i1, :, j0:j1], + einsum("ijeabm,mklecd->ijklabcd", W_oovvvo[i0:i1, j0:j1], + c_t3[:, k0:k1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.5, beta=1.0) + einsum("ikeacm,mjlebd->ijklabcd", W_oovvvo[i0:i1, k0:k1], + c_t3[:, j0:j1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.5, beta=1.0) + einsum("ileadm,mjkebc->ijklabcd", W_oovvvo[i0:i1, l0:l1], + c_t3[:, j0:j1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.5, beta=1.0) + einsum("jkebcm,milead->ijklabcd", W_oovvvo[j0:j1, k0:k1], + c_t3[:, i0:i1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.5, beta=1.0) + einsum("jlebdm,mikeac->ijklabcd", W_oovvvo[j0:j1, l0:l1], + c_t3[:, i0:i1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.5, beta=1.0) + einsum("klecdm,mijeab->ijklabcd", W_oovvvo[k0:k1, l0:l1], + c_t3[:, i0:i1, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.5, beta=1.0) + + einsum("iejcbm,mklaed->ijklabcd", W_ovovvo[i0:i1, :, j0:j1], t3[:, k0:k1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mdbiej,mlkaec->ijklabcd", W_ovvovo[..., i0:i1, :, j0:j1], + einsum("iejdbm,mlkaec->ijklabcd", W_ovovvo[i0:i1, :, j0:j1], t3[:, l0:l1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mbciek,mjlaed->ijklabcd", W_ovvovo[..., i0:i1, :, k0:k1], + einsum("iekbcm,mjlaed->ijklabcd", W_ovovvo[i0:i1, :, k0:k1], t3[:, j0:j1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mbdiel,mjkaec->ijklabcd", W_ovvovo[..., 
i0:i1, :, l0:l1], + einsum("ielbdm,mjkaec->ijklabcd", W_ovovvo[i0:i1, :, l0:l1], t3[:, j0:j1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mdciek,mljaeb->ijklabcd", W_ovvovo[..., i0:i1, :, k0:k1], + einsum("iekdcm,mljaeb->ijklabcd", W_ovovvo[i0:i1, :, k0:k1], t3[:, l0:l1, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mcdiel,mkjaeb->ijklabcd", W_ovvovo[..., i0:i1, :, l0:l1], + einsum("ielcdm,mkjaeb->ijklabcd", W_ovovvo[i0:i1, :, l0:l1], t3[:, k0:k1, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mcajei,mklbed->ijklabcd", W_ovvovo[..., j0:j1, :, i0:i1], + einsum("jeicam,mklbed->ijklabcd", W_ovovvo[j0:j1, :, i0:i1], t3[:, k0:k1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mdajei,mlkbec->ijklabcd", W_ovvovo[..., j0:j1, :, i0:i1], + einsum("jeidam,mlkbec->ijklabcd", W_ovovvo[j0:j1, :, i0:i1], t3[:, l0:l1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mbakei,mjlced->ijklabcd", W_ovvovo[..., k0:k1, :, i0:i1], + einsum("keibam,mjlced->ijklabcd", W_ovovvo[k0:k1, :, i0:i1], t3[:, j0:j1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mbalei,mjkdec->ijklabcd", W_ovvovo[..., l0:l1, :, i0:i1], + einsum("leibam,mjkdec->ijklabcd", W_ovovvo[l0:l1, :, i0:i1], t3[:, j0:j1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mdakei,mljceb->ijklabcd", W_ovvovo[..., k0:k1, :, i0:i1], + einsum("keidam,mljceb->ijklabcd", W_ovovvo[k0:k1, :, i0:i1], t3[:, l0:l1, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mcalei,mkjdeb->ijklabcd", W_ovvovo[..., l0:l1, :, i0:i1], + einsum("leicam,mkjdeb->ijklabcd", W_ovovvo[l0:l1, :, i0:i1], t3[:, k0:k1, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("macjek,milbed->ijklabcd", W_ovvovo[..., j0:j1, :, k0:k1], + einsum("jekacm,milbed->ijklabcd", W_ovovvo[j0:j1, :, k0:k1], t3[:, i0:i1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, 
beta=1.0) - einsum("madjel,mikbec->ijklabcd", W_ovvovo[..., j0:j1, :, l0:l1], + einsum("jeladm,mikbec->ijklabcd", W_ovovvo[j0:j1, :, l0:l1], t3[:, i0:i1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mabkej,milced->ijklabcd", W_ovvovo[..., k0:k1, :, j0:j1], + einsum("kejabm,milced->ijklabcd", W_ovovvo[k0:k1, :, j0:j1], t3[:, i0:i1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mablej,mikdec->ijklabcd", W_ovvovo[..., l0:l1, :, j0:j1], + einsum("lejabm,mikdec->ijklabcd", W_ovovvo[l0:l1, :, j0:j1], t3[:, i0:i1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("madkel,mijceb->ijklabcd", W_ovvovo[..., k0:k1, :, l0:l1], + einsum("keladm,mijceb->ijklabcd", W_ovovvo[k0:k1, :, l0:l1], t3[:, i0:i1, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("maclek,mijdeb->ijklabcd", W_ovvovo[..., l0:l1, :, k0:k1], + einsum("lekacm,mijdeb->ijklabcd", W_ovovvo[l0:l1, :, k0:k1], t3[:, i0:i1, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mdcjek,mlibea->ijklabcd", W_ovvovo[..., j0:j1, :, k0:k1], + einsum("jekdcm,mlibea->ijklabcd", W_ovovvo[j0:j1, :, k0:k1], t3[:, l0:l1, i0:i1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mcdjel,mkibea->ijklabcd", W_ovvovo[..., j0:j1, :, l0:l1], + einsum("jelcdm,mkibea->ijklabcd", W_ovovvo[j0:j1, :, l0:l1], t3[:, k0:k1, i0:i1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mdbkej,mlicea->ijklabcd", W_ovvovo[..., k0:k1, :, j0:j1], + einsum("kejdbm,mlicea->ijklabcd", W_ovovvo[k0:k1, :, j0:j1], t3[:, l0:l1, i0:i1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mcblej,mkidea->ijklabcd", W_ovvovo[..., l0:l1, :, j0:j1], + einsum("lejcbm,mkidea->ijklabcd", W_ovovvo[l0:l1, :, j0:j1], t3[:, k0:k1, i0:i1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mbdkel,mjicea->ijklabcd", W_ovvovo[..., k0:k1, :, l0:l1], + einsum("kelbdm,mjicea->ijklabcd", W_ovovvo[k0:k1, :, l0:l1], 
t3[:, j0:j1, i0:i1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("mbclek,mjidea->ijklabcd", W_ovvovo[..., l0:l1, :, k0:k1], + einsum("lekbcm,mjidea->ijklabcd", W_ovovvo[l0:l1, :, k0:k1], t3[:, j0:j1, i0:i1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - einsum("amnijk,mnlbcd->ijklabcd", W_vooooo[..., i0:i1, j0:j1, k0:k1], + _accumulate_t4_(mycc, r4, r4_tmp, i0, i1, j0, j1, k0, k1, l0, l1) + time2 = log.timer_debug1('t4: iter: W_vvvo * t3, W_vooo * t3, W_oovvvo * t3, W_ovovvo * t3' + ' [%3d, %3d]:' % (l0, l1), *time2) + r4_tmp = None + c_t3 = None + W_vvvo = imds.W_vvvo = None + W_vooo = imds.W_vooo = None + W_oovvvo = imds.W_oovvvo = None + time1 = log.timer_debug1('t4: W_vvvo * t3, W_vooo * t3, W_oovvvo * t3, W_ovovvo * t3', *time1) + + c_t3 = t3 + t3.transpose(0, 1, 2, 4, 5, 3) + W_ovovvo += W_ovovvo.transpose(2, 1, 0, 4, 3, 5) + time2 = logger.process_clock(), logger.perf_counter() + t4_tmp = np.empty((blksize,) * 4 + (nvir,) * 4, dtype=t4.dtype) + r4_tmp = np.empty((blksize,) * 4 + (nvir,) * 4, dtype=t4.dtype) + for l0, l1 in lib.prange(0, nocc, blksize): + bl = l1 - l0 + for k0, k1 in lib.prange(0, l1, blksize): + bk = k1 - k0 + for j0, j1 in lib.prange(0, k1, blksize): + bj = j1 - j0 + for i0, i1 in lib.prange(0, j1, blksize): + bi = i1 - i0 + + einsum("iejabm,mklced->ijklabcd", W_ovovvo[i0:i1, :, j0:j1], + c_t3[:, k0:k1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=0.0) + einsum("iekacm,mjlbed->ijklabcd", W_ovovvo[i0:i1, :, k0:k1], + c_t3[:, j0:j1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) + einsum("ieladm,mjkbec->ijklabcd", W_ovovvo[i0:i1, :, l0:l1], + c_t3[:, j0:j1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) + einsum("jekbcm,milaed->ijklabcd", W_ovovvo[j0:j1, :, k0:k1], + c_t3[:, i0:i1, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) + einsum("jelbdm,mikaec->ijklabcd", W_ovovvo[j0:j1, :, l0:l1], + c_t3[:, i0:i1, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, 
beta=1.0) + einsum("kelcdm,mijaeb->ijklabcd", W_ovovvo[k0:k1, :, l0:l1], + c_t3[:, i0:i1, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) + + einsum("kjinma,mnlbcd->ijklabcd", W_ooooov[k0:k1, j0:j1, i0:i1], t3[:, :, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - einsum("amnijl,mnkbdc->ijklabcd", W_vooooo[..., i0:i1, j0:j1, l0:l1], + einsum("ljinma,mnkbdc->ijklabcd", W_ooooov[l0:l1, j0:j1, i0:i1], t3[:, :, k0:k1,], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - einsum("amnikl,mnjcdb->ijklabcd", W_vooooo[..., i0:i1, k0:k1, l0:l1], + einsum("lkinma,mnjcdb->ijklabcd", W_ooooov[l0:l1, k0:k1, i0:i1], t3[:, :, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - einsum("bmnjik,mnlacd->ijklabcd", W_vooooo[..., j0:j1, i0:i1, k0:k1], + einsum("kijnmb,mnlacd->ijklabcd", W_ooooov[k0:k1, i0:i1, j0:j1], t3[:, :, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - einsum("bmnjil,mnkadc->ijklabcd", W_vooooo[..., j0:j1, i0:i1, l0:l1], + einsum("lijnmb,mnkadc->ijklabcd", W_ooooov[l0:l1, i0:i1, j0:j1], t3[:, :, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - einsum("bmnjkl,mnicda->ijklabcd", W_vooooo[..., j0:j1, k0:k1, l0:l1], + einsum("lkjnmb,mnicda->ijklabcd", W_ooooov[l0:l1, k0:k1, j0:j1], t3[:, :, i0:i1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - einsum("cmnkij,mnlabd->ijklabcd", W_vooooo[..., k0:k1, i0:i1, j0:j1], + einsum("jiknmc,mnlabd->ijklabcd", W_ooooov[j0:j1, i0:i1, k0:k1], t3[:, :, l0:l1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - einsum("cmnkil,mnjadb->ijklabcd", W_vooooo[..., k0:k1, i0:i1, l0:l1], + einsum("liknmc,mnjadb->ijklabcd", W_ooooov[l0:l1, i0:i1, k0:k1], t3[:, :, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - einsum("cmnkjl,mnibda->ijklabcd", W_vooooo[..., k0:k1, j0:j1, l0:l1], + einsum("ljknmc,mnibda->ijklabcd", W_ooooov[l0:l1, j0:j1, k0:k1], t3[:, :, i0:i1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - einsum("dmnlij,mnkabc->ijklabcd", 
W_vooooo[..., l0:l1, i0:i1, j0:j1], + einsum("jilnmd,mnkabc->ijklabcd", W_ooooov[j0:j1, i0:i1, l0:l1], t3[:, :, k0:k1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - einsum("dmnlik,mnjacb->ijklabcd", W_vooooo[..., l0:l1, i0:i1, k0:k1], + einsum("kilnmd,mnjacb->ijklabcd", W_ooooov[k0:k1, i0:i1, l0:l1], t3[:, :, j0:j1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - einsum("dmnljk,mnibca->ijklabcd", W_vooooo[..., l0:l1, j0:j1, k0:k1], + einsum("kjlnmd,mnibca->ijklabcd", W_ooooov[k0:k1, j0:j1, l0:l1], t3[:, :, i0:i1], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) einsum("mlcd,abmijk->ijklabcd", t2[:, l0:l1], W_vvoooo[..., i0:i1, j0:j1, k0:k1], @@ -546,57 +573,44 @@ def compute_r4_tri(mycc, imds, t2, t3, t4): _unpack_t4_(mycc, t4, t4_tmp, i0, i1, j0, j1, k0, k1, l0, l1) einsum("ae,ijklebcd->ijklabcd", F_vv, t4_tmp[:bi, :bj, :bk, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, j0, j1, i0, i1, k0, k1, l0, l1) - einsum("be,jikleacd->ijklabcd", F_vv, t4_tmp[:bj, :bi, :bk, :bl], + einsum("be,ijklaecd->ijklabcd", F_vv, t4_tmp[:bi, :bj, :bk, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, k0, k1, i0, i1, j0, j1, l0, l1) - einsum("ce,kijleabd->ijklabcd", F_vv, t4_tmp[:bk, :bi, :bj, :bl], + einsum("ce,ijklabed->ijklabcd", F_vv, t4_tmp[:bi, :bj, :bk, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, l0, l1, i0, i1, j0, j1, k0, k1) - einsum("de,lijkeabc->ijklabcd", F_vv, t4_tmp[:bl, :bi, :bj, :bk], + einsum("de,ijklabce->ijklabcd", F_vv, t4_tmp[:bi, :bj, :bk, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, i0, i1, j0, j1, k0, k1, l0, l1) einsum("abef,ijklefcd->ijklabcd", W_vvvv, t4_tmp[:bi, :bj, :bk, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, i0, i1, k0, k1, j0, j1, l0, l1) - einsum("acef,ikjlefbd->ijklabcd", W_vvvv, t4_tmp[:bi, :bk, :bj, 
:bl], + einsum("acef,ijklebfd->ijklabcd", W_vvvv, t4_tmp[:bi, :bj, :bk, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, i0, i1, l0, l1, j0, j1, k0, k1) - einsum("adef,iljkefbc->ijklabcd", W_vvvv, t4_tmp[:bi, :bl, :bj, :bk], + einsum("adef,ijklebcf->ijklabcd", W_vvvv, t4_tmp[:bi, :bj, :bk, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, j0, j1, k0, k1, i0, i1, l0, l1) - einsum("bcef,jkilefad->ijklabcd", W_vvvv, t4_tmp[:bj, :bk, :bi, :bl], + einsum("bcef,ijklaefd->ijklabcd", W_vvvv, t4_tmp[:bi, :bj, :bk, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, j0, j1, l0, l1, i0, i1, k0, k1) - einsum("bdef,jlikefac->ijklabcd", W_vvvv, t4_tmp[:bj, :bl, :bi, :bk], + einsum("bdef,ijklaecf->ijklabcd", W_vvvv, t4_tmp[:bi, :bj, :bk, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, k0, k1, l0, l1, i0, i1, j0, j1) - einsum("cdef,klijefab->ijklabcd", W_vvvv, t4_tmp[:bk, :bl, :bi, :bj], + einsum("cdef,ijklabef->ijklabcd", W_vvvv, t4_tmp[:bi, :bj, :bk, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - _accumulate_t4_(mycc, r4, r4_tmp, i0, i1, j0, j1, k0, k1, l0, l1) - time2 = log.timer_debug1('t4: iter: W_vvoooo * t2, W_vvvvoo * t2,\n' - ' W_vvvo * t3, W_vooo * t3, W_ovvvoo * t3, W_ovvovo * t3, W_vooooo * t3,\n' + _accumulate_t4_(mycc, r4, r4_tmp, i0, i1, j0, j1, k0, k1, l0, l1, beta=1.0) + time2 = log.timer_debug1('t4: iter: W_vvoooo * t2, W_vvvvoo * t2, W_ovovvo * t3, W_ooooov * t3,\n' ' F_vv * t4, W_vvvv * t4 [%3d, %3d]:' % (l0, l1), *time2) t4_tmp = None r4_tmp = None c_t3 = None F_vv = imds.F_vv = None - W_vvvo = imds.W_vvvo = None - W_vooo = imds.W_vooo = None W_vvvv = imds.W_vvvv = None - W_ovvvoo = imds.W_ovvvoo = None - W_ovvovo = imds.W_ovvovo = None - W_vooooo = imds.W_vooooo = None + W_ovovvo = imds.W_ovovvo = None + W_ooooov = imds.W_ooooov = None W_vvoooo = imds.W_vvoooo = None W_vvvvoo = 
imds.W_vvvvoo = None - time1 = log.timer_debug1('t4: W_vvoooo * t2, W_vvvvoo * t2, W_vvvo * t3, W_vooo * t3, W_ovvvoo * t3,\n' - ' W_ovvovo * t3, W_vooooo * t3, F_vv * t4, W_vvvv * t4', *time1) + time1 = log.timer_debug1('t4: W_vvoooo * t2, W_vvvvoo * t2, W_ovovvo * t3, W_ooooov * t3, F_vv * t4, W_vvvv * t4', + *time1) time2 = logger.process_clock(), logger.perf_counter() - t4_tmp = np.empty((blksize,) * 4 + (nvir,) * 4, dtype=t4.dtype) + t4_tmp = np.empty((nocc,) + (blksize,) * 3 + (nvir,) * 4, dtype=t4.dtype) r4_tmp = np.empty((blksize,) * 4 + (nvir,) * 4, dtype=t4.dtype) for l0, l1 in lib.prange(0, nocc, blksize): bl = l1 - l0 @@ -607,95 +621,74 @@ def compute_r4_tri(mycc, imds, t2, t3, t4): for i0, i1 in lib.prange(0, j1, blksize): bi = i1 - i0 - r4_tmp[:] = 0.0 - for m0, m1 in lib.prange(0, nocc, blksize): - bm = m1 - m0 + _unpack_t4_(mycc, t4, t4_tmp, 0, nocc, j0, j1, k0, k1, l0, l1, nocc, blksize, blksize, blksize) + einsum("mi,mjklabcd->ijklabcd", F_oo[:, i0:i1], t4_tmp[:, :bj, :bk, :bl], + out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=0.0) + einsum("mbie,mjklaecd->ijklabcd", W_ovov[:, :, i0:i1, :], t4_tmp[:, :bj, :bk, :bl], + out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) + einsum("mcie,mjklabed->ijklabcd", W_ovov[:, :, i0:i1, :], t4_tmp[:, :bj, :bk, :bl], + out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) + einsum("mdie,mjklabce->ijklabcd", W_ovov[:, :, i0:i1, :], t4_tmp[:, :bj, :bk, :bl], + out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) + t4_spin_summation_inplace_(t4_tmp, nocc * blksize**3, nvir, "P4_201", 1.0, 0.0) + einsum("amie,mjklebcd->ijklabcd", W_voov[:, :, i0:i1, :], t4_tmp[:, :bj, :bk, :bl], + out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.5, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, j0, j1, k0, k1, l0, l1) - einsum("mi,mjklabcd->ijklabcd", F_oo[m0:m1, i0:i1], t4_tmp[:bm, :bj, :bk, :bl], - out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - t4_spin_summation_inplace_(t4_tmp, blksize**4, nvir, "P4_201", 1.0, 0.0) - 
einsum("maei,mjklebcd->ijklabcd", W_ovvo[m0:m1, :, :, i0:i1], - t4_tmp[:bm, :bj, :bk, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.5, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, i0, i1, k0, k1, l0, l1) - einsum("mj,miklbacd->ijklabcd", F_oo[m0:m1, j0:j1], t4_tmp[:bm, :bi, :bk, :bl], - out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - t4_spin_summation_inplace_(t4_tmp, blksize**4, nvir, "P4_201", 1.0, 0.0) - einsum("mbej,mikleacd->ijklabcd", W_ovvo[m0:m1, :, :, j0:j1], - t4_tmp[:bm, :bi, :bk, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.5, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, i0, i1, j0, j1, l0, l1) - einsum("mk,mijlcabd->ijklabcd", F_oo[m0:m1, k0:k1], t4_tmp[:bm, :bi, :bj, :bl], - out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - t4_spin_summation_inplace_(t4_tmp, blksize**4, nvir, "P4_201", 1.0, 0.0) - einsum("mcek,mijleabd->ijklabcd", W_ovvo[m0:m1, :, :, k0:k1], - t4_tmp[:bm, :bi, :bj, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.5, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, i0, i1, j0, j1, k0, k1) - einsum("ml,mijkdabc->ijklabcd", F_oo[m0:m1, l0:l1], t4_tmp[:bm, :bi, :bj, :bk], - out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - t4_spin_summation_inplace_(t4_tmp, blksize**4, nvir, "P4_201", 1.0, 0.0) - einsum("mdel,mijkeabc->ijklabcd", W_ovvo[m0:m1, :, :, l0:l1], - t4_tmp[:bm, :bi, :bj, :bk], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.5, beta=1.0) - - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, j0, j1, k0, k1, l0, l1) - einsum("maie,mjklbecd->ijklabcd", W_ovov[m0:m1, :, i0:i1, :], - t4_tmp[:bm, :bj, :bk, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mbie,mjklaecd->ijklabcd", W_ovov[m0:m1, :, i0:i1, :], - t4_tmp[:bm, :bj, :bk, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, k0, k1, j0, j1, l0, l1) - einsum("maie,mkjlcebd->ijklabcd", W_ovov[m0:m1, :, i0:i1, :], - t4_tmp[:bm, :bk, :bj, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - 
einsum("mcie,mkjlaebd->ijklabcd", W_ovov[m0:m1, :, i0:i1, :], - t4_tmp[:bm, :bk, :bj, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, l0, l1, j0, j1, k0, k1) - einsum("maie,mljkdebc->ijklabcd", W_ovov[m0:m1, :, i0:i1, :], - t4_tmp[:bm, :bl, :bj, :bk], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mdie,mljkaebc->ijklabcd", W_ovov[m0:m1, :, i0:i1, :], - t4_tmp[:bm, :bl, :bj, :bk], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, i0, i1, k0, k1, l0, l1) - einsum("mbje,miklaecd->ijklabcd", W_ovov[m0:m1, :, j0:j1, :], - t4_tmp[:bm, :bi, :bk, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("maje,miklbecd->ijklabcd", W_ovov[m0:m1, :, j0:j1, :], - t4_tmp[:bm, :bi, :bk, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, i0, i1, j0, j1, l0, l1) - einsum("mcke,mijlaebd->ijklabcd", W_ovov[m0:m1, :, k0:k1, :], - t4_tmp[:bm, :bi, :bj, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("make,mijlcebd->ijklabcd", W_ovov[m0:m1, :, k0:k1, :], - t4_tmp[:bm, :bi, :bj, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, i0, i1, j0, j1, k0, k1) - einsum("mdle,mijkaebc->ijklabcd", W_ovov[m0:m1, :, l0:l1, :], - t4_tmp[:bm, :bi, :bj, :bk], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("male,mijkdebc->ijklabcd", W_ovov[m0:m1, :, l0:l1, :], - t4_tmp[:bm, :bi, :bj, :bk], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, k0, k1, i0, i1, l0, l1) - einsum("mbje,mkilcead->ijklabcd", W_ovov[m0:m1, :, j0:j1, :], - t4_tmp[:bm, :bk, :bi, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mcje,mkilbead->ijklabcd", W_ovov[m0:m1, :, j0:j1, :], - t4_tmp[:bm, :bk, :bi, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, l0, l1, i0, 
i1, k0, k1) - einsum("mbje,mlikdeac->ijklabcd", W_ovov[m0:m1, :, j0:j1, :], - t4_tmp[:bm, :bl, :bi, :bk], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mdje,mlikbeac->ijklabcd", W_ovov[m0:m1, :, j0:j1, :], - t4_tmp[:bm, :bl, :bi, :bk], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, j0, j1, i0, i1, l0, l1) - einsum("mcke,mjilbead->ijklabcd", W_ovov[m0:m1, :, k0:k1, :], - t4_tmp[:bm, :bj, :bi, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mbke,mjilcead->ijklabcd", W_ovov[m0:m1, :, k0:k1, :], - t4_tmp[:bm, :bj, :bi, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, j0, j1, i0, i1, k0, k1) - einsum("mdle,mjikbeac->ijklabcd", W_ovov[m0:m1, :, l0:l1, :], - t4_tmp[:bm, :bj, :bi, :bk], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mble,mjikdeac->ijklabcd", W_ovov[m0:m1, :, l0:l1, :], - t4_tmp[:bm, :bj, :bi, :bk], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, l0, l1, i0, i1, j0, j1) - einsum("mcke,mlijdeab->ijklabcd", W_ovov[m0:m1, :, k0:k1, :], - t4_tmp[:bm, :bl, :bi, :bj], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mdke,mlijceab->ijklabcd", W_ovov[m0:m1, :, k0:k1, :], - t4_tmp[:bm, :bl, :bi, :bj], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, k0, k1, i0, i1, j0, j1) - einsum("mdle,mkijceab->ijklabcd", W_ovov[m0:m1, :, l0:l1, :], - t4_tmp[:bm, :bk, :bi, :bj], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) - einsum("mcle,mkijdeab->ijklabcd", W_ovov[m0:m1, :, l0:l1, :], - t4_tmp[:bm, :bk, :bi, :bj], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) + _unpack_t4_(mycc, t4, t4_tmp, 0, nocc, i0, i1, k0, k1, l0, l1, nocc, blksize, blksize, blksize) + einsum("mj,miklbacd->ijklabcd", F_oo[:, j0:j1], t4_tmp[:, :bi, :bk, :bl], + out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) + 
einsum("maje,miklbecd->ijklabcd", W_ovov[:, :, j0:j1, :], t4_tmp[:, :bi, :bk, :bl], + out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) + einsum("mcje,miklbaed->ijklabcd", W_ovov[:, :, j0:j1, :], t4_tmp[:, :bi, :bk, :bl], + out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) + einsum("mdje,miklbace->ijklabcd", W_ovov[:, :, j0:j1, :], t4_tmp[:, :bi, :bk, :bl], + out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) + t4_spin_summation_inplace_(t4_tmp, nocc * blksize**3, nvir, "P4_201", 1.0, 0.0) + einsum("bmje,mikleacd->ijklabcd", W_voov[:, :, j0:j1, :], t4_tmp[:, :bi, :bk, :bl], + out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.5, beta=1.0) + + _unpack_t4_(mycc, t4, t4_tmp, 0, nocc, i0, i1, j0, j1, l0, l1, nocc, blksize, blksize, blksize) + einsum("mk,mijlcabd->ijklabcd", F_oo[:, k0:k1], t4_tmp[:, :bi, :bj, :bl], + out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) + einsum("make,mijlcebd->ijklabcd", W_ovov[:, :, k0:k1, :], t4_tmp[:, :bi, :bj, :bl], + out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) + einsum("mbke,mijlcaed->ijklabcd", W_ovov[:, :, k0:k1, :], t4_tmp[:, :bi, :bj, :bl], + out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) + einsum("mdke,mijlcabe->ijklabcd", W_ovov[:, :, k0:k1, :], t4_tmp[:, :bi, :bj, :bl], + out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) + t4_spin_summation_inplace_(t4_tmp, nocc * blksize**3, nvir, "P4_201", 1.0, 0.0) + einsum("cmke,mijleabd->ijklabcd", W_voov[:, :, k0:k1, :], t4_tmp[:, :bi, :bj, :bl], + out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.5, beta=1.0) + + _unpack_t4_(mycc, t4, t4_tmp, 0, nocc, i0, i1, j0, j1, k0, k1, nocc, blksize, blksize, blksize) + einsum("ml,mijkdabc->ijklabcd", F_oo[:, l0:l1], t4_tmp[:, :bi, :bj, :bk], + out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) + einsum("male,mijkdebc->ijklabcd", W_ovov[:, :, l0:l1, :], t4_tmp[:, :bi, :bj, :bk], + out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) + einsum("mble,mijkdaec->ijklabcd", W_ovov[:, :, l0:l1, :], t4_tmp[:, :bi, :bj, :bk], + 
out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) + einsum("mcle,mijkdabe->ijklabcd", W_ovov[:, :, l0:l1, :], t4_tmp[:, :bi, :bj, :bk], + out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-1.0, beta=1.0) + t4_spin_summation_inplace_(t4_tmp, nocc * blksize**3, nvir, "P4_201", 1.0, 0.0) + einsum("dmle,mijkeabc->ijklabcd", W_voov[:, :, l0:l1, :], t4_tmp[:, :bi, :bj, :bk], + out=r4_tmp[:bi, :bj, :bk, :bl], alpha=0.5, beta=1.0) + + _unpack_t4_triples_(mycc, t4, t4_tmp, 0, nocc, j0, j1, k0, k1, l0, l1, + nocc, blksize, blksize, blksize) + einsum("maie,mjklbecd->ijklabcd", W_ovov[:, :, i0:i1, :], + t4_tmp[:, :bj, :bk, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) + _unpack_t4_triples_(mycc, t4, t4_tmp, 0, nocc, i0, i1, k0, k1, l0, l1, + nocc, blksize, blksize, blksize) + einsum("mbje,miklaecd->ijklabcd", W_ovov[:, :, j0:j1, :], + t4_tmp[:, :bi, :bk, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) + _unpack_t4_triples_(mycc, t4, t4_tmp, 0, nocc, i0, i1, j0, j1, l0, l1, + nocc, blksize, blksize, blksize) + einsum("mcke,mijlaebd->ijklabcd", W_ovov[:, :, k0:k1, :], + t4_tmp[:, :bi, :bj, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) + _unpack_t4_triples_(mycc, t4, t4_tmp, 0, nocc, i0, i1, j0, j1, k0, k1, + nocc, blksize, blksize, blksize) + einsum("mdle,mijkaebc->ijklabcd", W_ovov[:, :, l0:l1, :], + t4_tmp[:, :bi, :bj, :bk], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=-0.5, beta=1.0) _accumulate_t4_(mycc, r4, r4_tmp, i0, i1, j0, j1, k0, k1, l0, l1, beta=1.0) time2 = log.timer_debug1('t4: iter: F_oo * t4, W_ovvo * t4, W_ovov * t4 [%3d, %3d]:'%(l0, l1), *time2) @@ -703,11 +696,11 @@ def compute_r4_tri(mycc, imds, t2, t3, t4): r4_tmp = None F_oo = imds.F_oo = None W_ovvo = imds.W_ovvo = None - W_ovov = imds.V_ovov = None + W_ovov = imds.W_ovov = None time1 = log.timer_debug1('t4: F_oo * t4, W_ovvo * t4, W_ovov * t4', *time1) time2 = logger.process_clock(), logger.perf_counter() - t4_tmp = np.empty((blksize,) * 4 + (nvir,) * 4, dtype=t4.dtype) + t4_tmp = 
np.empty((blksize,) * 3 + (nocc,) + (nvir,) * 4, dtype=t4.dtype) r4_tmp = np.empty((blksize,) * 4 + (nvir,) * 4, dtype=t4.dtype) for l0, l1 in lib.prange(0, nocc, blksize): bl = l1 - l0 @@ -717,33 +710,30 @@ def compute_r4_tri(mycc, imds, t2, t3, t4): bj = j1 - j0 for i0, i1 in lib.prange(0, j1, blksize): bi = i1 - i0 - - r4_tmp[:] = 0.0 for m0, m1 in lib.prange(0, nocc, blksize): bm = m1 - m0 - for n0, n1 in lib.prange(0, nocc, blksize): - bn = n1 - n0 - - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, n0, n1, k0, k1, l0, l1) - einsum("mnij,mnklabcd->ijklabcd", W_oooo[m0:m1, n0:n1, i0:i1, j0:j1], - t4_tmp[:bm, :bn, :bk, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, n0, n1, j0, j1, l0, l1) - einsum("mnik,mnjlacbd->ijklabcd", W_oooo[m0:m1, n0:n1, i0:i1, k0:k1], - t4_tmp[:bm, :bn, :bj, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, n0, n1, j0, j1, k0, k1) - einsum("mnil,mnjkadbc->ijklabcd", W_oooo[m0:m1, n0:n1, i0:i1, l0:l1], - t4_tmp[:bm, :bn, :bj, :bk], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, n0, n1, i0, i1, l0, l1) - einsum("mnjk,mnilbcad->ijklabcd", W_oooo[m0:m1, n0:n1, j0:j1, k0:k1], - t4_tmp[:bm, :bn, :bi, :bl], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, n0, n1, i0, i1, k0, k1) - einsum("mnjl,mnikbdac->ijklabcd", W_oooo[m0:m1, n0:n1, j0:j1, l0:l1], - t4_tmp[:bm, :bn, :bi, :bk], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - _unpack_t4_(mycc, t4, t4_tmp, m0, m1, n0, n1, i0, i1, j0, j1) - einsum("mnkl,mnijcdab->ijklabcd", W_oooo[m0:m1, n0:n1, k0:k1, l0:l1], - t4_tmp[:bm, :bn, :bi, :bj], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) - _accumulate_t4_(mycc, r4, r4_tmp, i0, i1, j0, j1, k0, k1, l0, l1, beta=1.0) + _unpack_t4_(mycc, t4, t4_tmp, k0, k1, l0, l1, m0, m1, 0, nocc, blksize, blksize, blksize, nocc) + einsum("mnij,klmncdab->ijklabcd", W_oooo[m0:m1, 
:, i0:i1, j0:j1], + t4_tmp[:bk, :bl, :bm], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=0.0) + _unpack_t4_(mycc, t4, t4_tmp, j0, j1, l0, l1, m0, m1, 0, nocc, blksize, blksize, blksize, nocc) + einsum("mnik,jlmnbdac->ijklabcd", W_oooo[m0:m1, :, i0:i1, k0:k1], + t4_tmp[:bj, :bl, :bm], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) + _unpack_t4_(mycc, t4, t4_tmp, j0, j1, k0, k1, m0, m1, 0, nocc, blksize, blksize, blksize, nocc) + einsum("mnil,jkmnbcad->ijklabcd", W_oooo[m0:m1, :, i0:i1, l0:l1], + t4_tmp[:bj, :bk, :bm], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) + _unpack_t4_(mycc, t4, t4_tmp, i0, i1, l0, l1, m0, m1, 0, nocc, blksize, blksize, blksize, nocc) + einsum("mnjk,ilmnadbc->ijklabcd", W_oooo[m0:m1, :, j0:j1, k0:k1], + t4_tmp[:bi, :bl, :bm], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) + _unpack_t4_(mycc, t4, t4_tmp, i0, i1, k0, k1, m0, m1, 0, nocc, blksize, blksize, blksize, nocc) + einsum("mnjl,ikmnacbd->ijklabcd", W_oooo[m0:m1, :, j0:j1, l0:l1], + t4_tmp[:bi, :bk, :bm], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) + _unpack_t4_(mycc, t4, t4_tmp, i0, i1, j0, j1, m0, m1, 0, nocc, blksize, blksize, blksize, nocc) + einsum("mnkl,ijmnabcd->ijklabcd", W_oooo[m0:m1, :, k0:k1, l0:l1], + t4_tmp[:bi, :bj, :bm], out=r4_tmp[:bi, :bj, :bk, :bl], alpha=1.0, beta=1.0) + + _accumulate_t4_(mycc, r4, r4_tmp, i0, i1, j0, j1, k0, k1, l0, l1, beta=1.0) + time2 = log.timer_debug1('t4: iter: W_oooo * t4 [%3d, %3d]:'%(l0, l1), *time2) t4_tmp = None r4_tmp = None @@ -751,7 +741,8 @@ def compute_r4_tri(mycc, imds, t2, t3, t4): time1 = log.timer_debug1('t4: W_oooo * t4', *time1) return r4 -def r4_tri_divide_e_(mycc, r4, mo_energy): +def r4_tri_divide_e_py_(mycc, r4, mo_energy): + # NOTE: For reference, not used in the actual code. 
nocc, nmo = mycc.nocc, mycc.nmo nvir = nmo - nocc blksize = mycc.blksize @@ -804,15 +795,9 @@ def update_amps_rccsdtq_tri_(mycc, tamps, eris): # symmetrization r2 += r2.transpose(1, 0, 3, 2) time1 = log.timer_debug1('t1t2: symmetrize r2', *time1) - # divide by eijkabc + # divide by eijab r1r2_divide_e_(mycc, r1, r2, mo_energy) time1 = log.timer_debug1('t1t2: divide r1 & r2 by eia & eijab', *time1) - - res_norm = [np.linalg.norm(r1), np.linalg.norm(r2)] - - t1 += r1 - t2 += r2 - time1 = log.timer_debug1('t1t2: update t1 & t2', *time1) time0 = log.timer_debug1('t1t2 total', *time0) # t3 @@ -830,12 +815,6 @@ def update_amps_rccsdtq_tri_(mycc, tamps, eris): # divide by eijkabc r3_divide_e_(mycc, r3, mo_energy) time1 = log.timer_debug1('t3: divide r3 by eijkabc', *time1) - - res_norm.append(np.linalg.norm(r3)) - - t3 += r3 - r3 = None - time1 = log.timer_debug1('t3: update t3', *time1) time0 = log.timer_debug1('t3 total', *time0) # t4 @@ -847,19 +826,22 @@ def update_amps_rccsdtq_tri_(mycc, tamps, eris): time1 = log.timer_debug1('t4: compute r4', *time1) # symmetrization symmetrize_tamps_tri_(r4, nocc) - t4_spin_summation_inplace_(r4, nocc4, nvir, "P4_full", -1.0 / 24.0, 1.0) + t4_project_1_minus_p4_p31_inplace_(r4, nocc4, nvir) purify_tamps_tri_(r4, nocc) time1 = log.timer_debug1('t4: symmetrize r4', *time1) # divide by eijkabc r4_tri_divide_e_(mycc, r4, mo_energy) time1 = log.timer_debug1('t4: divide r4 by eijklabcd', *time1) - res_norm.append(np.linalg.norm(r4)) + res_norm = [np.linalg.norm(r1), np.linalg.norm(r2), np.linalg.norm(r3), np.linalg.norm(r4)] - # t4 += r4 + t1 += r1 + t2 += r2 + t3 += r3 + # C implementation of t4 += r4 t4_add_(t4, r4, nocc4, nvir) - r4 = None - time1 = log.timer_debug1('t4: update t4', *time1) + r1, r2, r3, r4 = None, None, None, None + time1 = log.timer_debug1('t4: update t1, t2, t3, t4', *time1) time0 = log.timer_debug1('t4 total', *time0) return res_norm @@ -956,6 +938,7 @@ def dump_chk(mycc, tamps=None, frozen=None, mo_coeff=None, 
mo_occ=None): lib.chkfile.save(mycc.chkfile, 'rccsdtq', cc_chk) else: lib.chkfile.save(mycc.chkfile, 'rccsdtq_highm', cc_chk) + return mycc class RCCSDTQ(rccsdt.RCCSDT): @@ -987,7 +970,7 @@ class RCCSDTQ(rccsdt.RCCSDT): T amplitudes t1[i,a], t2[i,j,a,b], t3[i,j,k,a,b,c] t4 : An array of shape (compressed_occ, nvir, nvir, nvir, nvir) for T4 amplitudes. - The occupied-oribtal dimension is stored in a compressed form for the + The occupied-orbital dimension is stored in a compressed form for the i <= j <= k <= l index combinations. The compressed tensor can be expanded to the full tensor by self.tamps_tri2full(t4) tamps : @@ -1071,9 +1054,9 @@ def __init__(self): self.W_vooo = None self.W_vvvo = None self.W_vvvv = None - self.W_ovvvoo = None - self.W_ovvovo = None - self.W_vooooo = None + self.W_oovvvo = None + self.W_ovovvo = None + self.W_ooooov = None self.W_vvoooo = None self.W_vvvvoo = None diff --git a/pyscf/cc/rccsdtq_highm.py b/pyscf/cc/rccsdtq_highm.py index 0bcd01dde6..18aba0c11c 100644 --- a/pyscf/cc/rccsdtq_highm.py +++ b/pyscf/cc/rccsdtq_highm.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2025 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -27,18 +27,16 @@ ''' import numpy as np -import numpy import functools import ctypes from pyscf import lib from pyscf.lib import logger -from pyscf.mp.mp2 import get_nocc, get_nmo, get_frozen_mask, get_e_hf, _mo_without_core -from pyscf.cc import _ccsd, rccsdtq +from pyscf.cc import rccsdtq from pyscf.cc.rccsdt import (_einsum, t3_spin_summation_inplace_, update_t1_fock_eris, intermediates_t1t2, compute_r1r2, r1r2_divide_e_, intermediates_t3, _PhysicistsERIs) from pyscf.cc.rccsdt_highm import (t3_spin_summation, t3_perm_symmetrize_inplace_, purify_tamps_, r1r2_add_t3_, intermediates_t3_add_t3, compute_r3, r3_divide_e_) -from pyscf.cc.rccsdtq import t4_spin_summation_inplace_, t4_add_, _IMDS +from pyscf.cc.rccsdtq import t4_project_1_minus_p4_p31_inplace_, t4_add_, _IMDS from pyscf import __config__ @@ -119,26 +117,26 @@ def intermediates_t4(mycc, imds, t2, t3, t4): einsum('me,mjab->abej', t1_fock[:nocc, nocc:], t2, out=W_vvvo, alpha=-1.0, beta=1.0) - W_ovvvoo = np.empty((nocc,) + (nvir,) * 3 + (nocc,) * 2, dtype=t2.dtype) - einsum('maef,jibf->mabeij', t1_eris[:nocc, nocc:, nocc:, nocc:], t2, out=W_ovvvoo, alpha=2.0, beta=0.0) - einsum('mafe,jibf->mabeij', t1_eris[:nocc, nocc:, nocc:, nocc:], t2, out=W_ovvvoo, alpha=-1.0, beta=1.0) - einsum('mnei,njab->mabeij', t1_eris[:nocc, :nocc, nocc:, :nocc], t2, out=W_ovvvoo, alpha=-2.0, beta=1.0) - einsum('nmei,njab->mabeij', t1_eris[:nocc, :nocc, nocc:, :nocc], t2, out=W_ovvvoo, alpha=1.0, beta=1.0) + W_oovvvo = np.empty((nocc,) * 2 + (nvir,) * 3 + (nocc,), dtype=t2.dtype) + einsum('maef,jibf->ijeabm', t1_eris[:nocc, nocc:, nocc:, nocc:], t2, out=W_oovvvo, alpha=2.0, beta=0.0) + einsum('mafe,jibf->ijeabm', t1_eris[:nocc, nocc:, nocc:, nocc:], t2, out=W_oovvvo, alpha=-1.0, beta=1.0) + einsum('mnei,njab->ijeabm', t1_eris[:nocc, :nocc, nocc:, :nocc], t2, out=W_oovvvo, alpha=-2.0, beta=1.0) + einsum('nmei,njab->ijeabm', t1_eris[:nocc, :nocc, nocc:, :nocc], t2, out=W_oovvvo, alpha=1.0, beta=1.0) c_t3 = np.empty_like(t3) 
t3_spin_summation(t3, c_t3, nocc**3, nvir, "P3_201", 1.0, 0.0) - einsum('nmfe,nijfab->mabeij', t1_eris[:nocc, :nocc, nocc:, nocc:], c_t3, out=W_ovvvoo, alpha=0.5, beta=1.0) - einsum('mnfe,nijfab->mabeij', t1_eris[:nocc, :nocc, nocc:, nocc:], c_t3, out=W_ovvvoo, alpha=-0.25, beta=1.0) + einsum('nmfe,nijfab->ijeabm', t1_eris[:nocc, :nocc, nocc:, nocc:], c_t3, out=W_oovvvo, alpha=0.5, beta=1.0) + einsum('mnfe,nijfab->ijeabm', t1_eris[:nocc, :nocc, nocc:, nocc:], c_t3, out=W_oovvvo, alpha=-0.25, beta=1.0) c_t3 = None - W_ovvovo = np.empty((nocc,) + (nvir,) * 2 + (nocc, nvir, nocc), dtype=t2.dtype) - einsum('mafe,jibf->mabiej', t1_eris[:nocc, nocc:, nocc:, nocc:], t2, out=W_ovvovo, alpha=1.0, beta=0.0) - einsum('mnie,njab->mabiej', t1_eris[:nocc, :nocc, :nocc, nocc:], t2, out=W_ovvovo, alpha=-1.0, beta=1.0) - einsum('nmef,injfab->mabiej', t1_eris[:nocc, :nocc, nocc:, nocc:], t3, out=W_ovvovo, alpha=-0.5, beta=1.0) + W_ovovvo = np.empty((nocc,) + (nvir,) + (nocc, nvir, nvir, nocc), dtype=t2.dtype) + einsum('mafe,jibf->iejabm', t1_eris[:nocc, nocc:, nocc:, nocc:], t2, out=W_ovovvo, alpha=1.0, beta=0.0) + einsum('mnie,njab->iejabm', t1_eris[:nocc, :nocc, :nocc, nocc:], t2, out=W_ovovvo, alpha=-1.0, beta=1.0) + einsum('nmef,injfab->iejabm', t1_eris[:nocc, :nocc, nocc:, nocc:], t3, out=W_ovovvo, alpha=-0.5, beta=1.0) - W_vooooo = np.empty((nvir,) + (nocc,) * 5, dtype=t2.dtype) - einsum('mnek,ijae->amnijk', t1_eris[:nocc, :nocc, nocc:, :nocc], t2, out=W_vooooo, alpha=1.0, beta=0.0) - einsum('mnef,ijkaef->amnijk', t1_eris[:nocc, :nocc, nocc:, nocc:], t3, out=W_vooooo, alpha=0.5, beta=1.0) - W_vooooo += W_vooooo.transpose(0, 2, 1, 3, 5, 4) + W_ooooov = np.empty((nocc,) * 5 + (nvir,), dtype=t2.dtype) + einsum('mnek,ijae->kjinma', t1_eris[:nocc, :nocc, nocc:, :nocc], t2, out=W_ooooov, alpha=1.0, beta=0.0) + einsum('mnef,ijkaef->kjinma', t1_eris[:nocc, :nocc, nocc:, nocc:], t3, out=W_ooooov, alpha=0.5, beta=1.0) + W_ooooov += W_ooooov.transpose(1, 0, 2, 4, 3, 5) W_vvoooo = 
np.empty((nvir,) * 2 + (nocc,) * 4, dtype=t2.dtype) einsum('amef,ijkebf->abmijk', t1_eris[nocc:, :nocc, nocc:, nocc:], t3, out=W_vvoooo, alpha=1.0, beta=0.0) @@ -161,10 +159,10 @@ def intermediates_t4(mycc, imds, t2, t3, t4): einsum('mnef,nmjkfabc->abcejk', t1_eris[:nocc, :nocc, nocc:, nocc:], c_t4, out=W_vvvvoo, alpha=-0.5, beta=1.0) c_t4 = None - W_ovvvoo += W_ovvvoo.transpose(0, 2, 1, 3, 5, 4) + W_oovvvo += W_oovvvo.transpose(1, 0, 2, 4, 3, 5) W_vvoooo += W_vvoooo.transpose(1, 0, 2, 4, 3, 5) W_vvvvoo += W_vvvvoo.transpose(0, 2, 1, 3, 5, 4) - imds.W_ovvvoo, imds.W_ovvovo, imds.W_vooooo = W_ovvvoo, W_ovvovo, W_vooooo + imds.W_oovvvo, imds.W_ovovvo, imds.W_ooooov = W_oovvvo, W_ovovvo, W_ooooov imds.W_vvoooo, imds.W_vvvvoo = W_vvoooo, W_vvvvoo return imds @@ -183,7 +181,7 @@ def compute_r4(mycc, imds, t2, t3, t4): F_oo, F_vv = imds.F_oo, imds.F_vv W_oooo, W_ovvo, W_ovov = imds.W_oooo, imds.W_ovvo, imds.W_ovov W_vvvo, W_vooo, W_vvvv = imds.W_vvvo, imds.W_vooo, imds.W_vvvv - W_ovvvoo, W_ovvovo, W_vooooo = imds.W_ovvvoo, imds.W_ovvovo, imds.W_vooooo + W_oovvvo, W_ovovvo, W_ooooov = imds.W_oovvvo, imds.W_ovovvo, imds.W_ooooov W_vvoooo, W_vvvvoo = imds.W_vvoooo, imds.W_vvvvoo r4 = np.empty_like(t4) @@ -225,19 +223,19 @@ def compute_r4(mycc, imds, t2, t3, t4): c_t3 = np.empty_like(t3) t3_spin_summation(t3, c_t3, nocc**3, nvir, "P3_201", 1.0, 0.0) - einsum('mabeij,mklecd->ijklabcd', W_ovvvoo, c_t3, out=r4, alpha=0.125, beta=1.0) - W_ovvvoo = imds.W_ovvvoo = None + einsum('ijeabm,mklecd->ijklabcd', W_oovvvo, c_t3, out=r4, alpha=0.125, beta=1.0) + W_oovvvo = imds.W_oovvvo = None c_t3 = None - time1 = log.timer_debug1('t4: W_ovvvoo * c_t3', *time1) + time1 = log.timer_debug1('t4: W_oovvvo * c_t3', *time1) - einsum('mabiej,kmlecd->ijklabcd', W_ovvovo, t3, out=r4, alpha=-0.5, beta=1.0) - einsum('mcbiej,kmlead->ijklabcd', W_ovvovo, t3, out=r4, alpha=-1.0, beta=1.0) - W_ovvovo = imds.W_ovvovo = None - time1 = log.timer_debug1('t4: W_ovvovo * t3', *time1) + 
einsum('iejabm,kmlecd->ijklabcd', W_ovovvo, t3, out=r4, alpha=-0.5, beta=1.0) + einsum('iejcbm,kmlead->ijklabcd', W_ovovvo, t3, out=r4, alpha=-1.0, beta=1.0) + W_ovovvo = imds.W_ovovvo = None + time1 = log.timer_debug1('t4: W_ovovvo * t3', *time1) - einsum('amnijk,mnlbcd->ijklabcd', W_vooooo, t3, out=r4, alpha=0.5, beta=1.0) - W_vooooo = imds.W_vooooo = None - time1 = log.timer_debug1('t4: W_vooooo * t3', *time1) + einsum('kjinma,mnlbcd->ijklabcd', W_ooooov, t3, out=r4, alpha=0.5, beta=1.0) + W_ooooov = imds.W_ooooov = None + time1 = log.timer_debug1('t4: W_ooooov * t3', *time1) einsum('abmijk,mlcd->ijklabcd', W_vvoooo, t2, out=r4, alpha=-0.5, beta=1.0) W_vvoooo = imds.W_vvoooo = None @@ -266,7 +264,7 @@ def update_amps_rccsdtq_(mycc, tamps, eris): t1, t2, t3, t4 = tamps mo_energy = eris.mo_energy - imds = _IMDS + imds = _IMDS() # t1, t2 update_t1_fock_eris(mycc, imds, t1, eris) @@ -280,15 +278,9 @@ def update_amps_rccsdtq_(mycc, tamps, eris): # symmetrization r2 += r2.transpose(1, 0, 3, 2) time1 = log.timer_debug1('t1t2: symmetrize r2', *time1) - # divide by eijkabc + # divide by eijab r1r2_divide_e_(mycc, r1, r2, mo_energy) time1 = log.timer_debug1('t1t2: divide r1 & r2 by eia & eijab', *time1) - - res_norm = [np.linalg.norm(r1), np.linalg.norm(r2)] - - t1 += r1 - t2 += r2 - time1 = log.timer_debug1('t1t2: update t1 & t2', *time1) time0 = log.timer_debug1('t1t2 total', *time0) # t3 @@ -306,12 +298,6 @@ def update_amps_rccsdtq_(mycc, tamps, eris): # divide by eijkabc r3_divide_e_(mycc, r3, mo_energy) time1 = log.timer_debug1('t3: divide r3 by eijkabc', *time1) - - res_norm.append(np.linalg.norm(r3)) - - t3 += r3 - r3 = None - time1 = log.timer_debug1('t3: update t3', *time1) time0 = log.timer_debug1('t3 total', *time0) # t4 @@ -323,18 +309,22 @@ def update_amps_rccsdtq_(mycc, tamps, eris): time1 = log.timer_debug1('t4: compute r4', *time1) # symmetrization t4_perm_symmetrize_inplace_(r4, nocc, nvir, 1.0, 0.0) - t4_spin_summation_inplace_(r4, nocc**4, nvir, 
"P4_full", -1.0 / 24.0, 1.0) + t4_project_1_minus_p4_p31_inplace_(r4, nocc**4, nvir) purify_tamps_(r4) time1 = log.timer_debug1('t4: symmetrize r4', *time1) # divide by eijkabc r4_divide_e_(mycc, r4, mo_energy) time1 = log.timer_debug1('t4: divide r4 by eijklabcd', *time1) - res_norm.append(np.linalg.norm(r4)) + res_norm = [np.linalg.norm(r1), np.linalg.norm(r2), np.linalg.norm(r3), np.linalg.norm(r4)] + t1 += r1 + t2 += r2 + t3 += r3 + # C implementation of t4 += r4 t4_add_(t4, r4, nocc**4, nvir) - r4 = None - time1 = log.timer_debug1('t4: update t4', *time1) + r1, r2, r3, r4 = None, None, None, None + time1 = log.timer_debug1('t4: update t1, t2, t3, t4', *time1) time0 = log.timer_debug1('t4 total', *time0) return res_norm diff --git a/pyscf/cc/test/test_ccsd_lambda.py b/pyscf/cc/test/test_ccsd_lambda.py index 3bbc7d0ebd..831185325c 100644 --- a/pyscf/cc/test/test_ccsd_lambda.py +++ b/pyscf/cc/test/test_ccsd_lambda.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import tempfile import unittest import numpy from functools import reduce @@ -112,7 +111,7 @@ def test_ccsd(self): self.assertAlmostEqual(numpy.dot(numpy.sin(l2new.flatten()), numpy.arange(35**2)), 507.656936701192, 8) def test_restart(self): - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() cc1 = mycc.copy() cc1.max_cycle = 5 cc1.solve_lambda() diff --git a/pyscf/cc/test/test_gccsd.py b/pyscf/cc/test/test_gccsd.py index ed248645d1..bc021fd8e2 100644 --- a/pyscf/cc/test/test_gccsd.py +++ b/pyscf/cc/test/test_gccsd.py @@ -505,6 +505,36 @@ def test_mbpt2(self): emp2 = mp.MP2(mf).kernel()[0] self.assertAlmostEqual(e, emp2, 9) + def test_complex_orbitals(self): + mol = gto.M(atom=''' + O 0. 0. 0. + H 0. -0.757 0.587 + H 0. 
0.757 0.587''', + basis='6-31g*') + mf = mol.RHF().run() + cc = mf.CCSD().run() + nr_ref = cc.ecc + + mf = mol.GHF() + dm = mf.get_init_guess() + 0j + nao = mol.nao + # Mixing alpha and beta spins + dm[nao:,:nao] = .02j + dm[:nao,nao:] = -.02j + mf.kernel(dm0=dm) + # test eris_incore + cc = mf.CCSD().run() + self.assertAlmostEqual(cc.ecc, nr_ref, 6) + + # test eris_outcore + mf._eri = None + cc = mf.CCSD().run() + self.assertAlmostEqual(cc.ecc, nr_ref, 6) + + # With SOC, correlation energy is slightly different from NR ref value + mf = mf.x2c().run() + cc = mf.CCSD().run() + self.assertAlmostEqual(cc.ecc, -0.19527045, 6) if __name__ == "__main__": print("Tests for GCCSD") diff --git a/pyscf/cc/test/test_rccsd.py b/pyscf/cc/test/test_rccsd.py index ff77224dae..d53cfb2217 100644 --- a/pyscf/cc/test/test_rccsd.py +++ b/pyscf/cc/test/test_rccsd.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import tempfile from functools import reduce import unittest import copy @@ -43,8 +42,8 @@ def setUpModule(): mol.basis = '631g' mol.build() mf = scf.RHF(mol) - mf.chkfile = tempfile.NamedTemporaryFile().name mf.conv_tol_grad = 1e-8 + mf.chkfile = lib.NamedTemporaryFile().name mf.kernel() mycc = rccsd.RCCSD(mf) @@ -152,7 +151,7 @@ def test_no_diis(self): self.assertAlmostEqual(cc1.e_corr, -0.13516622806104395, 7) def test_restart(self): - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() cc1 = cc.CCSD(mf) cc1.max_cycle = 5 cc1.kernel() diff --git a/pyscf/cc/test/test_rccsd_lambda.py b/pyscf/cc/test/test_rccsd_lambda.py index ae46eb507e..12945144f3 100644 --- a/pyscf/cc/test/test_rccsd_lambda.py +++ b/pyscf/cc/test/test_rccsd_lambda.py @@ -262,7 +262,7 @@ def test_rdm_trace(self): +numpy.einsum('pkkq->pq', eri0[:nocc,:nocc,:nocc,:nocc]).trace()) self.assertAlmostEqual(e2, -794721.197459942, 8) self.assertAlmostEqual(numpy.einsum('pqrs,pqrs', dm2, eri0)*.5 + - numpy.einsum('pq,qp', dm1, 
h1), e2, 9) + numpy.einsum('pq,qp', dm1, h1), e2, 8) self.assertAlmostEqual(abs(dm2-dm2.transpose(1,0,3,2)).max(), 0, 9) self.assertAlmostEqual(abs(dm2-dm2.transpose(2,3,0,1)).max(), 0, 9) diff --git a/pyscf/cc/test/test_rccsdt.py b/pyscf/cc/test/test_rccsdt.py index 90fc4ef609..83e5f53f10 100644 --- a/pyscf/cc/test/test_rccsdt.py +++ b/pyscf/cc/test/test_rccsdt.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2025 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import tempfile from functools import reduce import unittest import copy @@ -41,8 +40,8 @@ def setUpModule(): mol.basis = '631g' mol.build() mf = scf.RHF(mol) - mf.chkfile = tempfile.NamedTemporaryFile().name mf.conv_tol_grad = 1e-8 + mf.chkfile = lib.NamedTemporaryFile().name mf.kernel() mycc = rccsdt.RCCSDT(mf) @@ -135,10 +134,10 @@ def test_no_diis(self): cc1.diis = False cc1.max_cycle = 4 cc1.kernel() - self.assertAlmostEqual(cc1.e_corr, -0.1362172678103062, 7) + self.assertAlmostEqual(cc1.e_corr, -0.13620561873465928, 7) def test_restart(self): - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() cc1 = cc.RCCSDT(mf) cc1.max_cycle = 5 cc1.kernel() @@ -149,7 +148,7 @@ def test_restart(self): cc1.diis = adiis cc1.max_cycle = 3 cc1.kernel(tamps=None) - self.assertAlmostEqual(cc1.e_corr, -0.13618790413398396, 7) + self.assertAlmostEqual(cc1.e_corr, -0.13601543222004697, 7) tamps = cc1.vector_to_amplitudes(adiis.extrapolate()) self.assertAlmostEqual(abs(tamps[0] - cc1.t1).max(), 0, 9) @@ -160,7 +159,7 @@ def test_restart(self): import copy tmp_tamps = copy.deepcopy(tamps) cc1.kernel(tmp_tamps) - self.assertAlmostEqual(cc1.e_corr, -0.13636637468987364, 7) + 
self.assertAlmostEqual(cc1.e_corr, -0.13632994594327189, 7) cc1.diis = adiis cc1.max_cycle = 2 diff --git a/pyscf/cc/test/test_rccsdt_highm.py b/pyscf/cc/test/test_rccsdt_highm.py index 6232b636e1..aad955ea84 100644 --- a/pyscf/cc/test/test_rccsdt_highm.py +++ b/pyscf/cc/test/test_rccsdt_highm.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2025 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import tempfile from functools import reduce import unittest import copy @@ -42,8 +41,8 @@ def setUpModule(): mol.basis = '631g' mol.build() mf = scf.RHF(mol) - mf.chkfile = tempfile.NamedTemporaryFile().name mf.conv_tol_grad = 1e-8 + mf.chkfile = lib.NamedTemporaryFile().name mf.kernel() mycc = rccsdt_highm.RCCSDT(mf) @@ -92,10 +91,10 @@ def test_no_diis(self): cc1.diis = False cc1.max_cycle = 4 cc1.kernel() - self.assertAlmostEqual(cc1.e_corr, -0.1362172678103062, 7) + self.assertAlmostEqual(cc1.e_corr, -0.13620561873465487, 7) def test_restart(self): - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() cc1 = cc.RCCSDT(mf, compact_tamps=False) cc1.max_cycle = 5 cc1.kernel() @@ -106,7 +105,7 @@ def test_restart(self): cc1.diis = adiis cc1.max_cycle = 3 cc1.kernel(tamps=None) - self.assertAlmostEqual(cc1.e_corr, -0.13618790413398396, 7) + self.assertAlmostEqual(cc1.e_corr, -0.13601543222004753, 7) tamps = cc1.vector_to_amplitudes(adiis.extrapolate()) self.assertAlmostEqual(abs(tamps[0] - cc1.t1).max(), 0, 9) @@ -117,7 +116,7 @@ def test_restart(self): import copy tmp_tamps = copy.deepcopy(tamps) cc1.kernel(tmp_tamps) - self.assertAlmostEqual(cc1.e_corr, -0.13636637468987364, 7) + self.assertAlmostEqual(cc1.e_corr, -0.1363299459432733, 
7) cc1.diis = adiis cc1.max_cycle = 2 diff --git a/pyscf/cc/test/test_rccsdt_q.py b/pyscf/cc/test/test_rccsdt_q.py new file mode 100644 index 0000000000..9dd4427f3d --- /dev/null +++ b/pyscf/cc/test/test_rccsdt_q.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy +from functools import reduce + +from pyscf import gto, scf, lib, symm +from pyscf import cc +from pyscf import ao2mo +from pyscf.cc import rccsdt_q + + +def setUpModule(): + global mol, rhf, mcc, mcc2 + mol = gto.Mole() + mol.atom = [ + [8 , (0. , 0. , 0.)], + [1 , (0. , -.757 , .487)], + [1 , (0. 
, .757 , .687)]] + mol.symmetry = True + mol.verbose = 7 + mol.output = '/dev/null' + mol.basis = 'ccpvdz' + mol.build() + rhf = scf.RHF(mol) + rhf.conv_tol = 1e-14 + rhf.scf() + + mcc = cc.CCSDT(rhf, compact_tamps=True) + mcc.conv_tol = 1e-10 + mcc.blksize = 2 + mcc.blksize_oooo = 2 + mcc.blksize_oovv = 2 + mcc.ccsdt() + + mcc2 = cc.CCSDT(rhf, compact_tamps=False) + mcc2.conv_tol = 1e-10 + mcc2.ccsdt() + +def tearDownModule(): + global mol, rhf, mcc, mcc2 + mol.stdout.close() + del mol, rhf, mcc, mcc2 + +class KnownValues(unittest.TestCase): + def test_rccsdt_q(self): + e_q_bracket, e_q_paren = mcc.ccsdt_q() + self.assertAlmostEqual(e_q_bracket, -0.00044374834015582527, 9) + self.assertAlmostEqual(e_q_paren, -0.0004917163848923114, 9) + e_q_bracket2, e_q_paren2 = mcc2.ccsdt_q() + self.assertAlmostEqual(e_q_bracket2, -0.00044374834015582527, 9) + self.assertAlmostEqual(e_q_paren2, -0.0004917163848923114, 9) + + def test_random(self): + mol = gto.M() + numpy.random.seed(42) + nocc, nvir = 5, 9 + nmo = nocc + nvir + + eris = cc.rccsdt._PhysicistsERIs() + eri1 = numpy.random.random((nmo, nmo, nmo, nmo)) - .5 + eri1 = eri1 + eri1.transpose(2, 1, 0, 3) + eri1 = eri1 + eri1.transpose(0, 3, 2, 1) + eri1 = eri1 + eri1.transpose(1, 0, 3, 2) + eri1 *= .1 + eris.pppp = eri1 + f = numpy.random.random((nmo, nmo)) * .1 + eris.fock = f + f.T + numpy.diag(numpy.arange(nmo)) + eris.mo_energy = eris.fock.diagonal() + + t1 = numpy.random.random((nocc, nvir)) * .1 + t2 = numpy.random.random((nocc, nocc, nvir, nvir)) * .1 + t2 = t2 + t2.transpose(1, 0, 3, 2) + t3_full = numpy.random.random((nocc, nocc, nocc, nvir, nvir, nvir)) * .1 + t3_full = t3_full + t3_full.transpose(1, 0, 2, 4, 3, 5) + t3_full.transpose(2, 1, 0, 5, 4, 3) + t3_full = t3_full + t3_full.transpose(0, 2, 1, 3, 5, 4) + mf = scf.RHF(mol) + mycc = cc.CCSDT(mf, compact_tamps=False) + mycc.incore_complete = True + mycc.mo_energy = mycc._scf.mo_energy = numpy.arange(0., nocc + nvir) + e_q_bracket, e_q_paren = 
rccsdt_q.kernel(mycc, eris, (t1, t2, t3_full)) + self.assertAlmostEqual(e_q_bracket, -1.1359579193293403, 9) + self.assertAlmostEqual(e_q_paren, -256.1325101409764, 9) + + idx_i, idx_j, idx_k = numpy.meshgrid(numpy.arange(nocc), numpy.arange(nocc), numpy.arange(nocc), indexing='ij') + t3_tri = t3_full[(idx_i <= idx_j) & (idx_j <= idx_k)].reshape(-1, nvir, nvir, nvir) + mycc2 = cc.CCSDT(mf, compact_tamps=True) + mycc2.incore_complete = True + mycc2.mo_energy = mycc2._scf.mo_energy = numpy.arange(0., nocc + nvir) + mycc2.nocc, mycc2.nmo = nocc, nmo + e_q_bracket2, e_q_paren2 = rccsdt_q.kernel(mycc2, eris, (t1, t2, t3_tri)) + self.assertAlmostEqual(e_q_bracket2, -1.1359579193293403, 9) + self.assertAlmostEqual(e_q_paren2, -256.1325101409764, 9) + +if __name__ == "__main__": + print("Full Tests for RCCSDT(Q)") + unittest.main() diff --git a/pyscf/cc/test/test_rccsdtq.py b/pyscf/cc/test/test_rccsdtq.py index 361a722dc2..e994241657 100644 --- a/pyscf/cc/test/test_rccsdtq.py +++ b/pyscf/cc/test/test_rccsdtq.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2025 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import tempfile from functools import reduce import unittest import copy @@ -42,8 +41,8 @@ def setUpModule(): mol.basis = 'sto3g' mol.build() mf = scf.RHF(mol) - mf.chkfile = tempfile.NamedTemporaryFile().name mf.conv_tol_grad = 1e-8 + mf.chkfile = lib.NamedTemporaryFile().name mf.kernel() mycc = rccsdtq.RCCSDTQ(mf) @@ -120,10 +119,10 @@ def test_no_diis(self): cc1.diis = False cc1.max_cycle = 4 cc1.kernel() - self.assertAlmostEqual(cc1.e_corr, -0.04931187059105583, 7) + self.assertAlmostEqual(cc1.e_corr, -0.049309044956853954, 7) def test_restart(self): - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() cc1 = cc.RCCSDTQ(mf) cc1.max_cycle = 5 cc1.kernel() @@ -134,7 +133,7 @@ def test_restart(self): cc1.diis = adiis cc1.max_cycle = 3 cc1.kernel(tamps=None) - self.assertAlmostEqual(cc1.e_corr, -0.04958018529884438, 7) + self.assertAlmostEqual(cc1.e_corr, -0.04957847496659795, 7) tamps = cc1.vector_to_amplitudes(adiis.extrapolate()) self.assertAlmostEqual(abs(tamps[0] - cc1.t1).max(), 0, 9) @@ -146,7 +145,7 @@ def test_restart(self): import copy tmp_tamps = copy.deepcopy(tamps) cc1.kernel(tmp_tamps) - self.assertAlmostEqual(cc1.e_corr, -0.04956154962282544, 7) + self.assertAlmostEqual(cc1.e_corr, -0.04956142543268752, 7) cc1.diis = adiis cc1.max_cycle = 2 diff --git a/pyscf/cc/test/test_rccsdtq_highm.py b/pyscf/cc/test/test_rccsdtq_highm.py index 5761f5c69c..a768e4548c 100644 --- a/pyscf/cc/test/test_rccsdtq_highm.py +++ b/pyscf/cc/test/test_rccsdtq_highm.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2025 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import tempfile from functools import reduce import unittest import copy @@ -42,8 +41,8 @@ def setUpModule(): mol.basis = 'sto3g' mol.build() mf = scf.RHF(mol) - mf.chkfile = tempfile.NamedTemporaryFile().name mf.conv_tol_grad = 1e-8 + mf.chkfile = lib.NamedTemporaryFile().name mf.kernel() mycc = rccsdtq_highm.RCCSDTQ(mf) @@ -88,10 +87,10 @@ def test_no_diis(self): cc1.diis = False cc1.max_cycle = 4 cc1.kernel() - self.assertAlmostEqual(cc1.e_corr, -0.04931187059105583, 7) + self.assertAlmostEqual(cc1.e_corr, -0.04930904495685323, 7) def test_restart(self): - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() cc1 = cc.RCCSDTQ(mf, compact_tamps=False) cc1.max_cycle = 5 cc1.kernel() @@ -102,7 +101,7 @@ def test_restart(self): cc1.diis = adiis cc1.max_cycle = 3 cc1.kernel(tamps=None) - self.assertAlmostEqual(cc1.e_corr, -0.04958018529884438, 7) + self.assertAlmostEqual(cc1.e_corr, -0.04957847496659781, 7) tamps = cc1.vector_to_amplitudes(adiis.extrapolate()) self.assertAlmostEqual(abs(tamps[0] - cc1.t1).max(), 0, 9) @@ -114,7 +113,7 @@ def test_restart(self): import copy tmp_tamps = copy.deepcopy(tamps) cc1.kernel(tmp_tamps) - self.assertAlmostEqual(cc1.e_corr, -0.04956154962282544, 7) + self.assertAlmostEqual(cc1.e_corr, -0.04956142543268758, 7) cc1.diis = adiis cc1.max_cycle = 2 diff --git a/pyscf/cc/test/test_uccsdt.py b/pyscf/cc/test/test_uccsdt.py index f68bc63fdc..eaeb66b699 100644 --- a/pyscf/cc/test/test_uccsdt.py +++ b/pyscf/cc/test/test_uccsdt.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2025 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import tempfile import unittest import copy import numpy @@ -185,7 +184,7 @@ def test_with_df_s2(self): self.assertAlmostEqual(mycc.e_tot, -75.83479685448731, 8) def test_restart_s0(self): - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() cc1 = cc.UCCSDT(mf) cc1.max_cycle = 5 cc1.kernel() @@ -196,7 +195,7 @@ def test_restart_s0(self): cc1.diis = adiis cc1.max_cycle = 3 cc1.kernel(tamps=None) - self.assertAlmostEqual(cc1.e_corr, -0.13617537767875998, 7) + self.assertAlmostEqual(cc1.e_corr, -0.13598921953216506, 7) tamps = cc1.vector_to_amplitudes(adiis.extrapolate()) self.assertAlmostEqual(abs(tamps[0][0] - cc1.t1[0]).max(), 0, 9) @@ -213,7 +212,7 @@ def test_restart_s0(self): import copy tmp_tamps = copy.deepcopy(tamps) cc1.kernel(tmp_tamps) - self.assertAlmostEqual(cc1.e_corr, -0.13636112399459543, 7) + self.assertAlmostEqual(cc1.e_corr, -0.13631662652255083, 7) cc1.diis = adiis cc1.max_cycle = 2 @@ -233,7 +232,7 @@ def test_restart_s0(self): self.assertAlmostEqual(abs(cc1.t3[3] - cc2.t3[3]).max(), 0, 9) def test_restart_s2(self): - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() cc1 = cc.UCCSDT(mf_s2) cc1.max_cycle = 5 cc1.kernel() @@ -244,7 +243,7 @@ def test_restart_s2(self): cc1.diis = adiis cc1.max_cycle = 3 cc1.kernel(tamps=None) - self.assertAlmostEqual(cc1.e_corr, -0.10899528342067309, 7) + self.assertAlmostEqual(cc1.e_corr, -0.10890900976962495, 7) tamps = cc1.vector_to_amplitudes(adiis.extrapolate()) self.assertAlmostEqual(abs(tamps[0][0] - cc1.t1[0]).max(), 0, 9) @@ -261,7 +260,7 @@ def test_restart_s2(self): import copy tmp_tamps = copy.deepcopy(tamps) cc1.kernel(tmp_tamps) - self.assertAlmostEqual(cc1.e_corr, -0.10909663534556953, 7) + self.assertAlmostEqual(cc1.e_corr, -0.10908025852894825, 7) cc1.diis = adiis cc1.max_cycle = 2 @@ -281,7 +280,7 @@ def test_restart_s2(self): self.assertAlmostEqual(abs(cc1.t3[3] - cc2.t3[3]).max(), 0, 9) def test_restart_s2_not_do_diis_max_t(self): - ftmp = 
tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() cc1 = cc.UCCSDT(mf_s2) cc1.max_cycle = 5 cc1.do_diis_max_t = False @@ -293,7 +292,7 @@ def test_restart_s2_not_do_diis_max_t(self): cc1.diis = adiis cc1.max_cycle = 3 cc1.kernel(tamps=None) - self.assertAlmostEqual(cc1.e_corr, -0.10900065442286336, 7) + self.assertAlmostEqual(cc1.e_corr, -0.10890253107679486, 7) tamps = cc1.vector_to_amplitudes(adiis.extrapolate()) tamps.append(cc1.tamps[2]) @@ -307,7 +306,7 @@ def test_restart_s2_not_do_diis_max_t(self): import copy tmp_tamps = copy.deepcopy(tamps) cc1.kernel(tmp_tamps) - self.assertAlmostEqual(cc1.e_corr, -0.10907414414270558, 7) + self.assertAlmostEqual(cc1.e_corr, -0.10903201331931782, 7) cc1.diis = adiis cc1.max_cycle = 2 diff --git a/pyscf/cc/test/test_uccsdt_highm.py b/pyscf/cc/test/test_uccsdt_highm.py index 8399638510..acb6c2bd80 100644 --- a/pyscf/cc/test/test_uccsdt_highm.py +++ b/pyscf/cc/test/test_uccsdt_highm.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2025 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import tempfile import unittest import copy import numpy @@ -85,7 +84,7 @@ def test_with_df_s2(self): self.assertAlmostEqual(mycc.e_tot, -75.83479685448731, 8) def test_restart_s0(self): - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() cc1 = cc.UCCSDT(mf, compact_tamps=False) cc1.max_cycle = 5 cc1.kernel() @@ -96,7 +95,7 @@ def test_restart_s0(self): cc1.diis = adiis cc1.max_cycle = 3 cc1.kernel(tamps=None) - self.assertAlmostEqual(cc1.e_corr, -0.13617537767875998, 7) + self.assertAlmostEqual(cc1.e_corr, -0.13598921953216658, 7) tamps = cc1.vector_to_amplitudes(adiis.extrapolate()) self.assertAlmostEqual(abs(tamps[0][0] - cc1.t1[0]).max(), 0, 9) @@ -121,7 +120,7 @@ def test_restart_s0(self): import copy tmp_tamps = copy.deepcopy(tamps) cc1.kernel(tmp_tamps) - self.assertAlmostEqual(cc1.e_corr, -0.13636112399459543, 7) + self.assertAlmostEqual(cc1.e_corr, -0.1363166265225506, 7) cc1.diis = adiis cc1.max_cycle = 2 @@ -141,7 +140,7 @@ def test_restart_s0(self): self.assertAlmostEqual(abs(cc1.t3[3] - cc2.t3[3]).max(), 0, 9) def test_restart_s2(self): - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() cc1 = cc.UCCSDT(mf_s2, compact_tamps=False) cc1.max_cycle = 5 cc1.kernel() @@ -152,7 +151,7 @@ def test_restart_s2(self): cc1.diis = adiis cc1.max_cycle = 3 cc1.kernel(tamps=None) - self.assertAlmostEqual(cc1.e_corr, -0.10899528342067309, 7) + self.assertAlmostEqual(cc1.e_corr, -0.10890900976962473, 7) tamps = cc1.vector_to_amplitudes(adiis.extrapolate()) self.assertAlmostEqual(abs(tamps[0][0] - cc1.t1[0]).max(), 0, 9) @@ -169,7 +168,7 @@ def test_restart_s2(self): import copy tmp_tamps = copy.deepcopy(tamps) cc1.kernel(tmp_tamps) - self.assertAlmostEqual(cc1.e_corr, -0.10909663534556953, 7) + self.assertAlmostEqual(cc1.e_corr, -0.10908025852894809, 7) cc1.diis = adiis cc1.max_cycle = 2 @@ -189,7 +188,7 @@ def test_restart_s2(self): self.assertAlmostEqual(abs(cc1.t3[3] - cc2.t3[3]).max(), 0, 9) def 
test_restart_s2_not_do_diis_max_t(self): - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() cc1 = cc.UCCSDT(mf_s2, compact_tamps=False) cc1.max_cycle = 5 cc1.do_diis_max_t = False @@ -201,7 +200,7 @@ def test_restart_s2_not_do_diis_max_t(self): cc1.diis = adiis cc1.max_cycle = 3 cc1.kernel(tamps=None) - self.assertAlmostEqual(cc1.e_corr, -0.10900065442286336, 7) + self.assertAlmostEqual(cc1.e_corr, -0.10890253107679505, 7) tamps = cc1.vector_to_amplitudes(adiis.extrapolate()) tamps.append(cc1.tamps[2]) @@ -215,7 +214,7 @@ def test_restart_s2_not_do_diis_max_t(self): import copy tmp_tamps = copy.deepcopy(tamps) cc1.kernel(tmp_tamps) - self.assertAlmostEqual(cc1.e_corr, -0.10907414414270558, 7) + self.assertAlmostEqual(cc1.e_corr, -0.10903201331931785, 7) cc1.diis = adiis cc1.max_cycle = 2 diff --git a/pyscf/cc/uccsdt.py b/pyscf/cc/uccsdt.py index 76c052b05b..0e24ffe6aa 100644 --- a/pyscf/cc/uccsdt.py +++ b/pyscf/cc/uccsdt.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2025 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -450,7 +450,7 @@ def energy_uhf(mycc, tamps, eris=None): eos += lib.einsum('ia,JB,iJaB', t1a, t1b, eris.pPpP[:nocca, :noccb, nocca:, noccb:]) if abs((ess + eos).imag) > 1e-4: - logger.warn(mycc, 'Non-zero imaginary part found in %s energy %s', mycc.__class__.name, ess + eos) + logger.warn(mycc, 'Non-zero imaginary part found in %s energy %s', mycc.__class__.__name__, ess + eos) mycc.e_corr = lib.tag_array((ess + eos).real, e_corr_ss=ess.real, e_corr_os=eos.real) return mycc.e_corr.real @@ -553,12 +553,12 @@ def intermediates_t1t2_uhf(mycc, imds, t2): einsum('kldc,jdlc->kj', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=F_oo, alpha=1.0, beta=1.0) W_oooo = t1_erisaa[:nocca, :nocca, :nocca, :nocca].copy() einsum('klcd,ijcd->klij', t1_erisaa[:nocca, :nocca, nocca:, nocca:], t2aa, out=W_oooo, alpha=0.5, beta=1.0) - W_ovvo = t1_erisaa[:nocca, nocca:, nocca:, :nocca].copy() - einsum('klcd,jlbd->kbcj', t1_erisaa[:nocca, :nocca, nocca:, nocca:], t2aa, out=W_ovvo, alpha=0.5, beta=1.0) - einsum('klcd,jbld->kbcj', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=W_ovvo, alpha=0.5, beta=1.0) - W_OvVo = t1_erisab[nocca:, :noccb, :nocca, noccb:].transpose(1, 0, 3, 2).copy() - einsum('klcd,jbld->kbcj', t1_erisbb[:noccb, :noccb, noccb:, noccb:], t2ab, out=W_OvVo, alpha=0.5, beta=1.0) - einsum('lkdc,jlbd->kbcj', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2aa, out=W_OvVo, alpha=0.5, beta=1.0) + W_voov = t1_erisaa[nocca:, :nocca, :nocca, nocca:].copy() + einsum('klcd,jlbd->bkjc', t1_erisaa[:nocca, :nocca, nocca:, nocca:], t2aa, out=W_voov, alpha=0.5, beta=1.0) + einsum('klcd,jbld->bkjc', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=W_voov, alpha=0.5, beta=1.0) + W_vOoV = t1_erisab[nocca:, :noccb, :nocca, noccb:].copy() + einsum('klcd,jbld->bkjc', t1_erisbb[:noccb, :noccb, noccb:, noccb:], t2ab, out=W_vOoV, alpha=0.5, beta=1.0) + einsum('lkdc,jlbd->bkjc', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2aa, out=W_vOoV, alpha=0.5, beta=1.0) F_VV = t1_fockb[noccb:, 
noccb:].copy() einsum('klcd,klbd->bc', t1_erisbb[:noccb, :noccb, noccb:, noccb:], t2bb, out=F_VV, alpha=-0.5, beta=1.0) @@ -568,36 +568,31 @@ def intermediates_t1t2_uhf(mycc, imds, t2): einsum('lkcd,lcjd->kj', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=F_OO, alpha=1.0, beta=1.0) W_OOOO = t1_erisbb[:noccb, :noccb, :noccb, :noccb].copy() einsum('klcd,ijcd->klij', t1_erisbb[:noccb, :noccb, noccb:, noccb:], t2bb, out=W_OOOO, alpha=0.5, beta=1.0) - W_OVVO = t1_erisbb[:noccb, noccb:, noccb:, :noccb].copy() - einsum('klcd,jlbd->kbcj', t1_erisbb[:noccb, :noccb, noccb:, noccb:], t2bb, out=W_OVVO, alpha=0.5, beta=1.0) - einsum('lkdc,ldjb->kbcj', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=W_OVVO, alpha=0.5, beta=1.0) + W_VOOV = t1_erisbb[noccb:, :noccb, :noccb, noccb:].copy() + einsum('klcd,jlbd->bkjc', t1_erisbb[:noccb, :noccb, noccb:, noccb:], t2bb, out=W_VOOV, alpha=0.5, beta=1.0) + einsum('lkdc,ldjb->bkjc', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=W_VOOV, alpha=0.5, beta=1.0) + W_oVvO = t1_erisab[:nocca, noccb:, nocca:, :noccb].copy() einsum('klcd,ldjb->kbcj', t1_erisaa[:nocca, :nocca, nocca:, nocca:], t2ab, out=W_oVvO, alpha=0.5, beta=1.0) einsum('klcd,jlbd->kbcj', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2bb, out=W_oVvO, alpha=0.5, beta=1.0) - W_oOoO = t1_erisab[:nocca, :noccb, :nocca, :noccb].copy() einsum('klcd,icjd->klij', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=W_oOoO, alpha=1.0, beta=1.0) - W_vOvO = - t1_erisab[nocca:, :noccb, nocca:, :noccb] - einsum('lkcd,lajd->akcj', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=W_vOvO, alpha=0.5, beta=1.0) - W_VoVo = - t1_erisab[:nocca, noccb:, :nocca, noccb:].transpose(1, 0, 3, 2) - einsum('kldc,idlb->bkci', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=W_VoVo, alpha=0.5, beta=1.0) - W_vovo = - t1_erisaa[nocca:, :nocca, nocca:, :nocca] - einsum('klcd,lida->akci', t1_erisaa[:nocca, :nocca, nocca:, nocca:], t2aa, out=W_vovo, alpha=0.5, beta=1.0) - 
einsum('klcd,iald->akci', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=W_vovo, alpha=0.5, beta=1.0) - W_VOVO = - t1_erisbb[noccb:, :noccb, noccb:, :noccb] - einsum('klcd,ljdb->bkcj', t1_erisbb[:noccb, :noccb, noccb:, noccb:], t2bb, out=W_VOVO, alpha=0.5, beta=1.0) - einsum('lkdc,ldjb->bkcj', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=W_VOVO, alpha=0.5, beta=1.0) - W_vOVo = t1_erisab[nocca:, :noccb, :nocca, noccb:].transpose(0, 1, 3, 2).copy() - einsum('lkdc,ilad->akci', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2aa, out=W_vOVo, alpha=0.5, beta=1.0) - einsum('lkdc,iald->akci', t1_erisbb[:noccb, :noccb, noccb:, noccb:], t2ab, out=W_vOVo, alpha=0.5, beta=1.0) - W_VovO = t1_erisab[:nocca, noccb:, nocca:, :noccb].transpose(1, 0, 2, 3).copy() - einsum('klcd,ljdb->bkcj', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2bb, out=W_VovO, alpha=0.5, beta=1.0) - einsum('lkdc,ldjb->bkcj', t1_erisaa[:nocca, :nocca, nocca:, nocca:], t2ab, out=W_VovO, alpha=0.5, beta=1.0) + W_vOvO = t1_erisab[nocca:, :noccb, nocca:, :noccb].copy() + einsum('lkcd,lajd->akcj', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=W_vOvO, alpha=-0.5, beta=1.0) + W_oVoV = t1_erisab[:nocca, noccb:, :nocca, noccb:].copy() + einsum('kldc,idlb->kbic', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=W_oVoV, alpha=-0.5, beta=1.0) + W_vovo = t1_erisaa[nocca:, :nocca, nocca:, :nocca].copy() + einsum('klcd,lida->akci', t1_erisaa[:nocca, :nocca, nocca:, nocca:], t2aa, out=W_vovo, alpha=-0.5, beta=1.0) + einsum('klcd,iald->akci', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=W_vovo, alpha=-0.5, beta=1.0) + W_VOVO = t1_erisbb[noccb:, :noccb, noccb:, :noccb].copy() + einsum('klcd,ljdb->bkcj', t1_erisbb[:noccb, :noccb, noccb:, noccb:], t2bb, out=W_VOVO, alpha=-0.5, beta=1.0) + einsum('lkdc,ldjb->bkcj', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=W_VOVO, alpha=-0.5, beta=1.0) + imds.F_oo, imds.F_OO, imds.F_vv, imds.F_VV = F_oo, F_OO, F_vv, F_VV imds.W_oooo, imds.W_oOoO, 
imds.W_OOOO = W_oooo, W_oOoO, W_OOOO - imds.W_ovvo, imds.W_oVvO, imds.W_OvVo, imds.W_OVVO = W_ovvo, W_oVvO, W_OvVo, W_OVVO, - imds.W_vovo, imds.W_vOvO, imds.W_vOVo = W_vovo, W_vOvO, W_vOVo - imds.W_VovO, imds.W_VoVo, imds.W_VOVO = W_VovO, W_VoVo, W_VOVO + imds.W_voov, imds.W_oVvO, imds.W_VOOV = W_voov, W_oVvO, W_VOOV + imds.W_vovo, imds.W_vOvO, imds.W_vOoV = W_vovo, W_vOvO, W_vOoV + imds.W_oVoV, imds.W_VOVO = W_oVoV, W_VOVO return imds def compute_r1r2_uhf(mycc, imds, t2): @@ -613,9 +608,9 @@ def compute_r1r2_uhf(mycc, imds, t2): F_oo, F_OO, F_vv, F_VV = imds.F_oo, imds.F_OO, imds.F_vv, imds.F_VV W_oooo, W_oOoO, W_OOOO = imds.W_oooo, imds.W_oOoO, imds.W_OOOO - W_ovvo, W_oVvO, W_OvVo, W_OVVO = imds.W_ovvo, imds.W_oVvO, imds.W_OvVo, imds.W_OVVO - W_vovo, W_vOvO, W_vOVo = imds.W_vovo, imds.W_vOvO, imds.W_vOVo - W_VovO, W_VoVo, W_VOVO = imds.W_VovO, imds.W_VoVo, imds.W_VOVO + W_voov, W_oVvO, W_VOOV = imds.W_voov, imds.W_oVvO, imds.W_VOOV + W_vovo, W_vOvO, W_vOoV = imds.W_vovo, imds.W_vOvO, imds.W_vOoV + W_oVoV, W_VOVO = imds.W_oVoV, imds.W_VOVO r1a = t1_focka[nocca:, :nocca].T.copy() einsum('kc,ikac->ia', t1_focka[:nocca, nocca:], t2aa, out=r1a, alpha=1.0, beta=1.0) @@ -638,10 +633,8 @@ def compute_r1r2_uhf(mycc, imds, t2): einsum("kj,ikab->ijab", F_oo, t2aa, out=r2aa, alpha=-0.5, beta=1.0) einsum("abcd,ijcd->ijab", t1_erisaa[nocca:, nocca:, nocca:, nocca:], t2aa, out=r2aa, alpha=0.125, beta=1.0) einsum("klij,klab->ijab", W_oooo, t2aa, out=r2aa, alpha=0.125, beta=1.0) - einsum("kbcj,ikac->ijab", W_ovvo, t2aa, out=r2aa, alpha=1.0, beta=1.0) - einsum("kbcj,iakc->ijab", W_OvVo, t2ab, out=r2aa, alpha=1.0, beta=1.0) - W_ovvo = imds.W_ovvo = None - W_OvVo = imds.W_OvVo = None + einsum("bkjc,ikac->ijab", W_voov, t2aa, out=r2aa, alpha=1.0, beta=1.0) + einsum("bkjc,iakc->ijab", W_vOoV, t2ab, out=r2aa, alpha=1.0, beta=1.0) r2ab = t1_erisab[nocca:, noccb:, :nocca, :noccb].transpose(2, 3, 0, 1).copy() r2ab = r2ab.transpose(0, 2, 1, 3) @@ -651,17 +644,13 @@ def 
compute_r1r2_uhf(mycc, imds, t2): einsum("ki,kajb->iajb", F_oo, t2ab, out=r2ab, alpha=-1.0, beta=1.0) einsum("abcd,icjd->iajb", t1_erisab[nocca:, noccb:, nocca:, noccb:], t2ab, out=r2ab, alpha=1.0, beta=1.0) einsum("klij,kalb->iajb", W_oOoO, t2ab, out=r2ab, alpha=1.0, beta=1.0) - einsum("akcj,ickb->iajb", W_vOvO, t2ab, out=r2ab, alpha=1.0, beta=1.0) - einsum("akci,kcjb->iajb", W_vovo, t2ab, out=r2ab, alpha=1.0, beta=1.0) - einsum("akci,kjcb->iajb", W_vOVo, t2bb, out=r2ab, alpha=1.0, beta=1.0) - einsum("bkcj,ikac->iajb", W_VovO, t2aa, out=r2ab, alpha=1.0, beta=1.0) - einsum("bkcj,iakc->iajb", W_VOVO, t2ab, out=r2ab, alpha=1.0, beta=1.0) - einsum("bkci,kajc->iajb", W_VoVo, t2ab, out=r2ab, alpha=1.0, beta=1.0) + einsum("akcj,ickb->iajb", W_vOvO, t2ab, out=r2ab, alpha=-1.0, beta=1.0) + einsum("akci,kcjb->iajb", W_vovo, t2ab, out=r2ab, alpha=-1.0, beta=1.0) + einsum("akic,kjcb->iajb", W_vOoV, t2bb, out=r2ab, alpha=1.0, beta=1.0) + einsum("kbcj,ikac->iajb", W_oVvO, t2aa, out=r2ab, alpha=1.0, beta=1.0) + einsum("bkcj,iakc->iajb", W_VOVO, t2ab, out=r2ab, alpha=-1.0, beta=1.0) + einsum("kbic,kajc->iajb", W_oVoV, t2ab, out=r2ab, alpha=-1.0, beta=1.0) W_vovo = imds.W_vovo = None - W_vOvO = imds.W_vOvO = None - W_vOVo = imds.W_vOVo = None - W_VovO = imds.W_VovO = None - W_VoVo = imds.W_VoVo = None W_VOVO = imds.W_VOVO = None r2bb = 0.25 * t1_erisbb[noccb:, noccb:, :noccb, :noccb].T @@ -669,10 +658,8 @@ def compute_r1r2_uhf(mycc, imds, t2): einsum("kj,ikab->ijab", F_OO, t2bb, out=r2bb, alpha=-0.5, beta=1.0) einsum("abcd,ijcd->ijab", t1_erisbb[noccb:, noccb:, noccb:, noccb:], t2bb, out=r2bb, alpha=0.125, beta=1.0) einsum("klij,klab->ijab", W_OOOO, t2bb, out=r2bb, alpha=0.125, beta=1.0) - einsum("kbcj,ikac->ijab", W_OVVO, t2bb, out=r2bb, alpha=1.0, beta=1.0) + einsum("bkjc,ikac->ijab", W_VOOV, t2bb, out=r2bb, alpha=1.0, beta=1.0) einsum("kbcj,kcia->ijab", W_oVvO, t2ab, out=r2bb, alpha=1.0, beta=1.0) - W_oVvO = imds.W_oVvO = None - W_OVVO = imds.W_OVVO = None return [r1a, r1b], 
[r2aa, r2ab, r2bb] def r1r2_add_t3_tri_uhf_(mycc, imds, r1, r2, t3): @@ -855,12 +842,6 @@ def intermediates_t3_uhf(mycc, imds, t2): W_vvvv = t1_erisaa[nocca:, nocca:, nocca:, nocca:].copy() einsum('lmde,lmab->abde', t1_erisaa[:nocca, :nocca, nocca:, nocca:], t2aa, out=W_vvvv, alpha=0.5, beta=1.0) - W_voov = t1_erisaa[nocca:, :nocca, :nocca, nocca:].copy() - einsum('mled,imae->alid', t1_erisaa[:nocca, :nocca, nocca:, nocca:], t2aa, out=W_voov, alpha=1.0, beta=1.0) - einsum('lmde,iame->alid', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=W_voov, alpha=1.0, beta=1.0) - W_vOoV = t1_erisab[nocca:, :noccb, :nocca, noccb:].copy() - einsum('mled,imae->alid', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2aa, out=W_vOoV, alpha=1.0, beta=1.0) - einsum('mled,iame->alid', t1_erisbb[:noccb, :noccb, noccb:, noccb:], t2ab, out=W_vOoV, alpha=1.0, beta=1.0) W_vvvo = t1_erisaa[nocca:, nocca:, nocca:, :nocca].copy() einsum('lbed,klce->bcdk', t1_erisaa[:nocca, nocca:, nocca:, nocca:], t2aa, out=W_vvvo, alpha=2.0, beta=1.0) einsum('blde,kcle->bcdk', t1_erisab[nocca:, :noccb, nocca:, noccb:], t2ab, out=W_vvvo, alpha=2.0, beta=1.0) @@ -870,15 +851,8 @@ def intermediates_t3_uhf(mycc, imds, t2): einsum('mldj,kmcd->lcjk', t1_erisaa[:nocca, :nocca, nocca:, :nocca], t2aa, out=W_ovoo, alpha=2.0, beta=1.0) einsum('lmjd,kcmd->lcjk', t1_erisab[:nocca, :noccb, :nocca, noccb:], t2ab, out=W_ovoo, alpha=2.0, beta=1.0) einsum('lcde,jkde->lcjk', t1_erisaa[:nocca, nocca:, nocca:, nocca:], t2aa, out=W_ovoo, alpha=0.5, beta=1.0) - W_VVVV = t1_erisbb[noccb:, noccb:, noccb:, noccb:].copy() einsum('lmde,lmab->abde', t1_erisbb[:noccb, :noccb, noccb:, noccb:], t2bb, out=W_VVVV, alpha=0.5, beta=1.0) - W_VOOV = t1_erisbb[noccb:, :noccb, :noccb, noccb:].copy() - einsum('mled,imae->alid', t1_erisbb[:noccb, :noccb, noccb:, noccb:], t2bb, out=W_VOOV, alpha=1.0, beta=1.0) - einsum('mled,meia->alid', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=W_VOOV, alpha=1.0, beta=1.0) - W_VoOv = t1_erisab[:nocca, 
noccb:, nocca:, :noccb].transpose(1, 0, 3, 2).copy() - einsum('lmde,imae->alid', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2bb, out=W_VoOv, alpha=1.0, beta=1.0) - einsum('mled,meia->alid', t1_erisaa[:nocca, :nocca, nocca:, nocca:], t2ab, out=W_VoOv, alpha=1.0, beta=1.0) W_VVVO = t1_erisbb[noccb:, noccb:, noccb:, :noccb].copy() einsum('lbed,klce->bcdk', t1_erisbb[:noccb, noccb:, noccb:, noccb:], t2bb, out=W_VVVO, alpha=2.0, beta=1.0) einsum('lbed,lekc->bcdk', t1_erisab[:nocca, noccb:, nocca:, noccb:], t2ab, out=W_VVVO, alpha=2.0, beta=1.0) @@ -888,13 +862,8 @@ def intermediates_t3_uhf(mycc, imds, t2): einsum('mldj,kmcd->lcjk', t1_erisbb[:noccb, :noccb, noccb:, :noccb], t2bb, out=W_OVOO, alpha=2.0, beta=1.0) einsum('mldj,mdkc->lcjk', t1_erisab[:nocca, :noccb, nocca:, :noccb], t2ab, out=W_OVOO, alpha=2.0, beta=1.0) einsum('lcde,jkde->lcjk', t1_erisbb[:noccb, noccb:, noccb:, noccb:], t2bb, out=W_OVOO, alpha=0.5, beta=1.0) - W_vVvV = t1_erisab[nocca:, noccb:, nocca:, noccb:].copy() einsum('lmed,lbmc->bced', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=W_vVvV, alpha=1.0, beta=1.0) - W_oVoV = t1_erisab[:nocca, noccb:, :nocca, noccb:].copy() - einsum('lmed,iemc->lcid', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=W_oVoV, alpha=-1.0, beta=1.0) - W_vOvO = t1_erisab[nocca:, :noccb, nocca:, :noccb].copy() - einsum('mlde,make->aldk', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=W_vOvO, alpha=-1.0, beta=1.0) W_vVvO = t1_erisab[nocca:, noccb:, nocca:, :noccb].copy() einsum('lbed,lekc->bcdk', t1_erisaa[:nocca, nocca:, nocca:, nocca:], t2ab, out=W_vVvO, alpha=1.0, beta=1.0) einsum('blde,lkec->bcdk', t1_erisab[nocca:, :noccb, nocca:, noccb:], t2bb, out=W_vVvO, alpha=1.0, beta=1.0) @@ -919,10 +888,19 @@ def intermediates_t3_uhf(mycc, imds, t2): einsum('alde,jdke->aljk', t1_erisab[nocca:, :noccb, nocca:, noccb:], t2ab, out=W_vOoO, alpha=1.0, beta=1.0) imds.W_ovoo, imds.W_oVoO, imds.W_OVOO = W_ovoo, W_oVoO, W_OVOO imds.W_vOoO, imds.W_vVoV = W_vOoO, W_vVoV - 
imds.W_voov, imds.W_vOoV, imds.W_VoOv, imds.W_VOOV = W_voov, W_vOoV, W_VoOv, W_VOOV - imds.W_oVoV, imds.W_vOvO = W_oVoV, W_vOvO imds.W_vvvo, imds.W_vVvO, imds.W_VVVO = W_vvvo, W_vVvO, W_VVVO imds.W_vvvv, imds.W_vVvV, imds.W_VVVV = W_vvvv, W_vVvV, W_VVVV + + einsum('mled,imae->alid', t1_erisaa[:nocca, :nocca, nocca:, nocca:], t2aa, out=imds.W_voov, alpha=0.5, beta=1.0) + einsum('lmde,iame->alid', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=imds.W_voov, alpha=0.5, beta=1.0) + einsum('mled,imae->alid', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2aa, out=imds.W_vOoV, alpha=0.5, beta=1.0) + einsum('mled,iame->alid', t1_erisbb[:noccb, :noccb, noccb:, noccb:], t2ab, out=imds.W_vOoV, alpha=0.5, beta=1.0) + einsum('mled,imae->alid', t1_erisbb[:noccb, :noccb, noccb:, noccb:], t2bb, out=imds.W_VOOV, alpha=0.5, beta=1.0) + einsum('mled,meia->alid', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=imds.W_VOOV, alpha=0.5, beta=1.0) + einsum('lmde,imae->ladi', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2bb, out=imds.W_oVvO, alpha=0.5, beta=1.0) + einsum('mled,meia->ladi', t1_erisaa[:nocca, :nocca, nocca:, nocca:], t2ab, out=imds.W_oVvO, alpha=0.5, beta=1.0) + einsum('lmed,iemc->lcid', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=imds.W_oVoV, alpha=-0.5, beta=1.0) + einsum('mlde,make->aldk', t1_erisab[:nocca, :noccb, nocca:, noccb:], t2ab, out=imds.W_vOvO, alpha=-0.5, beta=1.0) return imds def intermediates_t3_add_t3_tri_uhf(mycc, imds, t3): @@ -1289,7 +1267,7 @@ def compute_r3bbb_tri_uhf(mycc, imds, t2, t3): F_OO, F_VV = imds.F_OO, imds.F_VV W_OOOO, W_OVOO, W_VVVO, W_VVVV = imds.W_OOOO, imds.W_OVOO, imds.W_VVVO, imds.W_VVVV - W_VoOv, W_VOOV = imds.W_VoOv, imds.W_VOOV + W_oVvO, W_VOOV = imds.W_oVvO, imds.W_VOOV r3bbb = np.zeros_like(t3bbb) @@ -1465,39 +1443,39 @@ def compute_r3bbb_tri_uhf(mycc, imds, t2, t3): _unp_bba_(mycc, t3bba, t3_tmp_2, j0, j1, k0, k1, b0, b1, c0, c1, blk_i=blksize_o_aaa, blk_j=blksize_o_aaa, blk_a=blksize_v_aaa, blk_b=blksize_v_aaa) - 
einsum("alid,jkbcld->ijkabc", W_VoOv[a0:a1, :, i0:i1, :], + einsum("ladi,jkbcld->ijkabc", W_oVvO[:, a0:a1, :, i0:i1], t3_tmp_2[:bj, :bk, :bb, :bc], out=r3_tmp[bijkabc], alpha=1.0, beta=1.0) _unp_bba_(mycc, t3bba, t3_tmp_2, j0, j1, k0, k1, a0, a1, c0, c1, blk_i=blksize_o_aaa, blk_j=blksize_o_aaa, blk_a=blksize_v_aaa, blk_b=blksize_v_aaa) - einsum("blid,jkacld->ijkabc", W_VoOv[b0:b1, :, i0:i1, :], + einsum("lbdi,jkacld->ijkabc", W_oVvO[:, b0:b1, :, i0:i1], t3_tmp_2[:bj, :bk, :ba, :bc], out=r3_tmp[bijkabc], alpha=-1.0, beta=1.0) _unp_bba_(mycc, t3bba, t3_tmp_2, j0, j1, k0, k1, a0, a1, b0, b1, blk_i=blksize_o_aaa, blk_j=blksize_o_aaa, blk_a=blksize_v_aaa, blk_b=blksize_v_aaa) - einsum("clid,jkabld->ijkabc", W_VoOv[c0:c1, :, i0:i1, :], + einsum("lcdi,jkabld->ijkabc", W_oVvO[:, c0:c1, :, i0:i1], t3_tmp_2[:bj, :bk, :ba, :bb], out=r3_tmp[bijkabc], alpha=1.0, beta=1.0) _unp_bba_(mycc, t3bba, t3_tmp_2, i0, i1, k0, k1, b0, b1, c0, c1, blk_i=blksize_o_aaa, blk_j=blksize_o_aaa, blk_a=blksize_v_aaa, blk_b=blksize_v_aaa) - einsum("aljd,ikbcld->ijkabc", W_VoOv[a0:a1, :, j0:j1, :], + einsum("ladj,ikbcld->ijkabc", W_oVvO[:, a0:a1, :, j0:j1], t3_tmp_2[:bi, :bk, :bb, :bc], out=r3_tmp[bijkabc], alpha=-1.0, beta=1.0) _unp_bba_(mycc, t3bba, t3_tmp_2, i0, i1, k0, k1, a0, a1, c0, c1, blk_i=blksize_o_aaa, blk_j=blksize_o_aaa, blk_a=blksize_v_aaa, blk_b=blksize_v_aaa) - einsum("bljd,ikacld->ijkabc", W_VoOv[b0:b1, :, j0:j1, :], + einsum("lbdj,ikacld->ijkabc", W_oVvO[:, b0:b1, :, j0:j1], t3_tmp_2[:bi, :bk, :ba, :bc], out=r3_tmp[bijkabc], alpha=1.0, beta=1.0) _unp_bba_(mycc, t3bba, t3_tmp_2, i0, i1, k0, k1, a0, a1, b0, b1, blk_i=blksize_o_aaa, blk_j=blksize_o_aaa, blk_a=blksize_v_aaa, blk_b=blksize_v_aaa) - einsum("cljd,ikabld->ijkabc", W_VoOv[c0:c1, :, j0:j1, :], + einsum("lcdj,ikabld->ijkabc", W_oVvO[:, c0:c1, :, j0:j1], t3_tmp_2[:bi, :bk, :ba, :bb], out=r3_tmp[bijkabc], alpha=-1.0, beta=1.0) _unp_bba_(mycc, t3bba, t3_tmp_2, i0, i1, j0, j1, b0, b1, c0, c1, blk_i=blksize_o_aaa, 
blk_j=blksize_o_aaa, blk_a=blksize_v_aaa, blk_b=blksize_v_aaa) - einsum("alkd,ijbcld->ijkabc", W_VoOv[a0:a1, :, k0:k1, :], + einsum("ladk,ijbcld->ijkabc", W_oVvO[:, a0:a1, :, k0:k1], t3_tmp_2[:bi, :bj, :bb, :bc], out=r3_tmp[bijkabc], alpha=1.0, beta=1.0) _unp_bba_(mycc, t3bba, t3_tmp_2, i0, i1, j0, j1, a0, a1, c0, c1, blk_i=blksize_o_aaa, blk_j=blksize_o_aaa, blk_a=blksize_v_aaa, blk_b=blksize_v_aaa) - einsum("blkd,ijacld->ijkabc", W_VoOv[b0:b1, :, k0:k1, :], + einsum("lbdk,ijacld->ijkabc", W_oVvO[:, b0:b1, :, k0:k1], t3_tmp_2[:bi, :bj, :ba, :bc], out=r3_tmp[bijkabc], alpha=-1.0, beta=1.0) _unp_bba_(mycc, t3bba, t3_tmp_2, i0, i1, j0, j1, a0, a1, b0, b1, blk_i=blksize_o_aaa, blk_j=blksize_o_aaa, blk_a=blksize_v_aaa, blk_b=blksize_v_aaa) - einsum("clkd,ijabld->ijkabc", W_VoOv[c0:c1, :, k0:k1, :], + einsum("lcdk,ijabld->ijkabc", W_oVvO[:, c0:c1, :, k0:k1], t3_tmp_2[:bi, :bj, :ba, :bb], out=r3_tmp[bijkabc], alpha=1.0, beta=1.0) _update_packed_bbb_(mycc, r3bbb, r3_tmp, i0, i1, j0, j1, k0, k1, @@ -1525,7 +1503,7 @@ def compute_r3aab_tri_uhf(mycc, imds, t2, t3): F_oo, F_vv, F_OO, F_VV = imds.F_oo, imds.F_vv, imds.F_OO, imds.F_VV W_oooo, W_oOoO, W_ovoo, W_oVoO = imds.W_oooo, imds.W_oOoO, imds.W_ovoo, imds.W_oVoO W_vOoO, W_oVoV, W_vOvO, W_vVoV = imds.W_vOoO, imds.W_oVoV, imds.W_vOvO, imds.W_vVoV - W_voov, W_vOoV, W_VoOv, W_VOOV = imds.W_voov, imds.W_vOoV, imds.W_VoOv, imds.W_VOOV + W_voov, W_vOoV, W_oVvO, W_VOOV = imds.W_voov, imds.W_vOoV, imds.W_oVvO, imds.W_VOOV W_vvvo, W_vVvO, W_vvvv, W_vVvV = imds.W_vvvo, imds.W_vVvO, imds.W_vvvv, imds.W_vVvV r3aab = np.zeros_like(t3aab) @@ -1688,7 +1666,7 @@ def compute_r3aab_tri_uhf(mycc, imds, t2, t3): _unp_aaa_(mycc, t3aaa, t3_tmp_3, i0, i1, j0, j1, 0, nocca, a0, a1, b0, b1, 0, nvira, blk_i=blksize_o_aab, blk_j=blksize_o_aab, blk_k=nocca, blk_a=blksize_v_aab, blk_b=blksize_v_aab, blk_c=nvira) - einsum("clkd,ijlabd->ijabkc", W_VoOv, t3_tmp_3[:bi, :bj, :, :ba, :bb, :], + einsum("lcdk,ijlabd->ijabkc", W_oVvO, t3_tmp_3[:bi, :bj, :, :ba, 
:bb, :], out=r3_tmp[:bi, :bj, :ba, :bb], alpha=1.0, beta=1.0) _update_packed_aab_(mycc, r3aab, r3_tmp, i0, i1, j0, j1, a0, a1, b0, b1) @@ -1720,7 +1698,7 @@ def compute_r3bba_tri_uhf(mycc, imds, t2, t3): F_oo, F_vv, F_OO, F_VV = imds.F_oo, imds.F_vv, imds.F_OO, imds.F_VV W_oOoO, W_OOOO, W_oVoO, W_OVOO = imds.W_oOoO, imds.W_OOOO, imds.W_oVoO, imds.W_OVOO W_vOoO, W_oVoV, W_vOvO, W_vVoV = imds.W_vOoO, imds.W_oVoV, imds.W_vOvO, imds.W_vVoV - W_voov, W_vOoV, W_VoOv, W_VOOV = imds.W_voov, imds.W_vOoV, imds.W_VoOv, imds.W_VOOV + W_voov, W_vOoV, W_oVvO, W_VOOV = imds.W_voov, imds.W_vOoV, imds.W_oVvO, imds.W_VOOV W_vVvO, W_VVVO, W_vVvV, W_VVVV = imds.W_vVvO, imds.W_VVVO, imds.W_vVvV, imds.W_VVVV r3bba = np.zeros_like(t3bba) @@ -1867,16 +1845,16 @@ def compute_r3bba_tri_uhf(mycc, imds, t2, t3): bd = d1 - d0 _unp_aab_(mycc, t3aab, t3_tmp_2, l0, l1, 0, nocca, d0, d1, 0, nvira, blk_j=nocca, blk_b=nvira) - einsum("alid,lkdcjb->ijabkc", W_VoOv[a0:a1, l0:l1, i0:i1, d0:d1], + einsum("ladi,lkdcjb->ijabkc", W_oVvO[l0:l1, a0:a1, d0:d1, i0:i1], t3_tmp_2[:bl, :, :bd, :, j0:j1, b0:b1], out=r3_tmp[:bi, :bj, :ba, :bb], alpha=1.0, beta=1.0) - einsum("blid,lkdcja->ijabkc", W_VoOv[b0:b1, l0:l1, i0:i1, d0:d1], + einsum("lbdi,lkdcja->ijabkc", W_oVvO[l0:l1, b0:b1, d0:d1, i0:i1], t3_tmp_2[:bl, :, :bd, :, j0:j1, a0:a1], out=r3_tmp[:bi, :bj, :ba, :bb], alpha=-1.0, beta=1.0) - einsum("aljd,lkdcib->ijabkc", W_VoOv[a0:a1, l0:l1, j0:j1, d0:d1], + einsum("ladj,lkdcib->ijabkc", W_oVvO[l0:l1, a0:a1, d0:d1, j0:j1], t3_tmp_2[:bl, :, :bd, :, i0:i1, b0:b1], out=r3_tmp[:bi, :bj, :ba, :bb], alpha=-1.0, beta=1.0) - einsum("bljd,lkdcia->ijabkc", W_VoOv[b0:b1, l0:l1, j0:j1, d0:d1], + einsum("lbdj,lkdcia->ijabkc", W_oVvO[l0:l1, b0:b1, d0:d1, j0:j1], t3_tmp_2[:bl, :, :bd, :, i0:i1, a0:a1], out=r3_tmp[:bi, :bj, :ba, :bb], alpha=1.0, beta=1.0) @@ -1905,7 +1883,7 @@ def compute_r3bba_tri_uhf(mycc, imds, t2, t3): W_oOoO = imds.W_oOoO = None W_oVoV = imds.W_oVoV = None W_vOvO = imds.W_vOvO = None - W_VoOv = imds.W_VoOv = 
None + W_oVvO = imds.W_oVvO = None W_VOOV = imds.W_VOOV = None W_VVVV = imds.W_VVVV = None W_VVVO = imds.W_VVVO = None @@ -2045,20 +2023,10 @@ def update_amps_uccsdt_tri_(mycc, tamps, eris): # antisymmetrization antisymmetrize_r2_uhf_(r2) time1 = log.timer_debug1('t1t2: antisymmetrize r2', *time1) - # divide by eijkabc + # divide by eijab r1r2_divide_e_uhf_(mycc, r1, r2, mo_energy) (r1a, r1b), (r2aa, r2ab, r2bb) = r1, r2 time1 = log.timer_debug1('t1t2: divide r1 & r2 by eia & eijab', *time1) - - res_norm = [np.linalg.norm(r1a), np.linalg.norm(r1b), - np.linalg.norm(r2aa), np.linalg.norm(r2ab), np.linalg.norm(r2bb)] - - t1a += r1a - t1b += r1b - t2aa += r2aa - t2ab += r2ab - t2bb += r2bb - time1 = log.timer_debug1('t1t2: update t1 & t2', *time1) time0 = log.timer_debug1('t1t2 total', *time0) # t3 @@ -2074,8 +2042,16 @@ def update_amps_uccsdt_tri_(mycc, tamps, eris): r3aaa, r3aab, r3bba, r3bbb = r3 time1 = log.timer_debug1('t3: divide r3 by eijkabc', *time1) - res_norm += [np.linalg.norm(r3aaa), np.linalg.norm(r3aab), np.linalg.norm(r3bba), np.linalg.norm(r3bbb)] + res_norm = [np.linalg.norm(r1a), np.linalg.norm(r1b), + np.linalg.norm(r2aa), np.linalg.norm(r2ab), np.linalg.norm(r2bb), + np.linalg.norm(r3aaa), np.linalg.norm(r3aab), np.linalg.norm(r3bba), np.linalg.norm(r3bbb)] + t1a += r1a + t1b += r1b + t2aa += r2aa + t2ab += r2ab + t2bb += r2bb + r1a, r1b, r2aa, r2ab, r2bb = None, None, None, None, None t3aaa += r3aaa r3aaa = None t3bbb += r3bbb @@ -2085,7 +2061,7 @@ def update_amps_uccsdt_tri_(mycc, tamps, eris): t3bba += r3bba r3bba = None t3 = [t3aaa, t3aab, t3bba, t3bbb] - time1 = log.timer_debug1('t3: update t3', *time1) + time1 = log.timer_debug1('t3: update t1, t2, t3', *time1) time0 = log.timer_debug1('t3 total', *time0) tamps = [t1, t2, t3] @@ -2280,7 +2256,7 @@ def restore_from_diis_(mycc, diis_file, inplace=True): else: n1, nocc1, nvir1, n2, nocc2, nvir2 = nb, noccb, nvirb, na, nocca, nvira if mycc.do_tri_max_t: - if n2 >= 0: + if n2 > 0: shape = 
(nx(nocc1, n1),) + (nx(nvir1, n1),) + (nx(nocc2, n2),) + (nx(nvir2, n2),) else: shape = (nx(nocc1, n1),) + (nx(nvir1, n1),) @@ -2471,6 +2447,7 @@ def dump_chk(mycc, tamps=None, frozen=None, mo_coeff=None, mo_occ=None): lib.chkfile.save(mycc.chkfile, 'uccsdt', cc_chk) else: lib.chkfile.save(mycc.chkfile, 'uccsdt_highm', cc_chk) + return mycc def tamps_tri2full_uhf(mycc, tamps_tri): '''Convert triangular-stored T amplitudes to their full tensor form (UHF case).''' @@ -2912,11 +2889,11 @@ def __init__(self): self.F_oo, self.F_OO = None, None self.F_vv, self.F_VV = None, None self.W_oooo, self.W_oOoO, self.W_OOOO = None, None, None - self.W_ovoo, self.W_oVoO, self.W_OVOO = None, None, None - self.W_vOoO, self.W_oVoV, self.W_vOvO, self.W_vVoV = None, None, None, None - self.W_voov, self.W_vOoV, self.W_VoOv, self.W_VOOV = None, None, None, None - self.W_vvvo, self.W_vVvO, self.W_VVVO = None, None, None + self.W_voov, self.W_vOoV, self.W_oVvO, self.W_VOOV = None, None, None, None + self.W_vovo, self.W_vOvO, self.W_oVoV, self.W_VOVO = None, None, None, None self.W_vvvv, self.W_vVvV, self.W_VVVV = None, None, None + self.W_vvvo, self.W_vVvO, self.W_vVoV, self.W_VVVO = None, None, None, None + self.W_ovoo, self.W_oVoO, self.W_vOoO, self.W_OVOO = None, None, None, None if __name__ == "__main__": diff --git a/pyscf/cc/uccsdt_highm.py b/pyscf/cc/uccsdt_highm.py index d02de7ccbe..407c1984ea 100644 --- a/pyscf/cc/uccsdt_highm.py +++ b/pyscf/cc/uccsdt_highm.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2025 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -28,14 +28,10 @@ ''' import numpy as np -import numpy import functools -from pyscf import lib from pyscf.lib import logger -from pyscf.mp.mp2 import get_e_hf -from pyscf.mp.ump2 import get_nocc, get_nmo, get_frozen_mask from pyscf.cc import uccsdt -from pyscf.cc.rccsdt import _einsum, run_diis, _finalize +from pyscf.cc.rccsdt import _einsum from pyscf.cc.uccsdt import (update_t1_fock_eris_uhf, intermediates_t1t2_uhf, compute_r1r2_uhf, antisymmetrize_r2_uhf_, r1r2_divide_e_uhf_, intermediates_t3_uhf, _PhysicistsERIs, _IMDS) from pyscf import __config__ @@ -132,7 +128,7 @@ def compute_r3_uhf(mycc, imds, t2, t3): W_oooo, W_oOoO, W_OOOO = imds.W_oooo, imds.W_oOoO, imds.W_OOOO W_ovoo, W_oVoO, W_OVOO = imds.W_ovoo, imds.W_oVoO, imds.W_OVOO W_vOoO, W_oVoV, W_vOvO, W_vVoV = imds.W_vOoO, imds.W_oVoV, imds.W_vOvO, imds.W_vVoV - W_voov, W_vOoV, W_VoOv, W_VOOV = imds.W_voov, imds.W_vOoV, imds.W_VoOv, imds.W_VOOV + W_voov, W_vOoV, W_oVvO, W_VOOV = imds.W_voov, imds.W_vOoV, imds.W_oVvO, imds.W_VOOV W_vvvo, W_vVvO, W_VVVO = imds.W_vvvo, imds.W_vVvO, imds.W_VVVO W_vvvv, W_vVvV, W_VVVV = imds.W_vvvv, imds.W_vVvV, imds.W_VVVV @@ -155,7 +151,7 @@ def compute_r3_uhf(mycc, imds, t2, t3): einsum("abde,ijkdec->ijkabc", W_VVVV, t3bbb, out=r3bbb, alpha=1.0 / 24.0, beta=1.0) einsum("lmij,lmkabc->ijkabc", W_OOOO, t3bbb, out=r3bbb, alpha=1.0 / 24.0, beta=1.0) einsum("alid,ljkdbc->ijkabc", W_VOOV, t3bbb, out=r3bbb, alpha=0.25, beta=1.0) - einsum("alid,jkbcld->ijkabc", W_VoOv, t3bba, out=r3bbb, alpha=0.25, beta=1.0) + einsum("ladi,jkbcld->ijkabc", W_oVvO, t3bba, out=r3bbb, alpha=0.25, beta=1.0) time1 = log.timer_debug1('t3: r3bbb', *time1) r3aab = np.empty_like(t3aab) @@ -177,7 +173,7 @@ def compute_r3_uhf(mycc, imds, t2, t3): einsum("alid,lkdcjb->ijabkc", W_vOoV, t3bba, out=r3aab, alpha=1.0, beta=1.0) einsum("lcid,ljabkd->ijabkc", W_oVoV, t3aab, out=r3aab, alpha=-0.5, beta=1.0) einsum("aldk,ijdblc->ijabkc", W_vOvO, t3aab, out=r3aab, alpha=-0.5, beta=1.0) - einsum("clkd,ijlabd->ijabkc", 
W_VoOv, t3aaa, out=r3aab, alpha=0.25, beta=1.0) + einsum("lcdk,ijlabd->ijabkc", W_oVvO, t3aaa, out=r3aab, alpha=0.25, beta=1.0) einsum("clkd,ijabld->ijabkc", W_VOOV, t3aab, out=r3aab, alpha=0.25, beta=1.0) W_vvvo = imds.W_vvvo = None W_ovoo = imds.W_ovoo = None @@ -201,7 +197,7 @@ def compute_r3_uhf(mycc, imds, t2, t3): einsum("lmij,lmabkc->ijabkc", W_OOOO, t3bba, out=r3bba, alpha=0.125, beta=1.0) einsum("mlki,ljabmc->ijabkc", W_oOoO, t3bba, out=r3bba, alpha=0.5, beta=1.0) einsum("alid,ljdbkc->ijabkc", W_VOOV, t3bba, out=r3bba, alpha=1.0, beta=1.0) - einsum("alid,lkdcjb->ijabkc", W_VoOv, t3aab, out=r3bba, alpha=1.0, beta=1.0) + einsum("ladi,lkdcjb->ijabkc", W_oVvO, t3aab, out=r3bba, alpha=1.0, beta=1.0) einsum("cldi,ljabkd->ijabkc", W_vOvO, t3bba, out=r3bba, alpha=-0.5, beta=1.0) einsum("lakd,ijdblc->ijabkc", W_oVoV, t3bba, out=r3bba, alpha=-0.5, beta=1.0) einsum("clkd,ijlabd->ijabkc", W_vOoV, t3bbb, out=r3bba, alpha=0.25, beta=1.0) @@ -220,7 +216,7 @@ def compute_r3_uhf(mycc, imds, t2, t3): W_oOoO = imds.W_oOoO = None W_oVoV = imds.W_oVoV = None W_vOvO = imds.W_vOvO = None - W_VoOv = imds.W_VoOv = None + W_oVvO = imds.W_oVvO = None W_VOOV = imds.W_VOOV = None W_VVVV = imds.W_VVVV = None W_VVVO = imds.W_VVVO = None @@ -292,20 +288,10 @@ def update_amps_uccsdt_(mycc, tamps, eris): # antisymmetrization antisymmetrize_r2_uhf_(r2) time1 = log.timer_debug1('t1t2: antisymmetrize r2', *time1) - # divide by eijkabc + # divide by eijab r1r2_divide_e_uhf_(mycc, r1, r2, mo_energy) (r1a, r1b), (r2aa, r2ab, r2bb) = r1, r2 time1 = log.timer_debug1('t1t2: divide r1 & r2 by eia & eijab', *time1) - - res_norm = [np.linalg.norm(r1a), np.linalg.norm(r1b), - np.linalg.norm(r2aa), np.linalg.norm(r2ab), np.linalg.norm(r2bb)] - - t1a += r1a - t1b += r1b - t2aa += r2aa - t2ab += r2ab - t2bb += r2bb - time1 = log.timer_debug1('t1t2: update t1 & t2', *time1) time0 = log.timer_debug1('t1t2 total', *time0) # t3 @@ -324,8 +310,16 @@ def update_amps_uccsdt_(mycc, tamps, eris): r3aaa, r3aab, 
r3bba, r3bbb = r3 time1 = log.timer_debug1('t3: divide r3 by eijkabc', *time1) - res_norm += [np.linalg.norm(r3aaa), np.linalg.norm(r3aab), np.linalg.norm(r3bba), np.linalg.norm(r3bbb)] + res_norm = [np.linalg.norm(r1a), np.linalg.norm(r1b), + np.linalg.norm(r2aa), np.linalg.norm(r2ab), np.linalg.norm(r2bb), + np.linalg.norm(r3aaa), np.linalg.norm(r3aab), np.linalg.norm(r3bba), np.linalg.norm(r3bbb)] + t1a += r1a + t1b += r1b + t2aa += r2aa + t2ab += r2ab + t2bb += r2bb + r1a, r1b, r2aa, r2ab, r2bb = None, None, None, None, None t3aaa += r3aaa r3aaa = None t3bbb += r3bbb @@ -335,7 +329,7 @@ def update_amps_uccsdt_(mycc, tamps, eris): t3bba += r3bba r3bba = None t3 = (t3aaa, t3aab, t3bba, t3bbb) - time1 = log.timer_debug1('t3: update t3', *time1) + time1 = log.timer_debug1('t3: update t1, t2, t3', *time1) time0 = log.timer_debug1('t3 total', *time0) tamps = [t1, t2, t3] diff --git a/pyscf/cc/uintermediates_slow.py b/pyscf/cc/uintermediates_slow.py index 4b7c949f06..977bc004f6 100644 --- a/pyscf/cc/uintermediates_slow.py +++ b/pyscf/cc/uintermediates_slow.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import tempfile import h5py import numpy as np from pyscf import lib @@ -73,7 +72,7 @@ def cc_Wvvvv(t1,t2,eris): #Wabef += 0.25*einsum('mnab,mnef->abef',tau,eris.oovv) if t1.dtype == np.complex128: ds_type = 'c16' else: ds_type = 'f8' - _tmpfile1 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + _tmpfile1 = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) fimd = h5py.File(_tmpfile1.name) nocc, nvir = t1.shape Wabef = fimd.create_dataset('vvvv', (nvir,nvir,nvir,nvir), ds_type) @@ -120,7 +119,7 @@ def Wvvvv(t1,t2,eris): #Wabef = cc_Wvvvv(t1,t2,eris) + 0.25*einsum('mnab,mnef->abef',tau,eris.oovv) if t1.dtype == np.complex128: ds_type = 'c16' else: ds_type = 'f8' - _tmpfile1 = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + _tmpfile1 = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) fimd = h5py.File(_tmpfile1.name) nocc, nvir = t1.shape Wabef = fimd.create_dataset('vvvv', (nvir,nvir,nvir,nvir), ds_type) diff --git a/pyscf/ci/test/test_cisd.py b/pyscf/ci/test/test_cisd.py index 20f0cac12a..32eb4ac4dd 100644 --- a/pyscf/ci/test/test_cisd.py +++ b/pyscf/ci/test/test_cisd.py @@ -14,7 +14,6 @@ # limitations under the License. import unittest -import tempfile import numpy from functools import reduce @@ -332,7 +331,7 @@ def test_dump_chk(self): H 0. -0.757 0.587 H 0. 
0.757 0.587''', basis='631g') mf = scf.RHF(mol).run() - mf.chkfile = tempfile.NamedTemporaryFile().name + mf.chkfile = lib.NamedTemporaryFile().name ci_scanner = ci.CISD(mf).as_scanner() ci_scanner(mol) ci_scanner.nmo = mf.mo_energy.size diff --git a/pyscf/df/df.py b/pyscf/df/df.py index 987521e04b..47c37775a0 100644 --- a/pyscf/df/df.py +++ b/pyscf/df/df.py @@ -21,7 +21,6 @@ ''' -import tempfile import contextlib import numpy import h5py @@ -171,7 +170,7 @@ def build(self): max_memory=max_memory, verbose=log) else: if self._cderi_to_save is None: - self._cderi_to_save = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + self._cderi_to_save = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) cderi = self._cderi_to_save if is_custom_storage: diff --git a/pyscf/df/outcore.py b/pyscf/df/outcore.py index e59bf464f4..033752ccfa 100644 --- a/pyscf/df/outcore.py +++ b/pyscf/df/outcore.py @@ -17,7 +17,6 @@ # -import tempfile import numpy import scipy.linalg import h5py @@ -57,7 +56,7 @@ def cholesky_eri(mol, erifile, auxbasis='weigend+etb', dataname='j3c', tmpdir=No if tmpdir is None: tmpdir = lib.param.TMPDIR - swapfile = tempfile.NamedTemporaryFile(dir=tmpdir) + swapfile = lib.NamedTemporaryFile(dir=tmpdir) cholesky_eri_b(mol, swapfile.name, auxbasis, dataname, int3c, aosym, int2c, comp, max_memory, auxmol, verbose=log) fswap = h5py.File(swapfile.name, 'r') @@ -243,7 +242,7 @@ def general(mol, mo_coeffs, erifile, auxbasis='weigend+etb', dataname='eri_mo', if tmpdir is None: tmpdir = lib.param.TMPDIR - swapfile = tempfile.NamedTemporaryFile(dir=tmpdir) + swapfile = lib.NamedTemporaryFile(dir=tmpdir) cholesky_eri_b(mol, swapfile.name, auxbasis, dataname, int3c, aosym, int2c, comp, max_memory, verbose=log) fswap = h5py.File(swapfile.name, 'r') diff --git a/pyscf/df/test/test_addons.py b/pyscf/df/test/test_addons.py index 1a62a905a9..00918e2326 100644 --- a/pyscf/df/test/test_addons.py +++ b/pyscf/df/test/test_addons.py @@ -17,7 +17,6 @@ import unittest import itertools -import 
tempfile import numpy as np from pyscf import lib from pyscf import gto diff --git a/pyscf/df/test/test_df.py b/pyscf/df/test/test_df.py index 11b07b91be..a386b02e94 100644 --- a/pyscf/df/test/test_df.py +++ b/pyscf/df/test/test_df.py @@ -17,7 +17,6 @@ import os import unittest -import tempfile import numpy from pyscf import lib from pyscf import gto @@ -73,7 +72,7 @@ def test_ao2mo(self): def test_cderi_to_save(self): with open(os.devnull, 'w') as f: - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() dfobj = df.DF(mol) dfobj.auxmol = df.addons.make_auxmol(mol, 'weigend') dfobj.verbose = 5 @@ -132,7 +131,7 @@ def test_rsh_df_custom_storage(self): mol = gto.M(atom = 'H 0 0 0; F 0 0 1.1', basis='ccpvdz', max_memory=10, verbose=0) mf = mol.RKS().density_fit() mf.xc = 'lda+0.5*SR_HF(0.3)' - with tempfile.NamedTemporaryFile() as ftmp: + with lib.NamedTemporaryFile() as ftmp: mf.with_df._cderi_to_save = ftmp.name mf.run() self.assertAlmostEqual(mf.e_tot, -103.4965622991, 6) diff --git a/pyscf/df/test/test_df_grad.py b/pyscf/df/test/test_df_grad.py index 80d39fe064..a892d77220 100644 --- a/pyscf/df/test/test_df_grad.py +++ b/pyscf/df/test/test_df_grad.py @@ -17,7 +17,6 @@ import os import unittest -import tempfile import numpy from pyscf import lib from pyscf import gto diff --git a/pyscf/df/test/test_df_hessian.py b/pyscf/df/test/test_df_hessian.py index 078b967aa3..e22e79fd0e 100644 --- a/pyscf/df/test/test_df_hessian.py +++ b/pyscf/df/test/test_df_hessian.py @@ -17,7 +17,6 @@ import os import unittest -import tempfile import numpy from pyscf import lib from pyscf import gto diff --git a/pyscf/df/test/test_outcore.py b/pyscf/df/test/test_outcore.py index 2e498267dd..910124e8d0 100644 --- a/pyscf/df/test/test_outcore.py +++ b/pyscf/df/test/test_outcore.py @@ -16,7 +16,6 @@ # import unittest -import tempfile import numpy import scipy.linalg import h5py @@ -47,7 +46,7 @@ def tearDownModule(): class KnownValues(unittest.TestCase): def 
test_outcore(self): - ftmp = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + ftmp = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) cderi0 = df.incore.cholesky_eri(mol) df.outcore.cholesky_eri(mol, ftmp.name) with h5py.File(ftmp.name, 'r') as feri: @@ -73,7 +72,7 @@ def test_outcore(self): with h5py.File(ftmp.name, 'r') as feri: self.assertTrue(numpy.allclose(feri['j3c'], cderi0.reshape(naux,-1))) - ftmp = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + ftmp = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) numpy.random.seed(1) co = numpy.random.random((nao,4)) cv = numpy.random.random((nao,25)) @@ -96,7 +95,7 @@ def test_outcore(self): self.assertTrue(numpy.allclose(feri['eri_mo'], cderi0)) def test_lindep(self): - ftmp = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + ftmp = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) df.outcore.cholesky_eri(mol, ftmp.name, auxmol=auxmol, verbose=7) with h5py.File(ftmp.name, 'r') as f: cderi0 = f['j3c'][:] @@ -111,7 +110,7 @@ def test_lindep(self): self.assertAlmostEqual(abs(eri0-eri1).max(), 0, 9) # def test_int3c2e_ip(self): -# ftmp = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) +# ftmp = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) # df.outcore.cholesky_eri(mol, ftmp.name, int3c='int3c2e_ip1', # auxmol=auxmol, comp=3) # with h5py.File(ftmp.name, 'r') as f: diff --git a/pyscf/dft/libxc.py b/pyscf/dft/libxc.py index 5e7db2f381..3f3385f3bb 100644 --- a/pyscf/dft/libxc.py +++ b/pyscf/dft/libxc.py @@ -276,6 +276,7 @@ def _xc_key_without_underscore(xc_keys): 'M05_2X' : 'HYB_MGGA_X_M05_2X,MGGA_C_M05_2X', 'M06_2X' : 'HYB_MGGA_X_M06_2X,MGGA_C_M06_2X', 'M06_HF' : 'HYB_MGGA_X_M06_HF,MGGA_C_M06_HF', + 'CF22D' : 'HYB_MGGA_X_CF22D,MGGA_C_CF22D', # extra aliases 'SOGGA11X' : 'SOGGA11_X', 'M06L' : 'M06_L', diff --git a/pyscf/dft/radi.py b/pyscf/dft/radi.py index 43bfb7b71d..75a62b51b5 100644 --- a/pyscf/dft/radi.py +++ b/pyscf/dft/radi.py @@ -37,7 +37,7 @@ # P.M.W. Gill, B.G. Johnson, J.A. Pople, Chem. Phys. 
Letters 209 (1993) 506-512 SG1RADII = numpy.array(( - 0, + 1.0000, # Ghost 1.0000, 0.5882, 3.0769, 2.0513, 1.5385, 1.2308, 1.0256, 0.8791, 0.7692, 0.6838, 4.0909, 3.1579, 2.5714, 2.1687, 1.8750, 1.6514, 1.4754, 1.3333)) diff --git a/pyscf/dft/test/test_h2o.py b/pyscf/dft/test/test_h2o.py index aa55453984..ca44e6cdf4 100644 --- a/pyscf/dft/test/test_h2o.py +++ b/pyscf/dft/test/test_h2o.py @@ -22,7 +22,7 @@ from pyscf import scf try: from pyscf.dispersion import dftd3, dftd4 -except ImportError: +except (ImportError, OSError): dftd3 = dftd4 = None def setUpModule(): diff --git a/pyscf/dft/test/test_he.py b/pyscf/dft/test/test_he.py index 00400d0f17..67ce66dbc4 100644 --- a/pyscf/dft/test/test_he.py +++ b/pyscf/dft/test/test_he.py @@ -14,7 +14,6 @@ # limitations under the License. import unittest -import tempfile import numpy from pyscf import gto from pyscf import lib @@ -215,14 +214,14 @@ def test_convert(self): # issue 1986 def test_init_guess_chkfile(self): - with tempfile.NamedTemporaryFile() as tmpf: - mol = gto.M(atom='He 0 0 0', basis='631g', charge=1, spin=1) + mol = gto.M(atom='He 0 0 0', basis='631g', charge=1, spin=1) + with lib.NamedTemporaryFile() as tmpf: mf = dft.RKS(mol) mf.chkfile = tmpf.name e1 = mf.kernel() mf = dft.RKS(mol) - mf.init_guess = 'chkfile' mf.chkfile = tmpf.name + mf.init_guess = 'chkfile' mf.max_cycle = 1 e2 = mf.kernel() self.assertAlmostEqual(e1, e2, 9) diff --git a/pyscf/dft/test/test_libxc.py b/pyscf/dft/test/test_libxc.py index 7f47c5a624..a820cf4b96 100644 --- a/pyscf/dft/test/test_libxc.py +++ b/pyscf/dft/test/test_libxc.py @@ -446,6 +446,7 @@ def test_dft_parser(self): self.assertEqual(parse_dft('b3lyp-d3zerom'), ('b3lyp', '', 'd3zerom')) self.assertEqual(parse_dft('wb97x-d3bj'), ('wb97x-v', False, 'd3bj')) self.assertEqual(parse_dft('wb97x-d3zero2b'), ('wb97x', '', 'd3zero2b')) + self.assertEqual(parse_dft('wb97x-3c'), ('wb97x-v', False, 'd4:wb97x-3c')) def test_set_param(self): XC_ID_B97_2 = 410 diff --git 
a/pyscf/eph/test/test_rhf.py b/pyscf/eph/test/test_rhf.py index 3c72e92cf6..5ac5aba9bf 100644 --- a/pyscf/eph/test/test_rhf.py +++ b/pyscf/eph/test/test_rhf.py @@ -14,7 +14,6 @@ # limitations under the License. # -import tempfile from pyscf import scf, gto, lib from pyscf.eph import eph_fd, rhf import numpy as np @@ -33,7 +32,6 @@ def setUpModule(): mol.output = '/dev/null' mol.build() mf = scf.RHF(mol) - mf.chkfile = tempfile.NamedTemporaryFile().name mf.conv_tol = 1e-14 mf.conv_tol_grad = 1e-9 mf.kernel() diff --git a/pyscf/eph/test/test_rks.py b/pyscf/eph/test/test_rks.py index 4383b89d45..3fd9cd4d39 100644 --- a/pyscf/eph/test/test_rks.py +++ b/pyscf/eph/test/test_rks.py @@ -14,7 +14,6 @@ # limitations under the License. # -import tempfile from pyscf import dft, gto, lib from pyscf.eph import eph_fd, rks import numpy as np @@ -33,7 +32,6 @@ def setUpModule(): mol.output = '/dev/null' mol.build() mf = dft.RKS(mol) - mf.chkfile = tempfile.NamedTemporaryFile().name mf.grids.level = 3 mf.xc = 'b3lyp5' mf.conv_tol = 1e-14 diff --git a/pyscf/eph/test/test_uhf.py b/pyscf/eph/test/test_uhf.py index ac20518f83..3508afb2ed 100644 --- a/pyscf/eph/test/test_uhf.py +++ b/pyscf/eph/test/test_uhf.py @@ -14,7 +14,6 @@ # limitations under the License. # -import tempfile from pyscf import scf, gto, lib from pyscf.eph import eph_fd, uhf import numpy as np @@ -33,7 +32,6 @@ def setUpModule(): mol.output = '/dev/null' mol.build() mf = scf.UHF(mol) - mf.chkfile = tempfile.NamedTemporaryFile().name mf.conv_tol = 1e-14 mf.conv_tol_grad = 1e-9 mf.kernel() diff --git a/pyscf/eph/test/test_uks.py b/pyscf/eph/test/test_uks.py index 0cdba70440..2308ce88e0 100644 --- a/pyscf/eph/test/test_uks.py +++ b/pyscf/eph/test/test_uks.py @@ -14,7 +14,6 @@ # limitations under the License. 
# -import tempfile from pyscf import dft, gto, lib from pyscf.eph import eph_fd, uks import numpy as np @@ -34,7 +33,6 @@ def setUpModule(): mol.build() mf = dft.UKS(mol) - mf.chkfile = tempfile.NamedTemporaryFile().name mf.grids.level = 3 mf.xc = 'b3lyp5' mf.conv_tol = 1e-14 diff --git a/pyscf/fci/test/test_rdm.py b/pyscf/fci/test/test_rdm.py index 7366783c7d..7c69d03ea0 100644 --- a/pyscf/fci/test/test_rdm.py +++ b/pyscf/fci/test/test_rdm.py @@ -267,7 +267,7 @@ def test_full_alpha(self): h2 = numpy.random.random((npair,npair)) * .1 h2 = h2 + h2.T cis = fci.direct_spin1.FCI() - e, c = cis.kernel(h1, h2, norb, nelec, verbose=5) + e, c = cis.kernel(h1, h2, norb, nelec) dm1s, dm2s = cis.make_rdm12s(c, norb, nelec) self.assertAlmostEqual(abs(dm1s[0]).sum(), 6, 9) self.assertAlmostEqual(dm1s[1].trace(), 3, 9) @@ -285,7 +285,7 @@ def test_0beta(self): h2 = numpy.random.random((npair,npair)) * .1 h2 = h2 + h2.T cis = fci.direct_spin1.FCI() - e, c = cis.kernel(h1, h2, norb, nelec, verbose=5) + e, c = cis.kernel(h1, h2, norb, nelec) dm1s, dm2s = cis.make_rdm12s(c, norb, nelec) self.assertAlmostEqual(dm1s[0].trace(), 3, 9) self.assertAlmostEqual(abs(dm1s[1]).sum(), 0, 9) diff --git a/pyscf/grad/dispersion.py b/pyscf/grad/dispersion.py index 1f954930b9..3b1695f172 100644 --- a/pyscf/grad/dispersion.py +++ b/pyscf/grad/dispersion.py @@ -28,10 +28,12 @@ def get_dispersion(mf_grad, disp=None, with_3body=None, verbose=None): '''gradient of DFTD3/DFTD4 dispersion correction''' mf = mf_grad.base mol = mf.mol - disp_version = check_disp(mf, disp) - if not disp_version: + if not check_disp(mf, disp): return np.zeros([mol.natm,3]) + if disp is None: + disp = getattr(mf, 'disp', None) + try: from pyscf.dispersion import dftd3, dftd4 except ImportError: @@ -39,9 +41,9 @@ def get_dispersion(mf_grad, disp=None, with_3body=None, verbose=None): raise method = getattr(mf, 'xc', 'hf') - method, _, disp_with_3body = parse_disp(method) + method, disp_version, disp_with_3body = 
parse_disp(method, disp) - if with_3body is not None: + if with_3body is None: with_3body = disp_with_3body if disp_version[:2].upper() == 'D3': diff --git a/pyscf/grad/test/test_lpdft.py b/pyscf/grad/test/test_lpdft.py index 0ced79fe05..61b17eb05f 100644 --- a/pyscf/grad/test/test_lpdft.py +++ b/pyscf/grad/test/test_lpdft.py @@ -286,7 +286,7 @@ def test_rohf_sanity (self): de_ref = mc_grad_ref.kernel(state=i)[1, 0] self.assertAlmostEqual (de, de_ref, 6) - def test_dfrohf_sanity (self): + def test_dfrohf_sanity_high_cost (self): n_states = 3 mc_grad = diatomic( "Li", "H", 1.4, "ftpbe", "6-31g", 4, 2, n_states, density_fit=True, spin=2 diff --git a/pyscf/grad/test/test_mcpdft.py b/pyscf/grad/test/test_mcpdft.py index af65a7ea34..82f7b025eb 100644 --- a/pyscf/grad/test/test_mcpdft.py +++ b/pyscf/grad/test/test_mcpdft.py @@ -43,23 +43,26 @@ def auto_setup (xyz='Li 0 0 0\nH 1.5 0 0'): mol_sym = gto.M (atom = xyz, basis = 'sto3g', symmetry=True, output = '/dev/null', verbose = 0) mf_nosym = scf.RHF (mol_nosym).run () - mc_nosym = mcscf.CASSCF (mf_nosym, 5, 2).run () + mc_nosym = mcscf.CASSCF (mf_nosym, 5, 2) mf_sym = scf.RHF (mol_sym).run () mc_sym = mcscf.CASSCF (mf_sym, 5, 2).run () + mc_nosym.run (mo_coeff=mc_sym.mo_coeff) mcp_ss_nosym = mcpdft.CASSCF (mc_nosym, 'ftLDA,VWN3', 5, 2, grids_level=1).run () mcp_ss_sym = mcpdft.CASSCF (mc_sym, 'ftLDA,VWN3', 5, 2, grids_level=1).run () - mcp_sa_0 = mcp_ss_nosym.state_average ([1.0/5,]*5).run () + mcp_sa_0 = mcp_ss_nosym.state_average ([1.0/5,]*5) solver_S = fci.solver (mol_nosym, singlet=True).set (spin=0, nroots=2) solver_T = fci.solver (mol_nosym, singlet=False).set (spin=2, nroots=3) mcp_sa_1 = mcp_ss_nosym.state_average_mix ( - [solver_S,solver_T], [1.0/5,]*5).set(ci=None).run () + [solver_S,solver_T], [1.0/5,]*5).set(ci=None) solver_A1 = fci.solver (mol_sym).set (wfnsym='A1', nroots=3) solver_E1x = fci.solver (mol_sym).set (wfnsym='E1x', nroots=1, spin=2) solver_E1y = fci.solver (mol_sym).set (wfnsym='E1y', nroots=1, 
spin=2) mcp_sa_2 = mcp_ss_sym.state_average_mix ( [solver_A1,solver_E1x,solver_E1y], [1.0/5,]*5).set(ci=None).run () + mcp_sa_0.run (mo_coeff=mcp_sa_2.mo_coeff) + mcp_sa_1.run (mo_coeff=mcp_sa_2.mo_coeff) mcp = [[mcp_ss_nosym, mcp_ss_sym], [mcp_sa_0, mcp_sa_1, mcp_sa_2]] nosym = [mol_nosym, mf_nosym, mc_nosym] sym = [mol_sym, mf_sym, mc_sym] diff --git a/pyscf/grad/test/test_rhf.py b/pyscf/grad/test/test_rhf.py index 087e6cf40e..30896ab731 100644 --- a/pyscf/grad/test/test_rhf.py +++ b/pyscf/grad/test/test_rhf.py @@ -20,7 +20,7 @@ try: from pyscf.dispersion import dftd3, dftd4 -except ImportError: +except (ImportError, OSError): dftd3 = dftd4 = None def setUpModule(): diff --git a/pyscf/grad/test/test_rks.py b/pyscf/grad/test/test_rks.py index fe6e509122..42c815ebe9 100644 --- a/pyscf/grad/test/test_rks.py +++ b/pyscf/grad/test/test_rks.py @@ -20,7 +20,7 @@ from pyscf.grad import rks try: from pyscf.dispersion import dftd3, dftd4 -except ImportError: +except (ImportError, OSError): dftd3 = dftd4 = None diff --git a/pyscf/grad/test/test_uhf.py b/pyscf/grad/test/test_uhf.py index 25d3885031..971e61627f 100644 --- a/pyscf/grad/test/test_uhf.py +++ b/pyscf/grad/test/test_uhf.py @@ -19,7 +19,7 @@ from pyscf import grad try: from pyscf.dispersion import dftd3, dftd4 -except ImportError: +except (ImportError, OSError): dftd3 = dftd4 = None @@ -196,8 +196,8 @@ def test_finite_diff_df_uhf_grad(self): H -0.43459905 0.65805058 -0.00861418''') self.assertAlmostEqual(g[2,1], (e2-e1)/2e-4*lib.param.BOHR, 7) - @unittest.skipIf(dftd4 is None, "requires the dftd4 library") - def test_finite_diff_df_uhf_d4_grad(self): + @unittest.skipIf(dftd3 is None, "requires the dftd3 library") + def test_finite_diff_df_uhf_d3_grad(self): mf = scf.UHF(mol).density_fit () mf.conv_tol = 1e-14 mf.disp = 'd3bj' diff --git a/pyscf/grad/test/test_uks.py b/pyscf/grad/test/test_uks.py index ed77ba14b3..8331fee615 100644 --- a/pyscf/grad/test/test_uks.py +++ b/pyscf/grad/test/test_uks.py @@ -20,7 +20,7 @@ 
from pyscf.grad import uks try: from pyscf.dispersion import dftd3, dftd4 -except ImportError: +except (ImportError, OSError): dftd3 = dftd4 = None diff --git a/pyscf/gto/basis/__init__.py b/pyscf/gto/basis/__init__.py index bbcd110f7b..84ec26c04f 100644 --- a/pyscf/gto/basis/__init__.py +++ b/pyscf/gto/basis/__init__.py @@ -711,15 +711,33 @@ def load(filename_or_basisname, symb, optimize=OPTIMIZE_CONTRACTION): raise BasisNotFoundError(f'Unknown basis format or basis name for {filename_or_basisname}') - if 'dat' in basmod: - b = fload(join(basis_dir, basmod), symb, optimize) - elif isinstance(basmod, (tuple, list)) and isinstance(basmod[0], str): - b = [] - for f in basmod: - b += fload(join(basis_dir, f), symb, optimize) - else: - mod = importlib.import_module('.'+basmod, __package__) - b = mod.__getattribute__(symb) + try: + if 'dat' in basmod: + b = fload(join(basis_dir, basmod), symb, optimize) + elif isinstance(basmod, (tuple, list)) and isinstance(basmod[0], str): + b = [] + for f in basmod: + b += fload(join(basis_dir, f), symb, optimize) + else: + mod = importlib.import_module('.'+basmod, __package__) + b = mod.__getattribute__(symb) + except (BasisNotFoundError, AttributeError): + # When basis set is recognized but its .dat file lacks required elements (e.g., lanthanides), fallback to BSE + from pyscf.gto.basis import bse + if bse.basis_set_exchange is None: + warnings.warn( + 'Basis may be available in basis-set-exchange. 
' + 'It is recommended to install basis-set-exchange: ' + 'pip install basis-set-exchange') + raise BasisNotFoundError( + f'Basis set not found for {symb} in {filename_or_basisname}') + try: + bse_obj = bse.basis_set_exchange.api.get_basis( + filename_or_basisname, elements=symb) + except KeyError: + raise BasisNotFoundError( + f'Basis set not found for {symb} in {filename_or_basisname}') + b = bse._orbital_basis(bse_obj)[0][symb] if contr_scheme != 'Full': b = _truncate(b, contr_scheme, symb, split_name) @@ -739,6 +757,23 @@ def load_ecp(filename_or_basisname, symb): return parse_nwchem_ecp.load(join(_BASIS_DIR, basmod), symb) if '\n' not in filename_or_basisname: + from pyscf.gto.basis import bse + if bse.basis_set_exchange is None: + warnings.warn( + 'ECP may be available in basis-set-exchange. ' + 'It is recommended to install basis-set-exchange: ' + 'pip install basis-set-exchange') + else: + try: + bse_obj = bse.basis_set_exchange.api.get_basis( + filename_or_basisname, elements=symb) + except KeyError: + raise BasisNotFoundError(filename_or_basisname) + ecp_basis = bse._ecp_basis(bse_obj) + if symb not in ecp_basis: + raise BasisNotFoundError( + f'No ECP defined for {symb} in {filename_or_basisname}') + return ecp_basis[symb] raise RuntimeError(f'Unable to parse the input ECP data\n{filename_or_basisname}') try: @@ -760,20 +795,6 @@ def load_ecp(filename_or_basisname, symb): except BasisNotFoundError: pass - # Last, a trial to access Basis Set Exchange database - from pyscf.gto.basis import bse - if bse.basis_set_exchange is not None: - try: - bse_obj = bse.basis_set_exchange.api.get_basis( - filename_or_basisname, elements=symb) - except KeyError: - raise BasisNotFoundError(filename_or_basisname) - ecp_basis = bse._ecp_basis(bse_obj) - if len(ecp_basis) > 0: - return ecp_basis[symb] - else: - return {} - raise BasisNotFoundError('Unknown ECP format or ECP name') # PP_NAME_PATTERN follows the convention of CP2K orbital basis and pseudo names diff --git 
a/pyscf/gto/test/test_basis_parser.py b/pyscf/gto/test/test_basis_parser.py index 1d6cbbf3b0..b1da3118d3 100644 --- a/pyscf/gto/test/test_basis_parser.py +++ b/pyscf/gto/test/test_basis_parser.py @@ -13,8 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest -import tempfile from functools import reduce import numpy from pyscf import gto @@ -30,14 +30,15 @@ class KnownValues(unittest.TestCase): def test_parse_pople(self): + join = os.path.join self.assertEqual(gto.basis._parse_pople_basis('631g(d)', 'C'), - ('pople-basis/6-31G.dat', 'pople-basis/6-31G-polarization-d.dat')) + (join('pople-basis', '6-31G.dat'), join('pople-basis', '6-31G-polarization-d.dat'))) self.assertEqual(gto.basis._parse_pople_basis('631g**', 'C'), - ('pople-basis/6-31Gss.dat',)) + (join('pople-basis', '6-31Gss.dat'),)) self.assertEqual(gto.basis._parse_pople_basis('631++g**', 'C'), - ('pople-basis/6-31++Gss.dat',)) + (join('pople-basis', '6-31++Gss.dat'),)) self.assertEqual(gto.basis._parse_pople_basis('6311+g(d,p)', 'C'), - ('pople-basis/6-311+G.dat', 'pople-basis/6-311G-polarization-d.dat')) + (join('pople-basis', '6-311+G.dat'), join('pople-basis', '6-311G-polarization-d.dat'))) self.assertRaises(KeyError, gto.basis._parse_pople_basis, '631g++', 'C') def test_basis_load(self): @@ -61,7 +62,7 @@ def test_basis_load(self): self.assertEqual(len(gto.basis.load('def2-svp', 'Rn')), 16) def test_basis_load_from_file(self): - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() ftmp.write(''' Li S 16.1195750 0.15432897 @@ -401,7 +402,7 @@ def test_parse_gaussian_basis(self): self.assertEqual(ref, basis1) def test_parse_gaussian_load_basis(self): - with tempfile.NamedTemporaryFile(mode='w+') as f: + with lib.NamedTemporaryFile(mode='w+') as f: f.write(''' **** H 0 @@ -412,7 +413,7 @@ def test_parse_gaussian_load_basis(self): f.flush() self.assertEqual(parse_gaussian.load(f.name, 'H'), [[0, [1., 
1.]]]) - with tempfile.NamedTemporaryFile(mode='w+') as f: + with lib.NamedTemporaryFile(mode='w+') as f: f.write(''' H 0 S 1 1.0 @@ -422,7 +423,7 @@ def test_parse_gaussian_load_basis(self): f.flush() self.assertEqual(parse_gaussian.load(f.name, 'H'), [[0, [1., 1.]]]) - with tempfile.NamedTemporaryFile(mode='w+') as f: + with lib.NamedTemporaryFile(mode='w+') as f: f.write(''' **** H 0 @@ -432,7 +433,7 @@ def test_parse_gaussian_load_basis(self): f.flush() self.assertEqual(parse_gaussian.load(f.name, 'H'), [[0, [1., 1.]]]) - with tempfile.NamedTemporaryFile(mode='w+') as f: + with lib.NamedTemporaryFile(mode='w+') as f: f.write(''' H 0 S 1 1.0 diff --git a/pyscf/gto/test/test_ecp.py b/pyscf/gto/test/test_ecp.py index b65b7dcb80..cee3a99dac 100644 --- a/pyscf/gto/test/test_ecp.py +++ b/pyscf/gto/test/test_ecp.py @@ -420,6 +420,53 @@ def test_ecp_f_in_valence(self): self.assertEqual(mol.ao_labels()[40], '0 U 5f-3 ') self.assertAlmostEqual(lib.fp(mf.get_hcore()), -55.38627201912257) + def test_large_exponent_ecp_closed_form(self): + # Regression test for the adaptive Gauss-Chebyshev radial quadrature + # in nr_ecp.c. At large combined exponents the integrand is sharply + # peaked at small r; two successive coarse rules would happen to agree + # to 1e-12 even when both were under-resolved, so the loop declared + # premature convergence and the integral could be off by 1e-5. + # + # The radial integral with a same-center primitive AO of angular + # momentum l_ao (single primitive, exponent al) and a same-center + # local/semilocal ECP channel c * r^(n-2) * exp(-g r^2) factorises so + # that the ratio I(g1)/I(g2) at fixed alpha and l is + # ((2 alpha + g2) / (2 alpha + g1)) ** ((n + 2*l_ao + 1) / 2) + # independent of the AO normalisation, providing a stringent + # closed-form check on the radial quadrature. 
+ L_SYM = {0: 'S', 1: 'P', 2: 'D'} + + def build_local(n, l, al, g): + basis = {'Kr': [[l, [al, 1.0]]]} + ecp = 'ECP\nKr nelec 0\nKr ul\n%d %.10e 1.0\nEND\n' % (n, g) + return gto.M(atom='Kr 0 0 0', basis=basis, + ecp={'Kr': ecp}, verbose=0) + + def build_semilocal(n, l, al, g): + basis = {'Kr': [[l, [al, 1.0]]]} + # zero ul keeps a local channel present (required by parser) + ecp = ('ECP\nKr nelec 0\nKr ul\n2 1.0 0.0\n' + 'Kr %s\n%d %.10e 1.0\nEND\n' % (L_SYM[l], n, g)) + return gto.M(atom='Kr 0 0 0', basis=basis, + ecp={'Kr': ecp}, verbose=0) + + def ratio_closed(n, l, al, g1, g2): + p = (n + 2 * l + 1) / 2.0 + return ((2 * al + g2) / (2 * al + g1)) ** p + + worst = 0.0 + for build in (build_local, build_semilocal): + for n in (1, 2): + for l in (0, 1, 2): + for al in (1e0, 1e2, 1e4, 3e5): + for g1, g2 in ((1e1, 1e7), (1e3, 1e7), (1e5, 1e7)): + m1 = build(n, l, al, g1).intor('ECPscalar')[0, 0] + m2 = build(n, l, al, g2).intor('ECPscalar')[0, 0] + r = m1 / m2 + err = abs(r / ratio_closed(n, l, al, g1, g2) - 1) + worst = max(worst, err) + self.assertLess(worst, 1e-10) + if __name__ == '__main__': print("Full Tests for ECP") diff --git a/pyscf/gto/test/test_mole.py b/pyscf/gto/test/test_mole.py index a2f7aa6afa..fc7577d3d9 100644 --- a/pyscf/gto/test/test_mole.py +++ b/pyscf/gto/test/test_mole.py @@ -14,7 +14,6 @@ # limitations under the License. 
import unittest -import tempfile from functools import reduce import numpy import numpy as np @@ -44,7 +43,7 @@ def setUpModule(): mol0.spin = 1 mol0.verbose = 7 mol0.ecp = {'O1': 'lanl2dz'} - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() mol0.output = ftmp.name mol0.build() @@ -257,7 +256,7 @@ def test_first_argument(self): self.assertEqual(mol1.natm, 1) def test_atom_as_file(self): - ftmp = tempfile.NamedTemporaryFile('w') + ftmp = lib.NamedTemporaryFile('w') # file in raw format ftmp.write('He 0 0 0\nHe 0 0 1\n') ftmp.flush() @@ -265,14 +264,14 @@ def test_atom_as_file(self): self.assertEqual(mol1.natm, 2) # file in xyz format - ftmp = tempfile.NamedTemporaryFile('w', suffix='.xyz') + ftmp = lib.NamedTemporaryFile('w', suffix='.xyz') ftmp.write('2\n\nHe 0 0 0\nHe 0 0 1\n') ftmp.flush() mol1 = gto.M(atom=ftmp.name) self.assertEqual(mol1.natm, 2) # file in zmatrix format - ftmp = tempfile.NamedTemporaryFile('w', suffix='.zmat') + ftmp = lib.NamedTemporaryFile('w', suffix='.zmat') ftmp.write('He\nHe 1 1.5\n') ftmp.flush() mol1 = gto.M(atom=ftmp.name) @@ -621,7 +620,7 @@ def test_atom_method(self): def test_dump_loads_skip(self): import json - with tempfile.NamedTemporaryFile() as tmpfile: + with lib.NamedTemporaryFile() as tmpfile: lib.chkfile.save_mol(mol0, tmpfile.name) mol1 = gto.Mole() mol1.update(tmpfile.name) @@ -975,7 +974,7 @@ def test_ao2mo(self): self.assertAlmostEqual(eri[0,0], 1.0557129427350722, 12) def test_tofile(self): - tmpfile = tempfile.NamedTemporaryFile() + tmpfile = lib.NamedTemporaryFile() mol = gto.M(atom=[[1 , (0.,1.,1.)], ["O1", (0.,0.,0.)], [1 , (1.,1.,0.)], ]) @@ -990,7 +989,7 @@ def test_tofile(self): self.assertEqual(f.read(), ref) self.assertEqual(out1, ref[:-1]) - tmpfile = tempfile.NamedTemporaryFile(suffix='.zmat') + tmpfile = lib.NamedTemporaryFile(suffix='.zmat') str1 = mol.tofile(tmpfile.name, format='zmat') #FIXME:self.assertEqual(mol._atom, mol.fromfile(tmpfile.name)) @@ -1020,7 +1019,7 @@ def 
test_fromstring(self): print(mol.unit == 'Angstrom') def test_fromfile(self): - with tempfile.NamedTemporaryFile(mode='w+', suffix='.xyz') as f: + with lib.NamedTemporaryFile(mode='w+', suffix='.xyz') as f: f.write('2\n\nH 0 0 1; H 0 -1 0') f.flush() mol = gto.Mole() diff --git a/pyscf/gw/bse.py b/pyscf/gw/bse.py new file mode 100644 index 0000000000..3fc5597d7a --- /dev/null +++ b/pyscf/gw/bse.py @@ -0,0 +1,2006 @@ +#!/usr/bin/env python +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Tianyu Zhu +# Author: Christopher Hillenbrand +# Author: Jiachen Li +# + +""" +Bethe-Salpeter equation (BSE) for excitation energy. +Both restricted and unrestricted cases are supported. +BSE can be solved with (energy-specific) Davidson algorithm, Lanczos algorithms or fully diagonalization. + +References: + Hillenbrand, Christopher, Jiachen Li, and Tianyu Zhu. J. Chem. Phys. 162, 174117 (2025). + J. Comput. Chem. 38, 383 (2017). + Ghosh, S. K and Chattaraj, P. K. (Eds.). (2013). + SIAM J. Matrix Anal. Appl. 39, 683 (2018). +""" + +import time + +import numpy as np +import scipy +import scipy.linalg as sla +import h5py + +from pyscf import lib +from pyscf.data import nist +from pyscf.tools import mo_mapping + +HARTREE2EV = nist.HARTREE2EV + +einsum = lib.einsum + + +def bse_full_diagonalization(multi, nocc, mo_energy, Lpq, TDA=False): + """Full diagonalization of BSE equation. 
+ BSE equation is defined as equation 1 in doi.org/10.1002/jcc.24688. + Spin-adapted formalism can be found in chapter 18.3.2 in "Concepts and methods in modern theoretical chemistry. + Electronic structure (2013, CRC) Ghosh S.K., Chattaraj P.K. (eds.)" + The working equation is rewritten as equation 15 in doi.org/10.1063/1.477483. + + Parameters + ---------- + multi : str + multiplicity, 's'=singlet, 't'=triplet, 'u'=unrestricted. + nocc : int array + numbers of occupied orbitals. + mo_energy : double array + orbital energy. + Lpq : double array + three-center density-fitting matrix in MO. + TDA : bool, optional + use Tamm-Dancoff approximation, by default False + + Returns + ------- + exci : double array + excitation energy. + X_vec : list of double ndarray + X blocks of eigenvectors (excitations). + Y_vec : list of double ndarray + Y blocks of eigenvectors (de-excitation). + """ + nspin, _, nmo, _ = Lpq.shape + + # determine dimension + nvir = [(nmo - nocc[i]) for i in range(nspin)] + dim = [(nocc[i] * nvir[i]) for i in range(nspin)] + full_dim = dim[0] + dim[1] if nspin == 2 else dim[0] + apb = np.zeros(shape=[full_dim, full_dim], dtype=np.double) + + # amb is not allocated if TDA is true, since B=0 + if not TDA: + amb = np.zeros_like(apb) + + Lpq_bar = _get_lpq_bar(nocc=nocc, mo_energy=mo_energy, Lpq=Lpq) + + # scale Coulomb matrix + scale = 4.0 / nspin + if TDA: + scale /= 2.0 + + # Coulomb part + if multi == 's' or multi == 'u': + for i in range(nspin): + for j in range(nspin): + apb[i * dim[0] : i * dim[0] + dim[i], j * dim[0] : j * dim[0] + dim[j]] += einsum( + 'Lia,Ljb->iajb', Lpq[i][:, : nocc[i], nocc[i] :], Lpq[j][:, : nocc[j], nocc[j] :] + ).reshape(dim[i], dim[j]) + apb *= scale + + # W part + for i in range(nspin): + WA = -einsum('Lij,Lab->iajb', Lpq[i][:, : nocc[i], : nocc[i]], Lpq_bar[i][:, nocc[i] :, nocc[i] :]) + WA = WA.reshape(nocc[i] * nvir[i], nocc[i] * nvir[i]) + apb[i * dim[0] : i * dim[0] + dim[i], i * dim[0] : i * dim[0] + dim[i]] += WA + 
if not TDA: + amb[i * dim[0] : i * dim[0] + dim[i], i * dim[0] : i * dim[0] + dim[i]] += WA + WB = -einsum('Lib,Laj->iajb', Lpq[i][:, : nocc[i], nocc[i] :], Lpq_bar[i][:, nocc[i] :, : nocc[i]]) + WB = WB.reshape(nocc[i] * nvir[i], nocc[i] * nvir[i]) + apb[i * dim[0] : i * dim[0] + dim[i], i * dim[0] : i * dim[0] + dim[i]] += WB + amb[i * dim[0] : i * dim[0] + dim[i], i * dim[0] : i * dim[0] + dim[i]] -= WB + + # orbital energy contribution to A+B and A-B matrix + orb_diff = [] + for i in range(nspin): + orb_diff.append((mo_energy[i][None, nocc[i] :] - mo_energy[i][: nocc[i], None]).reshape(-1)) + orb_diff = np.concatenate(orb_diff, axis=0) + if not TDA: + np.fill_diagonal(amb, orb_diff + np.diag(amb)) + np.fill_diagonal(apb, orb_diff + np.diag(apb)) + + if TDA: + # Diagonalizing A is numerically more stable than + # diagonalizing A^2. Solve standard hermitian eigenvalue problem + + # B = 0, so A = apb + exci, xpy = scipy.linalg.eigh(apb) + X_vec = xpy.T + Y_vec = np.zeros_like(xpy) + + else: + # equation 15 in doi/10.1063/1.477483, solved by LAPACK function dsygvd + exci_sqr, xpy_w = scipy.linalg.eigh(apb, amb, type=3) + exci = np.sqrt(exci_sqr) + + # dsygvd normalizes xpy_w such that + # xpy_w @ xpy_w.T = A - B + # Using the fact that A - B = (X+Y) @ diag(w) @ (X+Y).T, + # we calculate X+Y = xpy_w @ diag(1/sqrt(w)). + xpy = xpy_w / np.sqrt(exci)[None, :] + + # (A+B) |X+Y> = w |X-Y>, so + # |X-Y> = w^-1 (A+B) |X+Y> + xmy = (apb @ xpy) / exci[None, :] + + # Rows of X_vec and Y_vec are the eigenvectors, hence the transpose. 
+ X_vec = (xpy + xmy).T / 2.0 + Y_vec = (xpy - xmy).T / 2.0 + + # reshape X and Y eigenvector + if nspin == 1: + X_vec = [X_vec.reshape(-1, nocc[0], nvir[0])] + Y_vec = [Y_vec.reshape(-1, nocc[0], nvir[0])] + else: + X_vec_a, X_vec_b, Y_vec_a, Y_vec_b = [], [], [], [] + for r in range(len(exci)): + X_vec_a.append(X_vec[r][: dim[0]].reshape(nocc[0], nvir[0])) + X_vec_b.append(X_vec[r][dim[0] :].reshape(nocc[1], nvir[1])) + Y_vec_a.append(Y_vec[r][: dim[0]].reshape(nocc[0], nvir[0])) + Y_vec_b.append(Y_vec[r][dim[0] :].reshape(nocc[1], nvir[1])) + X_vec = [np.asarray(X_vec_a), np.asarray(X_vec_b)] + Y_vec = [np.asarray(Y_vec_a), np.asarray(Y_vec_b)] + + return exci, X_vec, Y_vec + + +def davidson_restart(Mp, Mm, tri_vec, nvec_pair_to_save, e_min=0.0): + """Restart Davidson algorithm. + + Parameters + ---------- + Mp : ndarray + The matrix + Mm : ndarray or None + The matrix + tri_vec : ndarray + Trial vectors. + nvec_pair_to_save : int + Number of vector pairs to save. + e_min : double, optional + Minimum desired excitation energy, by default 0.0 + + Returns + ------- + int + Number of new trial vectors returned. + """ + # Full BSE case. + if Mm is not None: + full_dim = tri_vec.shape[1] + assert tri_vec.shape[0] >= nvec_pair_to_save, ( + 'Requested number of saved trial vectors is larger than the allocated space.' + ) + Mp_sym = (Mp + Mp.T) / 2.0 + Mm_sym = (Mm + Mm.T) / 2.0 + nprod = Mm.shape[0] + exci_sqr, xpy_w = scipy.linalg.eigh(Mp_sym, Mm_sym, type=3) + e_tri = np.sqrt(exci_sqr) + emin_index = np.searchsorted(e_tri, e_min, side='left') + + if full_dim < 2 * nvec_pair_to_save: + #print('full_dim < 2*nvec_pair_to_save') + Q, _, _ = sla.qr(tri_vec[:nprod].T, mode='economic', pivoting=True) + tri_vec[:nprod] = Q.T + return nprod + + # Truncate the eigenvectors and eigenvalues outside the target energy range. 
+        e_tri = e_tri[emin_index:]
+        xpy_w = xpy_w[:, emin_index:]
+        nvec_pair_to_save = min(nvec_pair_to_save, e_tri.size)
+
+        # Calculate normalized |X+Y> and |X-Y> in subspace.
+        xpy = xpy_w / np.sqrt(e_tri)[None, :]
+        xmy = (Mp_sym @ xpy) / e_tri[None, :]
+
+        # Write out the left and right vectors in the full space to a temporary file.
+        # They're written to disk because they may be too large to fit in memory.
+        with lib.H5TmpFile() as chkf:
+            # The dtype must be given explicitly: h5py creates float32 datasets when
+            # only a shape is passed, which would round the vectors to single
+            # precision before they are read back into tri_vec.
+            dset = chkf.create_dataset(
+                'tri_vec', shape=(2 * nvec_pair_to_save, full_dim), dtype=tri_vec.dtype, fillvalue=0
+            )
+            blksize = 10
+            buf = np.empty((blksize, full_dim))
+            for i in range(0, nvec_pair_to_save, blksize):
+                if i + blksize < nvec_pair_to_save:
+                    np.matmul(xpy[:, i : i + blksize].T, tri_vec[:nprod], out=buf)
+                    dset.write_direct(buf, dest_sel=np.s_[2 * i : 2 * i + blksize])
+                    np.matmul(xmy[:, i : i + blksize].T, tri_vec[:nprod], out=buf)
+                    dset.write_direct(buf, dest_sel=np.s_[2 * i + blksize : 2 * i + 2 * blksize])
+                else:
+                    remaining = nvec_pair_to_save - i
+                    np.matmul(xpy[:, i : i + remaining].T, tri_vec[:nprod], out=buf[:remaining])
+                    dset.write_direct(buf[:remaining], dest_sel=np.s_[2 * i : 2 * i + remaining])
+                    np.matmul(xmy[:, i : i + remaining].T, tri_vec[:nprod], out=buf[:remaining])
+                    dset.write_direct(buf[:remaining], dest_sel=np.s_[2 * i + remaining : 2 * i + 2 * remaining])
+
+            # Read the vectors back in and orthogonalize them.
+            assert (tri_vec[: 2 * nvec_pair_to_save].T).flags.f_contiguous
+            dset.read_direct(
+                tri_vec, source_sel=np.s_[: 2 * nvec_pair_to_save], dest_sel=np.s_[: 2 * nvec_pair_to_save]
+            )
+
+        # In-place QR decomposition leaving orthogonal vectors Q as the rows
+        # of tri_vec.
+        lwork = sla.lapack.dgeqrf_lwork(2 * nvec_pair_to_save, full_dim)
+        _, tau, _, _ = sla.lapack.dgeqrf(tri_vec[: 2 * nvec_pair_to_save].T, lwork=lwork, overwrite_a=1)
+        sla.lapack.dorgqr(tri_vec[: 2 * nvec_pair_to_save].T, tau, overwrite_a=1)
+        return 2 * nvec_pair_to_save
+
+    # TDA case.
+    else:
+        full_dim = tri_vec.shape[1]
+        assert (
+            tri_vec.shape[0] >= nvec_pair_to_save
+        ), 'Requested number of saved trial vectors is larger than the allocated space.'
+        Mp_sym = (Mp + Mp.T) / 2.0
+        nprod = Mp.shape[0]
+        e_tri, x = scipy.linalg.eigh(Mp_sym)
+        emin_index = np.searchsorted(e_tri, e_min, side='left')
+
+        if full_dim < nvec_pair_to_save:
+            #print('full_dim < 2*nvec_pair_to_save')
+            Q, R, _ = sla.qr(tri_vec[:nprod].T, mode='economic', pivoting=True)
+            tri_vec[:nprod] = Q.T
+            return nprod
+
+        # Truncate the eigenvectors and eigenvalues outside the target energy range.
+        e_tri = e_tri[emin_index:]
+        x = x[:, emin_index:]
+        nvec_pair_to_save = min(nvec_pair_to_save, e_tri.size)
+
+        # Write out the left and right vectors in the full space to a temporary file.
+        # They're written to disk because they may be too large to fit in memory.
+        with lib.H5TmpFile() as chkf:
+            # The dtype must be given explicitly: h5py creates float32 datasets when
+            # only a shape is passed, which would round the vectors to single
+            # precision before they are read back into tri_vec.
+            dset = chkf.create_dataset(
+                'tri_vec', shape=(nvec_pair_to_save, full_dim), dtype=tri_vec.dtype, fillvalue=0
+            )
+            blksize = 10
+            buf = np.empty((blksize, full_dim))
+            for i in range(0, nvec_pair_to_save, blksize):
+                if i + blksize < nvec_pair_to_save:
+                    np.matmul(x[:, i : i + blksize].T, tri_vec[:nprod], out=buf)
+                    dset.write_direct(buf, dest_sel=np.s_[i : i + blksize])
+                else:
+                    remaining = nvec_pair_to_save - i
+                    np.matmul(x[:, i : i + remaining].T, tri_vec[:nprod], out=buf[:remaining])
+                    dset.write_direct(buf[:remaining], dest_sel=np.s_[i : i + remaining])
+
+            # Read the vectors back in and orthogonalize them.
+            assert (tri_vec[:nvec_pair_to_save].T).flags.f_contiguous
+            dset.read_direct(tri_vec, source_sel=np.s_[:nvec_pair_to_save], dest_sel=np.s_[:nvec_pair_to_save])
+
+        # In-place QR decomposition leaving orthogonal vectors Q as the rows
+        # of tri_vec.
+        lwork = sla.lapack.dgeqrf_lwork(nvec_pair_to_save, full_dim)
+        _, tau, _, _ = sla.lapack.dgeqrf(tri_vec[:nvec_pair_to_save].T, lwork=lwork, overwrite_a=1)
+        sla.lapack.dorgqr(tri_vec[:nvec_pair_to_save].T, tau, overwrite_a=1)
+        return nvec_pair_to_save
+
+
+def davidson_save_checkpoint(chkfile, tri_vec, amb_prod, apb_prod, nprod):
+    """Save the current state of the Davidson algorithm to a checkpoint file.
+
+    A dataset that does not exist yet is created from rows [:nprod]. An
+    existing dataset is resized to nprod rows and only the rows added since
+    the previous save are written, so rows that are already stored are
+    assumed to be unchanged. If nprod is smaller than the stored row count,
+    rows [:nprod] are rewritten from scratch.
+
+    Parameters
+    ----------
+    chkfile : str, pathlib.Path
+        Path to the checkpoint file.
+    tri_vec : ndarray
+        Trial vectors.
+    amb_prod : ndarray or None
+        The vectors (A-B)|tri_vec>. If None (TDA), no 'amb_prod' dataset is written.
+    apb_prod : ndarray
+        The vectors (A+B)|tri_vec>.
+    nprod : int
+        The number of vectors to be written---we save the slice tri_vec[:nprod].
+    """
+    full_dim = tri_vec.shape[1]
+    # Skip arrays that were not allocated: h5py cannot build a dataset from data=None.
+    arrays = {'tri_vec': tri_vec, 'amb_prod': amb_prod, 'apb_prod': apb_prod}
+    arrays = {name: array for name, array in arrays.items() if array is not None}
+    with h5py.File(chkfile, 'a') as chkf:
+        old_ntri = chkf['tri_vec'].shape[0] if 'tri_vec' in chkf else 0
+        # Discard old contents if we're overwriting.
+        if nprod < old_ntri:
+            old_ntri = 0
+        writesel = np.s_[old_ntri:nprod]
+        for name, array in arrays.items():
+            if name not in chkf:
+                # Store only the valid rows. The buffer may hold more rows than nprod,
+                # and the loader takes the stored row count as the number of vectors.
+                chkf.create_dataset(name, data=array[:nprod], maxshape=(None, full_dim), chunks=(1, full_dim))
+            else:
+                chkf[name].resize((nprod, full_dim))
+                chkf[name].write_direct(array, source_sel=writesel, dest_sel=writesel)
+    return
+
+
+def davidson_load_from_checkpoint(chkfile, tri_vec, amb_prod, apb_prod, nload=None):
+    """Load the contents of a checkpoint file into the Davidson algorithm.
+
+    Parameters
+    ----------
+    chkfile : str, pathlib.Path
+        Path to the checkpoint file.
+    tri_vec : ndarray
+        Array to contain trial vectors.
+    amb_prod : ndarray or None
+        Array to contain the vectors (A-B)|tri_vec>. If None (TDA), 'amb_prod' is not read.
+    apb_prod : ndarray
+        Array to contain the vectors (A+B)|tri_vec>.
+    nload : int, optional
+        Maximum number of trial vectors to load. If None, load all vectors.
+
+    Returns
+    -------
+    int
+        The number of trial vectors loaded.
+
+    Raises
+    ------
+    ValueError
+        If a required dataset is missing from the checkpoint, or a target
+        array has fewer rows than the number of vectors to load.
+    """
+    # Only fill the arrays the caller actually allocated.
+    targets = {'tri_vec': tri_vec, 'amb_prod': amb_prod, 'apb_prod': apb_prod}
+    targets = {name: array for name, array in targets.items() if array is not None}
+    with h5py.File(chkfile, 'r') as chkf:
+        if 'tri_vec' not in chkf:
+            raise ValueError('Checkpoint file does not contain tri_vec.')
+        ntri = chkf['tri_vec'].shape[0]
+        if nload is not None:
+            ntri = min(ntri, nload)
+        sel = np.s_[:ntri]
+        # Validate everything before reading so that no array is left partially filled.
+        for name, array in targets.items():
+            if name not in chkf:
+                raise ValueError(f'Checkpoint file does not contain {name}.')
+            if array.shape[0] < ntri:
+                raise ValueError(f'max_vec is too small to load {ntri} vectors as requested.')
+        for name, array in targets.items():
+            chkf[name].read_direct(array, source_sel=sel, dest_sel=sel)
+    return ntri
+
+
+def bse_davidson(
+    bse,
+    multi,
+    e_min=0.0,
+    delta=0.0,
+    core_orbs=None,
+    init_from_chkfile=None,
+    expand_only_core=False,
+    precond_exact_diag=False,
+):
+    """Davidson algorithm for BSE.
+    The Davidson algorithm follows doi.org/10.1063/1.477483.
+    BSE equation is defined as equation 1 in doi.org/10.1002/jcc.24688.
+    Spin-adapted formalism can be found in chapter 18.3.2 in "Concepts and methods in modern theoretical chemistry.
+    Electronic structure (2013, CRC) Ghosh S.K., Chattaraj P.K. (eds.)"
+
+    Parameters
+    ----------
+    bse : BSE
+        BSE object.
+    multi : str
+        multiplicity, 's'=singlet, 't'=triplet, 'u'=unrestricted.
+    e_min : float, optional
+        minimum desired excitation energy. Defaults to 0.0.
+    delta : float, optional
+        energy shift for trial vector generation, typically <=0.0. Defaults to 0.0.
+    core_orbs : optional
+        filter function or AO labels or AO index, for generating trial vectors from core orbitals.
+        If this is provided, then e_min and delta are not used to generate trial vectors.
+ init_from_chkfile : str, optional + checkpoint file to initialize the Davidson algorithm. Defaults to None. + expand_only_core : bool, optional + expand only the core orbitals. Defaults to False. + precond_exact_diag : bool, optional + use exact diagonal preconditioning. Defaults to False. + + Returns + ------- + exci : double array + excitation energy. + X_vec : list of double ndarray + X block of eigenvector (excitation). + Y_vec : list of double ndarray + Y block of eigenvector (de-excitation). + """ + # load matrix + nspin = bse.nspin + nmo = bse.nmo + nocc = bse.nocc + mo_energy = bse.mo_energy + Lpq = bse.Lpq + # load parameter + TDA = bse.TDA + max_vec = bse.max_vec + nroot = bse.nroot + max_iter = bse.max_iter + max_expand = bse.max_expand + init_ntri = max(2, bse.init_ntri) + residue_thresh = bse.residue_thresh + + # determine dimension + nvir = [(nmo - nocc[i]) for i in range(nspin)] + dim = [(nocc[i] * nvir[i]) for i in range(nspin)] + full_dim = dim[0] + dim[1] if nspin == 2 else dim[0] + + # initialize trial vector + tri_vec = np.zeros(shape=[max_vec, full_dim], dtype=np.double) + ntri = min(init_ntri, full_dim) # initial guess size should be larger than nroot + + if bse.trial == 'identity': + ntri_found, tri_vec_found = get_davidson_trial_vector( + bse, ntri=ntri, nocc=nocc, mo_energy=mo_energy, e_min=e_min, delta=delta, core_orbs=core_orbs + ) + elif bse.trial == 'subspace': + ntri_found, tri_vec_found = get_davidson_trial_vector_diag( + ntri, multi, nocc, mo_energy, Lpq, nocc_sub=bse.nocc_sub, nvir_sub=bse.nvir_sub, e_min=e_min, delta=delta, + TDA=TDA + ) + else: + raise ValueError + + if ntri_found < ntri: + lib.logger.info(bse, f'only {ntri_found} trial vectors are generated rather than {ntri}.') + ntri = ntri_found + if ntri_found < init_ntri: + raise ValueError('cannot find enough trial vectors; lower e_min or add more trial vectors') + tri_vec[:ntri, :] = tri_vec_found + del tri_vec_found + + # initialize Davidson matrix + apb_prod = 
np.zeros_like(tri_vec) + if not TDA: + amb_prod = np.zeros_like(tri_vec) + else: + amb_prod = None + + Lia = [np.ascontiguousarray(Lpq[s][:, : nocc[s], nocc[s] :]) for s in range(nspin)] + Laa = [np.ascontiguousarray(Lpq[s][:, nocc[s] :, nocc[s] :]) for s in range(nspin)] + Lii_bar, Lia_bar = _get_lpq_bar_by_block( + nocc=nocc, mo_energy=mo_energy, Lii=[Lpq[s][:, : nocc[s], : nocc[s]] for s in range(nspin)], Lia=Lia + ) + + if precond_exact_diag: + assert TDA + Laa_diag = [np.diagonal(Laa[s], axis1=1, axis2=2) for s in range(nspin)] + Lii_bar_diag = [np.diagonal(Lii_bar[s], axis1=0, axis2=2) for s in range(nspin)] + v_iaia = [ + 2 / nspin * np.vecdot(Lia[s].reshape(-1, nocc[s] * nvir[s]).T, Lia[s].reshape(-1, nocc[s] * nvir[s]).T) + for s in range(nspin) + ] + Wiiaa = [(Lii_bar_diag[s].T @ Laa_diag[s]).reshape(nocc[s] * nvir[s]) for s in range(nspin)] + Wiaia = [ + np.vecdot(Lia_bar[s].reshape(-1, nocc[s] * nvir[s]).T, Lia[s].reshape(-1, nocc[s] * nvir[s]).T).reshape( + nocc[s] * nvir[s] + ) + for s in range(nspin) + ] + if TDA: + apb_diag = [v_iaia[s] - Wiiaa[s] - Wiaia[s] for s in range(nspin)] + #amb_diag = apb_diag + else: + apb_diag = [2 * v_iaia[s] - Wiiaa[s] - Wiaia[s] for s in range(nspin)] + #amb_diag = [Wiaia[s] - Wiiaa[s] for s in range(nspin)] + Laa_diag = None + Lii_bar_diag = None + + # We no longer need Lpq in this function. + Lpq = None + + # Delete Lpq if it is not needed anymore. 
+ if bse.delete_lpq: + bse.Lpq = None + + iter = 0 + nprod = 0 # the number of contracted vectors + total_contract_work = 0 + total_linalg_work = 0 + + Mm = None + Mp = None + + if init_from_chkfile is not None: + ntri = davidson_load_from_checkpoint(init_from_chkfile, tri_vec, amb_prod, apb_prod) + lib.logger.info(bse, f'Loaded {ntri} trial vectors from {init_from_chkfile}.') + nprod = ntri + + chk_last = 0 + + while iter < max_iter: + lib.logger.info(bse, '\nBSE Davidson #%d iteration, ntri= %d , nprod= %d .', iter + 1, ntri, nprod) + if not TDA: + apb_prod[nprod:ntri, :], amb_prod[nprod:ntri, :], contract_work_this_iter = _bse_contraction( + multi=multi, + nocc=nocc, + mo_energy=mo_energy, + Lia=Lia, + Laa=Laa, + Lii_bar=Lii_bar, + Lia_bar=Lia_bar, + tri_vec=tri_vec[nprod:ntri, :], + TDA=False, + ) + else: + apb_prod[nprod:ntri, :], _, contract_work_this_iter = _bse_contraction( + multi=multi, + nocc=nocc, + mo_energy=mo_energy, + Lia=Lia, + Laa=Laa, + Lii_bar=Lii_bar, + Lia_bar=Lia_bar, + tri_vec=tri_vec[nprod:ntri, :], + TDA=True, + ) + total_contract_work += contract_work_this_iter + lib.logger.info(bse, f'work for iter {iter+1}: {float(contract_work_this_iter):.2E}') + + Mp, Mm, mmwork = update_mp_mm(Mp, Mm, tri_vec, apb_prod, amb_prod, ntri, nprod) + Mp_sym = (Mp + Mp.T) / 2.0 + if not TDA: + Mm_sym = (Mm + Mm.T) / 2.0 + total_linalg_work += mmwork + nprod_prev, nprod = nprod, ntri + + if bse.chkfile is not None: + if nprod - chk_last >= bse.chk_every: + davidson_save_checkpoint(bse.chkfile, tri_vec, amb_prod, apb_prod, nprod) + lib.logger.info(bse, f'Saving progress at iteration {iter+1} to {bse.chkfile}: {chk_last}->{nprod}.') + chk_last = nprod + + nroot_current = min(nroot, ntri) + # equation 15 in doi/10.1063/1.477483, solved by LAPACK function dsygvd + + # Save current NumPy error handling settings + nperrhandling = np.geterr()['invalid'] + try: + if not TDA: + exci_sqr, xpy_w = scipy.linalg.eigh(Mp_sym.T, Mm_sym.T, type=3) + 
np.seterr(invalid='raise') + e_tri = np.sqrt(exci_sqr) + else: + np.seterr(invalid='raise') + e_tri, xpy_w = scipy.linalg.eigh(Mp_sym.T, driver='evd') + + except (scipy.linalg.LinAlgError, FloatingPointError): + lib.logger.warn(bse, 'Restarting Davidson algorithm.') + # restart Davidson algorithm + # Throw away most recent trial vectors, since they are likely to be linearly dependent + if bse.restart_max_size is None: + nvec_pair_to_save = nprod_prev + else: + nvec_pair_to_save = min(nprod_prev, bse.restart_max_size) + if not TDA: + ntri = davidson_restart( + Mp[:nprod_prev, :nprod_prev], + Mm[:nprod_prev, :nprod_prev], + tri_vec, + nvec_pair_to_save=nvec_pair_to_save, + e_min=e_min, + ) + else: + ntri = davidson_restart( + Mp[:nprod_prev, :nprod_prev], None, tri_vec, nvec_pair_to_save=nvec_pair_to_save, e_min=e_min + ) + + # Set nprod to 0 to recalculate all mat-vec products. + nprod = 0 + Mp = None + Mm = None + iter += 1 + continue + + finally: + # Restore NumPy error handling settings + np.seterr(invalid=nperrhandling) + + if not TDA: + # dsygvd normalizes xpy_w such that + # xpy_w @ xpy_w.T = A - B + # Using the fact that A - B = (X+Y) @ diag(w) @ (X+Y).T, + # we calculate X+Y = xpy_w @ diag(1/sqrt(w)). + xpy = xpy_w / np.sqrt(e_tri)[None, :] + + # (A+B) |X+Y> = w |X-Y>, so + # |X-Y> = w^-1 (A+B) |X+Y> + xmy = (Mp_sym @ xpy) / e_tri[None, :] + + # Thanks to the use of the generalized eigensolver, + # xpy and xmy already form a biorthonormal system. 
+ + else: + # TDA is easy + xpy = xpy_w + + total_linalg_work += ntri**3 + + found_roots = np.flatnonzero(e_tri >= e_min) + nrootfound = min(nroot, found_roots.size) + lib.logger.debug(bse, 'lowest %d exci above minimum: \n%s', nrootfound, e_tri[found_roots[:nrootfound]]) + emin_index = np.searchsorted(e_tri, e_min, side='left') + if emin_index + nroot_current > ntri: + emin_index = ntri - nroot_current + if ntri >= nroot: + lib.logger.info(bse, 'fewer than nroot exci found above e_min.') + + if core_orbs is not None and nspin == 1 and expand_only_core: + if not hasattr(bse, 'mol'): + raise ValueError('mol object is required if core_orbs is given.') + # Select those occupied orbitals with a significant contribution from given core orbitals. + occ_we_want = np.flatnonzero( + mo_mapping.mo_comps(core_orbs, bse.mol, bse.mo_coeff[0][:, : nocc[0]]) >= 0.3 + ) + core_roots = [] + + for idx in range(emin_index, ntri): + if not TDA: + Xvec = (0.5 * (xpy[:, idx].T + xmy[:, idx].T)) @ tri_vec[:ntri, :] + else: + Xvec = xpy[:, idx].T @ tri_vec[:ntri, :] + Xvec = Xvec.reshape(nocc[0], nvir[0]) + Xvecsqr = np.linalg.norm(Xvec, axis=1) + X_core_component = np.linalg.norm(Xvecsqr[occ_we_want]) + if X_core_component > 0.3: + core_roots.append(idx) + if len(core_roots) >= nroot_current: + break + exci_candidate_indices = np.asarray(core_roots, dtype=int) + lib.logger.debug( + bse, + 'lowest %d core excitations above minimum: \n%s', + exci_candidate_indices.size, + e_tri[exci_candidate_indices], + ) + + else: + exci_candidate_indices = np.s_[emin_index : emin_index + nroot_current] + + exci = e_tri[exci_candidate_indices] + # get left and right eigenvector in the full space, equation 25 and 26 in doi.org/10.1063/1.477483 + + right_vec_tri = xpy.T[exci_candidate_indices, :] + right_vec = np.matmul(right_vec_tri, tri_vec[:ntri, :]) + total_linalg_work += nroot_current * ntri * full_dim + + if not TDA: + left_vec_tri = xmy.T[exci_candidate_indices, :] + left_vec = 
np.matmul(left_vec_tri, tri_vec[:ntri, :]) + total_linalg_work += nroot_current * ntri * full_dim + + if not TDA: + right_res = -exci[:, None] * left_vec + left_res = -exci[:, None] * right_vec + right_res += np.matmul(right_vec_tri, apb_prod[:ntri, :]) + left_res += np.matmul(left_vec_tri, amb_prod[:ntri, :]) + + # check convergence + res_norms_left = np.linalg.norm(left_res, axis=1) ** 2 + res_norms_right = np.linalg.norm(right_res, axis=1) ** 2 + res_norms = np.maximum(res_norms_left, res_norms_right) + + else: # TDA + right_res = -exci[:, None] * right_vec + right_res += np.matmul(right_vec_tri, apb_prod[:ntri, :]) + res_norms = np.linalg.norm(right_res, axis=1) ** 2 + + max_res_norm = np.max(res_norms) + conv_vec = res_norms < residue_thresh + lib.logger.info(bse, 'max residue norm = %.4e', max_res_norm) + if conv_vec.size >= nroot: + if np.all(conv_vec[:nroot]): + conv = True + break + + not_converged = np.flatnonzero(~conv_vec) + errs_not_converged = res_norms[not_converged] + assert np.max(errs_not_converged) == max_res_norm + srt_errs = np.argsort(errs_not_converged)[::-1] + nexpand = min(max_expand, nroot_current, not_converged.size, full_dim - ntri) + candidates_to_expand = not_converged[srt_errs[:nexpand]] + + # Gather both left and right residues + if not TDA: + all_res = np.empty(shape=(2 * nexpand, full_dim), dtype=np.double) + else: + all_res = np.empty(shape=(nexpand, full_dim), dtype=np.double) + + # preconditioning the residues, equation 29 in doi.org/10.1063/1.477483. 
+ for s in range(nspin): + q_vec = exci[candidates_to_expand, None, None] - ( + mo_energy[s][None, None, nocc[s] :] - mo_energy[s][None, : nocc[s], None] + ) + q_vec = q_vec.reshape(-1, nocc[s] * nvir[s]) + if precond_exact_diag: + q_vec -= apb_diag[s].reshape(-1, nocc[s] * nvir[s]) + all_res[:nexpand, s * dim[0] : s * dim[0] + dim[s]] = ( + right_res[candidates_to_expand, s * dim[0] : s * dim[0] + dim[s]] / q_vec + ) + if not TDA: + all_res[nexpand:, s * dim[0] : s * dim[0] + dim[s]] = ( + left_res[candidates_to_expand, s * dim[0] : s * dim[0] + dim[s]] / q_vec + ) + + # The rows of all_res are now the preconditioned left residues + # followed by the preconditioned right residues. + + # Orthogonalize residues against current trial vectors + all_res -= (all_res @ tri_vec[:ntri, :].T) @ tri_vec[:ntri, :] + # Orthogonalize residues amongst themselves + Q, R, _ = scipy.linalg.qr(all_res.T, mode='economic', pivoting=True) + + # Don't care about the small residues + orth_res = Q.T[np.abs(np.diag(R)) > 1e-10] + # But we should take at least one new vector. + if orth_res.size == 0: + orth_res = Q.T[:1] + + # Make sure the residues are orthogonal to the trial vectors + # and normalize them. + orth_res -= (orth_res @ tri_vec[:ntri, :].T) @ tri_vec[:ntri, :] + orth_res /= np.linalg.norm(orth_res, axis=1)[:, None] + + n_new_vec = min(orth_res.shape[0], full_dim - ntri) + if n_new_vec > 0: + if ntri + n_new_vec > tri_vec.shape[0]: + raise ValueError('Exceeded max_vec. Davidson algorithm for BSE is not converged!') + tri_vec[ntri : ntri + n_new_vec] = orth_res[:n_new_vec] + ntri += n_new_vec + lib.logger.info(bse, 'add %d new trial vectors.', n_new_vec) + else: + # We need to restart. 
+ lib.logger.warn(bse, 'Restarting Davidson algorithm.') + if bse.restart_max_size is None: + nvec_pair_to_save = ntri + else: + nvec_pair_to_save = min(ntri, bse.restart_max_size) + ntri = davidson_restart(Mp, Mm, tri_vec, nvec_pair_to_save=nvec_pair_to_save, e_min=e_min) + conv = False + + iter += 1 + if conv is True: + break + + assert conv is True, 'Davidson algorithm for BSE is not converged!' + + if bse.chkfile is not None: + davidson_save_checkpoint(bse.chkfile, tri_vec, amb_prod, apb_prod, nprod) + lib.logger.info(bse, f'Saving progress at iteration {iter+1} to {bse.chkfile}: {chk_last}->{nprod}.') + chk_last = nprod + + lib.logger.info(bse, f'BSE converged in {iter} iterations, final subspace size = {nprod}') + lib.logger.info(bse, f'total work for contraction: {float(total_contract_work):.2E}') + lib.logger.info(bse, f'total work for linalg: {float(total_linalg_work):.2E}') + lib.logger.info(bse, f'Mp condition number: {np.linalg.cond(Mp_sym)}') + if Mm is not None: + lib.logger.info(bse, f'Mm condition number: {np.linalg.cond(Mm_sym)}') + + found_roots = np.flatnonzero((exci >= e_min) & conv_vec) + nrootfound = found_roots.size + lib.logger.debug(bse, 'Finished with %d converged roots: \n%s', nrootfound, exci[found_roots]) + + # transfer left and right eigenvector to X and Y + + if not TDA: + X_vec = (left_vec[found_roots] + right_vec[found_roots]) * 0.5 + Y_vec = (-left_vec[found_roots] + right_vec[found_roots]) * 0.5 + else: + X_vec = right_vec[found_roots] + Y_vec = np.zeros_like(X_vec) + + # reshape X and Y eigenvector + if nspin == 1: + X_vec = [X_vec.reshape(nrootfound, nocc[0], nvir[0])] + Y_vec = [Y_vec.reshape(nrootfound, nocc[0], nvir[0])] + else: + X_vec_a, X_vec_b, Y_vec_a, Y_vec_b = [], [], [], [] + for r in range(nrootfound): + X_vec_a.append(X_vec[r][: dim[0]].reshape(nocc[0], nvir[0])) + X_vec_b.append(X_vec[r][dim[0] :].reshape(nocc[1], nvir[1])) + Y_vec_a.append(Y_vec[r][: dim[0]].reshape(nocc[0], nvir[0])) + 
def update_mp_mm(Mp, Mm, tri_vec, apb_prod, amb_prod, ntri, nprod):
    """Update the subspace matrices Mp and Mm to reflect the new trial vectors.

    With V the first ``ntri`` rows of ``tri_vec``, the subspace matrices are
    ``Mp = V @ apb_prod[:ntri].T`` and ``Mm = V @ amb_prod[:ntri].T``.

    If either ``Mp`` or ``Mm`` is None the matrices are rebuilt from scratch.
    Otherwise the leading ``nprod x nprod`` block of the old matrix is reused,
    only the rows belonging to the new trial vectors are computed, and the
    new columns are filled with the transpose of those rows (this relies on
    the projected matrices being symmetric).

    Parameters
    ----------
    Mp : ndarray or None
        Previous subspace matrix built from ``apb_prod``.
    Mm : ndarray or None
        Previous subspace matrix built from ``amb_prod``.
    tri_vec : ndarray
        Trial vectors (stored as rows).
    apb_prod : ndarray or None
        The vectors (A+B)|tri_vec> (stored as rows).
    amb_prod : ndarray or None
        The vectors (A-B)|tri_vec> (stored as rows).
    ntri : int
        Number of valid trial vectors in tri_vec.
    nprod : int
        Number of valid trial vectors when Mm and Mp were last updated.

    Returns
    -------
    (ndarray, ndarray, int)
        Mp, Mm, work; where work is a rough estimate of the FLOP count.
        A matrix whose product array is None is returned unchanged.
    """
    full_dim = tri_vec.shape[1]
    work = 0

    def _rebuild(prod):
        # A+B and A-B in subspace, step 3 in doi.org/10.1063/1.477483
        return np.matmul(tri_vec[:ntri, :], prod[:ntri, :].T)

    def _extend(old, prod):
        new = np.zeros(shape=[ntri, ntri], dtype=np.double)
        new[:nprod, :nprod] = old[:nprod, :nprod]
        # One rectangular product yields the new rows, including the new
        # diagonal block, so no second matmul is needed for that block.
        new[nprod:ntri, :ntri] = tri_vec[nprod:ntri, :] @ prod[:ntri, :].T
        # Mirror only the off-diagonal part; source and target do not overlap.
        new[:nprod, nprod:ntri] = new[nprod:ntri, :nprod].T
        return new

    rebuild = Mp is None or Mm is None
    if rebuild:
        block_work = ntri**2 * full_dim
    else:
        block_work = (ntri**2 - nprod**2) * full_dim

    if apb_prod is not None:
        Mp = _rebuild(apb_prod) if rebuild else _extend(Mp, apb_prod)
        work += block_work

    if amb_prod is not None:
        Mm = _rebuild(amb_prod) if rebuild else _extend(Mm, amb_prod)
        work += block_work

    return Mp, Mm, work
def bse_lanczos(bse, multi, u1=None, core_orbs=None, nsteps=100):
    """Lanczos algorithm for BSE.
    Follows 10.1137/16M1102641.

    Parameters
    ----------
    bse : BSE
        BSE object.
    multi : str
        multiplicity, 's'=singlet, 't'=triplet, 'u'=unrestricted.
    u1 : np.ndarray, optional
        initial state for Lanczos algorithm, by default None
        (a random vector is drawn when neither u1 nor core_orbs is given).
    core_orbs : np.ndarray, optional
        core orbitals, by default None. When given, the start vector has a 1
        on every occ-vir pair whose occupied orbital has a weight >= 0.5 on
        these orbitals. Cannot be combined with u1.
    nsteps : int, optional
        the number of Lanczos steps, by default 100

    Returns
    -------
    alphas : double array
        coefficients from the Lanczos algorithm, diagonal elements of the tridiagonal matrix.
    betas : double array
        coefficients from the Lanczos algorithm, off-diagonal elements of the tridiagonal matrix.

    Notes
    -----
    There is no breakdown handling: a vanishing beta leads to a division by
    zero in the following normalization.
    """
    # load matrix
    nspin = bse.nspin
    nmo = bse.nmo
    nocc = bse.nocc
    mo_energy = bse.mo_energy
    # load parameter
    TDA = bse.TDA

    # determine dimension
    nvir = [(nmo - nocc[i]) for i in range(nspin)]
    dim = [(nocc[i] * nvir[i]) for i in range(nspin)]
    full_dim = dim[0] + dim[1] if nspin == 2 else dim[0]

    Lia = [np.ascontiguousarray(bse.Lpq[s][:, : nocc[s], nocc[s] :]) for s in range(nspin)]
    Laa = [np.ascontiguousarray(bse.Lpq[s][:, nocc[s] :, nocc[s] :]) for s in range(nspin)]
    Lii_bar, Lia_bar = _get_lpq_bar_by_block(
        nocc=nocc, mo_energy=mo_energy, Lii=[bse.Lpq[s][:, : nocc[s], : nocc[s]] for s in range(nspin)], Lia=Lia
    )

    def _contract(vec):
        # Apply the BSE contraction to a single vector (passed as one row).
        return _bse_contraction(
            multi=multi,
            nocc=nocc,
            mo_energy=mo_energy,
            Lia=Lia,
            Laa=Laa,
            Lii_bar=Lii_bar,
            Lia_bar=Lia_bar,
            tri_vec=vec.reshape((1, -1)),
            TDA=TDA,
        )

    if core_orbs is not None:
        assert u1 is None, 'u1 and core_orbs cannot be used together'
        u1 = np.zeros(full_dim)
        for s in range(nspin):
            occ_to_take = np.flatnonzero(mo_mapping.mo_comps(core_orbs, bse.mol, bse.mo_coeff[s]) >= 0.5)
            # Only occupied orbitals label rows of the occ-vir block.
            # NOTE(review): assumes mo_comps returns one weight per MO column - confirm.
            occ_to_take = occ_to_take[occ_to_take < nocc[s]]
            # Virtual indices are relative to the block (0 .. nvir-1) and the
            # block of spin s starts at s * dim[0], as in _bse_contraction.
            vir_to_take = np.arange(nvir[s], dtype=int)
            for o in occ_to_take:
                u1[s * dim[0] + o * nvir[s] + vir_to_take] = 1.0
        u1_norm = np.linalg.norm(u1)
        if u1_norm == 0.0:
            raise ValueError('No occupied orbital matches core_orbs; cannot build the Lanczos start vector.')
        u1 = u1 / u1_norm

    elif u1 is None:
        u1 = np.random.random(full_dim) - 0.5
        u1 = u1 / np.linalg.norm(u1)

    betas = np.zeros(nsteps)
    alphas = np.zeros(nsteps)

    if not TDA:
        # The (A+B) u1 product is only needed for the non-TDA normalization.
        apb_u1 = _contract(u1)[0].reshape(-1)
        u1_apbnorm = np.dot(u1, apb_u1)
        u = u1 / np.sqrt(u1_apbnorm)
        v = apb_u1 / np.sqrt(u1_apbnorm)
    else:
        u = u1 / np.linalg.norm(u1)
        v = u
        # Lanczos vectors kept for the re-orthogonalization in the TDA branch.
        prev_vecs = np.zeros((nsteps + 1, full_dim))
        prev_vecs[0] = v
        nprev = 1

    u_last = np.zeros_like(u)
    beta_last = 0.0

    # The BLAS return values are used (rather than relying on in-place side
    # effects) so the recurrences stay correct even if a copy is made.
    for step in range(nsteps):
        lib.logger.debug(bse, 'BSE Lanczos #%d iteration', step + 1)
        if not TDA:
            # x = (A - B) v_j - beta_{j-1} u_{j-1}
            _, amb_v, _ = _contract(v)
            x = sla.blas.daxpy(u_last, amb_v.reshape(-1), a=-beta_last)
            # alpha = v_j^T x
            alphas[step] = np.dot(x, v)
            # x = x - alpha u_j
            x = sla.blas.daxpy(u, x, a=-alphas[step])
            # y = (A + B) x
            y = _contract(x)[0].reshape(-1)
            # beta_j = sqrt(x^T y)
            betas[step] = np.sqrt(np.dot(x, y))
            u_last = u
            # u_{j+1} = x / beta_j
            # v_{j+1} = y / beta_j
            u = sla.blas.dscal(1.0 / betas[step], x)
            v = sla.blas.dscal(1.0 / betas[step], y)
        else:
            # TDA approximation
            # v = A u_j - beta_{j-1} u_{j-1}
            v = _contract(u)[0].reshape(-1)
            v = sla.blas.daxpy(u_last, v, a=-beta_last)
            # alpha_j = u_j^T v
            alphas[step] = np.dot(u, v)
            # v = v - alpha u_j
            v = sla.blas.daxpy(u, v, a=-alphas[step])

            # orthogonalize against previous vectors
            hs = prev_vecs[:nprev] @ v
            v -= prev_vecs[:nprev].T @ hs

            # beta_j = ||v||
            betas[step] = np.linalg.norm(v)
            # u_{j+1} = v / beta_j
            v = sla.blas.dscal(1.0 / betas[step], v)
            u_last = u
            u = v
            prev_vecs[nprev] = v
            nprev += 1
        beta_last = betas[step]
    return alphas, betas
def lanczos_roots_magnitudes(alphas, betas, TDA=False):
    """Extract positive roots and their weights from the Lanczos coefficients.

    A symmetric tridiagonal matrix is assembled whose diagonal is ``alphas``
    followed by ``alphas`` reversed without its last element, and whose
    off-diagonal is ``betas`` followed by ``betas`` reversed without its last
    two elements. It is diagonalized, and the positive eigenvalues are kept
    together with the squared first component of their eigenvectors.

    Parameters
    ----------
    alphas : double array
        coefficients from the Lanczos algorithm, diagonal elements of the tridiagonal matrix.
        At least two coefficients are required.
    betas : double array
        coefficients from the Lanczos algorithm, off-diagonal elements of the tridiagonal matrix.
    TDA : bool, optional
        used TDA approximation, by default False. Any falsy value selects the
        non-TDA treatment.

    Returns
    -------
    roots_pos : double array
        positive roots of excitation energies (square roots of the positive
        eigenvalues unless TDA is set).
    magnitudes : double array
        the magnitude of each root (divided by the root unless TDA is set).
    """
    Tk_diag = np.concatenate([alphas, alphas[-2::-1]], axis=0)
    Tk_offdiag = np.concatenate([betas, betas[-3::-1]], axis=0)
    roots, S = scipy.linalg.eigh_tridiagonal(Tk_diag, Tk_offdiag, lapack_driver='stebz')

    positive = roots > 0
    roots_pos = roots[positive]
    magnitudes = S[0, positive] ** 2

    # Branch on truthiness once, so flags such as numpy.bool_(False) or 0
    # take the square root and the division together.
    if TDA:
        return roots_pos, magnitudes
    roots_pos = np.sqrt(roots_pos)
    return roots_pos, magnitudes / roots_pos


def lanczos_estimate_spectrum(alphas, betas, e_range, eta, nw, TDA=False):
    """Estimate the excitation spectrum density from the results of the Lanczos algorithm.

    Each positive root contributes a normalized Gaussian of width ``eta``
    centred at the root minus the same Gaussian centred at minus the root,
    weighted by the magnitude of the root.

    Parameters
    ----------
    alphas : double array
        coefficients from the Lanczos algorithm, diagonal elements of the tridiagonal matrix.
    betas : double array
        coefficients from the Lanczos algorithm, off-diagonal elements of the tridiagonal matrix.
    e_range : tuple
        energy range (e_min, e_max).
    eta : float
        broadening parameter.
    nw : int
        number of frequency points.
    TDA : bool, optional
        used TDA approximation, by default False

    Returns
    -------
    freqs : double array
        frequency points at which to compute density estimate.
    density : double array
        excitation spectrum density estimate.
    """
    roots_pos, magnitudes = lanczos_roots_magnitudes(alphas, betas, TDA=TDA)

    freqs = np.linspace(e_range[0], e_range[1], nw)

    def gauss_broad(omega, eta, roots):
        normalization = 1.0 / np.sqrt(2 * np.pi * eta**2)
        return normalization * (
            np.exp(-((omega[:, None] - roots[None, :]) ** 2) / (2 * eta**2))
            - np.exp(-((omega[:, None] + roots[None, :]) ** 2) / (2 * eta**2))
        )

    density = gauss_broad(freqs, eta, roots_pos) @ magnitudes
    return freqs, density
def get_davidson_trial_vector(bse, ntri, nocc, mo_energy, e_min=0.0, delta=0.0, core_orbs=None):
    """Generate initial trial vectors for particle-hole excitations.
    The order is determined by the occ-vir pair orbital energy difference.
    The initial trial vectors are diagonal (unit vectors). They are generated
    by taking occ-vir pairs with an energy difference of >= e_min + delta.

    Parameters
    ----------
    bse : BSE
        BSE object. Only accessed (``mol``, ``mo_coeff``) when core_orbs is given.
    ntri : int
        number of desired initial trial vectors.
    nocc : int array
        number of occupied orbitals.
    mo_energy : double ndarray
        orbital energy, shape (nspin, nmo).
    e_min : float, optional
        minimum desired excitation energy, by default 0.0
    delta : float, optional
        energy shift for trial vector generation, typically <=0.0, by default 0.0
    core_orbs : optional
        core orbitals, by default None. When given, only occupied orbitals
        with a weight >= 0.3 on these orbitals are used.

    Returns
    -------
    ntri : int
        the number of actual trial vectors generated (reduced when fewer
        pairs are available)
    tri_vec : double ndarray
        initial trial vectors, shape (ntri, full_dim). Rows are grouped by
        spin block; within a block they are ordered by energy difference.

    Raises
    ------
    ValueError
        If core_orbs is given but bse has no ``mol`` attribute.
    """
    nspin, nmo = mo_energy.shape
    nvir = [(nmo - nocc[i]) for i in range(nspin)]
    dim = [(nocc[i] * nvir[i]) for i in range(nspin)]
    full_dim = dim[0] + dim[1] if nspin == 2 else dim[0]

    if core_orbs is not None:
        if not hasattr(bse, 'mol'):
            raise ValueError('mol object is required for generating trial vectors for core excitations.')
        # Select those occupied orbitals with a significant contribution from given core orbitals.
        occ_to_take = []
        for s in range(nspin):
            selected = np.flatnonzero(mo_mapping.mo_comps(core_orbs, bse.mol, bse.mo_coeff[s]) >= 0.3)
            # NOTE(review): assumes mo_comps returns one weight per MO column;
            # virtual orbitals are discarded because they do not label rows of
            # the occ-vir block - confirm.
            occ_to_take.append(selected[selected < nocc[s]])
    else:
        occ_to_take = [np.arange(nocc[s], dtype=int) for s in range(nspin)]

    e_diffs = []
    e_diffs_shp = []
    for s in range(nspin):
        # e_diffs_s[k, a] = mo_energy[s][nocc[s] + a] - mo_energy[s][occ_to_take[s][k]]
        e_diffs_s = mo_energy[s][None, nocc[s] :] - mo_energy[s][occ_to_take[s], None]
        e_diffs_shp.append(e_diffs_s.shape)
        e_diffs.append(e_diffs_s.reshape(-1))

    # Glue the per-spin differences together into a 1D array.
    all_ediffs = np.concatenate(e_diffs, axis=0)

    # Starting index of each spin's block inside all_ediffs. The sizes of the
    # arrays that were actually concatenated are used, so the bookkeeping
    # stays correct when core_orbs restricts the occupied orbitals.
    e_diffs_starts = np.cumsum([0] + [e_diffs_s.size for e_diffs_s in e_diffs])

    # Find the indices which sort all_ediffs.
    sort_index = np.argsort(all_ediffs)

    # Take the lowest ntri pairs with energy difference >= e_min + delta.
    e_min_index = np.searchsorted(all_ediffs, e_min + delta, side='left', sorter=sort_index)
    if e_min_index + ntri > all_ediffs.size:
        # cannot find enough pairs for trial vectors; reduce ntri
        ntri = all_ediffs.size - e_min_index
    exci_to_take = sort_index[e_min_index : e_min_index + ntri]

    # exci_to_take is an index into all_ediffs.
    # We need to convert it back to orbital indices.
    tri_vec = np.zeros(shape=[ntri, full_dim], dtype=np.double)

    cur_trivec = 0
    for s in range(nspin):
        # Figure out which excitation indices are in this spin block.
        in_block = (exci_to_take >= e_diffs_starts[s]) & (exci_to_take < e_diffs_starts[s + 1])
        # Indices relative to the block, i.e. into e_diffs[s].
        exci_this_spin = exci_to_take[in_block] - e_diffs_starts[s]
        # Convert from 1D form to (row in occ_to_take[s], virtual index).
        ex_occ, ex_vir = np.unravel_index(exci_this_spin, e_diffs_shp[s])
        ex_occ = occ_to_take[s][ex_occ]
        n_exci = exci_this_spin.size

        # The block of spin s starts at column s * dim[0] of the full vector
        # (the same layout _bse_contraction uses), not at s * dim[s].
        rows = np.arange(cur_trivec, cur_trivec + n_exci)
        cols = s * dim[0] + ex_occ * nvir[s] + ex_vir
        tri_vec[rows, cols] = 1.0
        cur_trivec += n_exci

    return ntri, tri_vec
def get_davidson_trial_vector_diag(
    ntri, multi, nocc, mo_energy, Lpq, nocc_sub=50, nvir_sub=150, e_min=0.0, delta=0.0, TDA=False
):
    """Get trial vectors from subspace diagonalization.

    The BSE problem is fully diagonalized in an active space made of the
    highest ``nocc_sub`` occupied and lowest ``nvir_sub`` virtual orbitals.
    The X blocks of the lowest states above ``e_min + delta`` are embedded
    into zero-padded full-space vectors.

    Parameters
    ----------
    ntri : int
        number of trial vectors
    multi : str
        multiplicity
    nocc : list
        number of occupied orbitals
    mo_energy : ndarray
        orbital energy
    Lpq : ndarray
        three-center density-fitting matrix
    nocc_sub : int, optional
        number of subspace occupied orbitals, by default 50
    nvir_sub : int, optional
        number of subspace virtual orbitals, by default 150
    e_min : float, optional
        minimum desired excitation energy, by default 0.0
    delta : float, optional
        energy shift for trial vector generation, typically <=0.0, by default 0.0
    TDA : bool, optional
        use Tamm-Dancoff approximation, by default False

    Returns
    -------
    ntri : int
        the number of actual trial vectors generated; 0 when no subspace
        state lies above ``e_min + delta``
    tri_vec : double ndarray
        initial trial vectors
    """
    nspin, nmo = mo_energy.shape
    nvir = [(nmo - nocc[i]) for i in range(nspin)]
    dim = [(nocc[i] * nvir[i]) for i in range(nspin)]

    # adjust active space if necessary
    nocc_sub = int(min(nocc[0], nocc_sub))
    nvir_sub = int(min(nvir[0], nvir_sub))

    if nspin == 1:
        nocc_sub = [nocc_sub]
        nvir_sub = [nvir_sub]
    else:
        # numbers of beta orbitals are determined by alpha
        spin = nocc[0] - nocc[1]
        nocc_sub = [nocc_sub, nocc_sub - spin]
        nvir_sub = [nvir_sub, nvir_sub + spin]

    # get active-space BSE input
    start = [(nocc[s] - nocc_sub[s]) for s in range(nspin)]
    end = [(nocc[s] + nvir_sub[s]) for s in range(nspin)]
    mo_energy_sub = np.asarray([mo_energy[s, start[s] : end[s]] for s in range(nspin)])
    Lpq_sub = np.asarray([Lpq[s, :, start[s] : end[s], start[s] : end[s]] for s in range(nspin)])

    exci, X_vec, Y_vec = bse_full_diagonalization(
        multi=multi, nocc=nocc_sub, mo_energy=mo_energy_sub, Lpq=Lpq_sub, TDA=TDA
    )

    # Index of the first state strictly above the threshold. If there is
    # none, fall back to len(exci) so that zero trial vectors are produced
    # instead of hitting an unbound local variable.
    above = np.flatnonzero(np.asarray(exci) > (e_min + delta))
    first_state = int(above[0]) if above.size > 0 else len(exci)

    ntri = min(ntri, len(exci) - first_state)
    tri_vec = []
    for s in range(nspin):
        tri_vec.append(np.zeros(shape=[ntri, nocc[s], nvir[s]], dtype=np.double))
        X_vec_tri = X_vec[s][first_state : first_state + ntri].reshape(ntri, nocc_sub[s], nvir_sub[s])
        # The active space covers the last nocc_sub occupied and the first
        # nvir_sub virtual orbitals of the full occ-vir block.
        tri_vec[s][:, nocc[s] - nocc_sub[s] :, : nvir_sub[s]] = X_vec_tri
        tri_vec[s] = tri_vec[s].reshape(ntri, dim[s])
    tri_vec = np.concatenate(tri_vec, axis=1)

    return ntri, tri_vec
def _bse_contraction(multi, nocc, mo_energy, Lia, Laa, Lii_bar, Lia_bar, tri_vec, TDA=False):
    """Contraction for BSE matrix and trial vectors.
    W part is as equation 25 and 26 in doi.org/10.1002/jcc.24688.

    Three contributions are accumulated for every row of ``tri_vec``: the
    V term (skipped when ``multi`` is 't' or 'T'), the W term, and the
    orbital energy differences.

    Parameters
    ----------
    multi : str
        multiplicity, 's'=singlet, 't'=triplet, 'u'=unrestricted.
    nocc : int array
        the number of occupied orbitals.
    mo_energy : double ndarray
        orbital energy.
    Lia : double ndarray
        3-center density-fitting matrix, ov block.
    Laa : double ndarray
        3-center density-fitting matrix, vv block.
    Lii_bar : double ndarray
        auxiliary 3-center matrix as equation 21 in doi.org/10.1002/jcc.24688.
    Lia_bar : double ndarray
        auxiliary 3-center matrix as equation 21 in doi.org/10.1002/jcc.24688.
    tri_vec : double ndarray
        trial vectors, one per row.
    TDA : bool, optional
        use TDA approximation, by default False

    Returns
    -------
    apb_prod : double ndarray
        A+B matrix and trial vector contracted vectors.
    amb_prod : double ndarray or None
        A-B matrix and trial vector contracted vectors; None when TDA is set.
    work_done : int
        accumulated operation-count estimate of the contractions.
    """
    nspin = len(Lia)
    naux, _, _ = Lia[0].shape
    # nmo is recovered from the ov block of the first spin channel: nvir + nocc
    nmo = Lia[0].shape[2] + Lia[0].shape[1]
    ntri = tri_vec.shape[0]

    nvir = [(nmo - nocc[i]) for i in range(nspin)]
    dim = [(nocc[i] * nvir[i]) for i in range(nspin)]
    full_dim = dim[0] + dim[1] if nspin == 2 else dim[0]

    work_done = 0

    # Prefactor of the V term: 4 / nspin, halved under TDA.
    # NOTE(review): this uses an identity test (`TDA is True`) while the
    # branches below use truthiness; a truthy non-bool flag would skip the
    # halving but still drop amb_prod - confirm callers always pass a bool.
    scale = 4.0 / nspin
    if TDA is True:
        scale /= 2.0

    apb_prod = np.zeros(shape=[ntri, full_dim], dtype=np.double)
    if TDA:
        amb_prod = None
    else:
        amb_prod = np.zeros(shape=[ntri, full_dim], dtype=np.double)

    # contraction: V
    if multi != 't' and multi != 'T':
        # Scratch buffer; every row is written by dgemv before it is read.
        Lpq_z = np.empty(shape=[nspin, naux], dtype=np.double)
        for ivec in range(ntri):
            for s in range(nspin):
                # The block of spin s occupies columns [s*dim[0], s*dim[0] + dim[s]).
                z = tri_vec[ivec][s * dim[0] : s * dim[0] + dim[s]].reshape(nocc[s], nvir[s])
                # The following code is exactly equivalent to
                # Lpq_z[s] = einsum('Pjb,jb->P', Lia[s], z)
                # The result is written into Lpq_z[s] (overwrite_y=True).
                scipy.linalg.blas.dgemv(
                    alpha=1.0,
                    a=Lia[s].reshape(naux, -1).T,
                    x=z.reshape(-1),
                    y=Lpq_z[s],
                    overwrite_y=True,
                    trans=1,
                )
                work_done += naux * nvir[s] * nocc[s]

            for s in range(nspin):
                for t in range(nspin):
                    # vz = einsum('Pia,P->ia', Lia[s], Lpq_z[t]).reshape(-1) * scale
                    # apb_prod[ivec][s * dim[0] : s * dim[0] + dim[s]] += vz
                    # beta=1.0 accumulates onto the existing slice of apb_prod.
                    scipy.linalg.blas.dgemv(
                        alpha=scale,
                        a=Lia[s].reshape(naux, -1).T,
                        x=Lpq_z[t],
                        beta=1.0,
                        y=apb_prod[ivec][s * dim[0] : s * dim[0] + dim[s]],
                        overwrite_y=True,
                        trans=0,
                    )
                    work_done += naux * nvir[s] * nocc[s]
                    # No need to compute this for TDA
                    # if TDA is True and return_amb:
                    #     amb_prod[ivec][s * dim[0]: s * dim[0] + dim[s]] += vz

    # contraction: W
    for s in range(nspin):
        # Work buffers are allocated once per spin and reused for every vector.
        jLa_zs = np.zeros(shape=[nocc[s], naux * nvir[s]], dtype=np.double)
        waz = np.zeros((nocc[s], nvir[s]), dtype=np.double)
        if not TDA:
            jLi_zs = np.zeros(shape=[nocc[s], naux * nocc[s]], dtype=np.double)
            wbz = np.zeros((nocc[s], nvir[s]), dtype=np.double)
        for ivec in range(ntri):
            z = tri_vec[ivec][s * dim[0] : s * dim[0] + dim[s]].reshape(nocc[s], nvir[s])
            # The following calculation for waz is equivalent to
            # jLa_zs = einsum('Lab,jb->jLa', Laa[s], z)
            # waz = -einsum('jLi,jLa->ia', Lii_bar[s], jLa_zs).reshape(-1)
            np.matmul(z, Laa[s].reshape(-1, nvir[s]).T, out=jLa_zs)
            # c=waz.T with overwrite_c=True: the product is stored in waz
            # through its transposed view.
            scipy.linalg.blas.dgemm(
                alpha=-1.0,
                a=jLa_zs.reshape(nocc[s] * naux, nvir[s]).T,
                b=Lii_bar[s].reshape(nocc[s] * naux, nocc[s]).T,
                trans_a=0,
                trans_b=1,
                c=waz.T,
                overwrite_c=True,
            )
            work_done += naux * nocc[s] * nocc[s] * nvir[s]

            if not TDA:
                # the following calculation for wbz is equivalent to
                # jLi_zs = einsum('Lib,jb->Lij', Lia[s], z)
                # wbz = -einsum('Lja,jLi->ia', Lia_bar[s], jLi_zs).reshape(-1)
                np.matmul(z, Lia[s].reshape(-1, nvir[s]).T, out=jLi_zs)
                scipy.linalg.blas.dgemm(
                    alpha=-1.0,
                    a=Lia_bar[s].reshape(nocc[s] * naux, nvir[s]).T,
                    b=jLi_zs.reshape(nocc[s] * naux, nocc[s]).T,
                    trans_a=0,
                    trans_b=1,
                    beta=0.0,
                    c=wbz.T,
                    overwrite_c=True,
                )
                work_done += naux * nocc[s] * nocc[s] * nvir[s]
            if not TDA:
                apb_prod[ivec][s * dim[0] : s * dim[0] + dim[s]] += (waz + wbz).ravel()
                amb_prod[ivec][s * dim[0] : s * dim[0] + dim[s]] += (waz - wbz).ravel()
            else:
                apb_prod[ivec][s * dim[0] : s * dim[0] + dim[s]] += waz.ravel()

    # contraction: orbital energy difference
    for s in range(nspin):
        # orb_diff[i * nvir + a] = mo_energy[s][nocc + a] - mo_energy[s][i]
        orb_diff = np.asarray(mo_energy[s][None, nocc[s] :] - mo_energy[s][: nocc[s], None]).reshape(-1)
        for ivec in range(ntri):
            oz = orb_diff * tri_vec[ivec][s * dim[0] : s * dim[0] + dim[s]]
            apb_prod[ivec][s * dim[0] : s * dim[0] + dim[s]] += oz
            if not TDA:
                amb_prod[ivec][s * dim[0] : s * dim[0] + dim[s]] += oz
            work_done += 2 * oz.size

    return apb_prod, amb_prod, work_done
def _get_lpq_bar(nocc, mo_energy, Lpq):
    """Calculate the auxiliary 3-center matrix.
    Lpq_bar = (epsilon)^-1 * Lpq
    Equation 11 in doi.org/10.1002/jcc.24688.

    Parameters
    ----------
    nocc : int array
        the number of occupied orbitals
    mo_energy : double ndarray
        orbital energy
    Lpq : double ndarray
        3-center density-fitting matrix, indexed as [spin, aux, p, q]

    Returns
    -------
    Lpq_bar : double ndarray
        auxiliary three-center matrix, same index order as Lpq
    """
    nspin, naux, _, _ = Lpq.shape

    # Response function in the auxiliary basis, accumulated over spin.
    response = np.zeros(shape=[naux, naux], dtype=np.double)
    for s in range(nspin):
        n_occ = nocc[s]
        L_ov = Lpq[s][:, :n_occ, n_occ:]
        inv_gap = mo_energy[s][:n_occ, None] - mo_energy[s][None, n_occ:]
        inv_gap = 1.0 / inv_gap
        response += 2.0 * einsum('Pia,ia,Qia->PQ', L_ov, inv_gap, L_ov)
    if nspin == 1:
        # a single spin channel stands for both spins
        response *= 2.0

    # Inverse dielectric function in the auxiliary basis.
    inv_dielectric = np.linalg.inv((np.eye(naux) - response))

    # Apply it along the auxiliary index of every spin block.
    return einsum('PQ,sQmn->sPmn', inv_dielectric, Lpq)
def _get_lpq_bar_by_block(nocc, mo_energy, Lii, Lia):
    """Calculate the auxiliary 3-center matrix for the oo and ov blocks.
    Lpq_bar = (epsilon)^-1 * Lpq
    Equation 11 in doi.org/10.1002/jcc.24688.

    Parameters
    ----------
    nocc : int array
        numbers of occupied orbitals
    mo_energy : double ndarray
        orbital energy
    Lii : list of double ndarray
        3-center density-fitting matrix, oo block, one (naux, nocc, nocc) array per spin
    Lia : list of double ndarray
        3-center density-fitting matrix, ov block, one (naux, nocc, nvir) array per spin

    Returns
    -------
    Lii_bar : list of double ndarray
        auxiliary three-center matrix, oo block, stored contiguously as (nocc, naux, nocc)
    Lia_bar : list of double ndarray
        auxiliary three-center matrix, ov block, stored contiguously as (nocc, naux, nvir)
    """
    nspin = len(Lia)
    naux, _, _ = Lia[0].shape
    nvir = [block.shape[2] for block in Lia]

    # Response function in the auxiliary basis, accumulated over spin.
    response = np.zeros(shape=[naux, naux], dtype=np.double)
    for s in range(nspin):
        inv_gap = mo_energy[s][: nocc[s], None] - mo_energy[s][None, nocc[s] :]
        inv_gap = 1.0 / inv_gap
        weighted = Lia[s] * (inv_gap * 2.0)
        # Matrix form of 2.0 * einsum('Pia,ia,Qia->PQ', Lia[s], inv_gap, Lia[s])
        response += weighted.reshape(naux, -1) @ Lia[s].reshape(naux, -1).T
    if nspin == 1:
        response *= 2.0

    # Inverse dielectric function in the auxiliary basis.
    inv_dielectric = np.linalg.inv((np.eye(naux) - response))

    # Lpq_bar = einsum('PQ,sQmn->sPmn', InvD, Lpq), one block at a time.
    ov_bar = [
        np.matmul(inv_dielectric, Lia[s].reshape(naux, -1)).reshape(naux, nocc[s], nvir[s]) for s in range(nspin)
    ]
    oo_bar = [
        np.matmul(inv_dielectric, Lii[s].reshape(naux, -1)).reshape(naux, nocc[s], nocc[s]) for s in range(nspin)
    ]

    # _bse_contraction reshapes these tensors assuming occupied-major layout.
    Lii_bar = [np.ascontiguousarray(blk.transpose(1, 0, 2)) for blk in oo_bar]
    Lia_bar = [np.ascontiguousarray(blk.transpose(1, 0, 2)) for blk in ov_bar]

    return Lii_bar, Lia_bar
def _get_oscillator_strength(multi, exci, X_vec, Y_vec, mo_coeff, nocc, mol):
    """Get transition dipoles and oscillator strengths.

    Parameters
    ----------
    multi : str
        multiplicity. "s"=singlet, "t"=triplet, "u"=unrestricted.
        Both 't' and 'T' are treated as triplet, as in _bse_contraction.
    exci : double array
        excitation energy.
    X_vec : double ndarray
        X block of eigenvector (excitation), one (nroot, nocc, nvir) array per spin.
    Y_vec : double ndarray
        Y block of eigenvector (de-excitation), same layout as X_vec.
    mo_coeff : double ndarray
        coefficient from AO to MO, shape (nspin, nao, nmo).
    nocc : int array
        number of occupied orbitals.
    mol : pyscf.gto.mole.Mole
        Mole object for generating dipole matrix. Not accessed for triplets.

    Returns
    -------
    dipole : double ndarray
        transition dipoles of all excitations, shape (3, nroot).
    oscillator_strength : double array
        oscillator strengths of all excitations.
    """
    nspin, _, _ = mo_coeff.shape
    nroot = X_vec[0].shape[0]

    dipole = np.zeros(shape=[3, nroot], dtype=np.double, order='F')
    oscillator_strength = np.zeros(shape=[nroot], dtype=np.double)

    # BSE is blind to triplet oscillator strength
    if multi in ('t', 'T'):
        return dipole, oscillator_strength

    # Dipole integrals with the gauge origin at (0, 0, 0).
    with mol.with_common_orig((0, 0, 0)):
        ao_dip = mol.intor_symmetric('int1e_r', comp=3)

    # Transform AO dipole integrals to the occ-vir MO block, shape (3, nocc, nvir).
    mo_dip = [mo_coeff[s][:, : nocc[s]].T @ ao_dip @ mo_coeff[s][:, nocc[s] :] for s in range(nspin)]

    # All roots at once: sum_ia (X + Y)_ia * d_ia for each Cartesian component.
    for s in range(nspin):
        dipole += np.einsum('ria,xia->xr', np.asarray(X_vec[s]) + np.asarray(Y_vec[s]), mo_dip[s])

    if nspin == 1:
        dipole *= np.sqrt(2)

    oscillator_strength = (2 / 3) * exci * np.sum(dipole**2, axis=0)

    return dipole, oscillator_strength
def _get_spin_square(nocc, X_vec, Y_vec, mo_coeff, ovlp):
    """Get <S^2> expectation value.

    Parameters
    ----------
    nocc : int array
        number of occupied orbitals (alpha, beta).
    X_vec : double ndarray
        X block of eigenvector (excitation), one array per spin.
    Y_vec : double ndarray
        Y block of eigenvector (de-excitation), one array per spin.
    mo_coeff : double ndarray
        coefficient from AO to MO (alpha, beta).
    ovlp : double ndarray
        overlap matrix.

    Returns
    -------
    s2 : double array
        <S^2> expectation value of excitations.
    """
    nroot = X_vec[0].shape[0]
    n_a, n_b = nocc[0], nocc[1]

    # alpha-beta MO overlap and the blocks used below
    ab_ovlp = mo_coeff[0].T @ ovlp @ mo_coeff[1]
    s_oo = ab_ovlp[:n_a, :n_b]
    s_vo = ab_ovlp[n_a:, :n_b]
    s_ov = ab_ovlp[:n_a, n_b:]
    s_vv = ab_ovlp[n_a:, n_b:]
    s_oo_sq = s_oo**2

    # Excitation-independent part, then per-root corrections.
    s2 = np.zeros(shape=[nroot], dtype=np.double)
    s2[:] = n_a - (n_a - n_b) / 2.0 + ((n_a - n_b) / 2.0) ** 2

    for iroot in range(nroot):
        xpy_a = X_vec[0][iroot] + Y_vec[0][iroot]
        xmy_a = X_vec[0][iroot] - Y_vec[0][iroot]
        xpy_b = X_vec[1][iroot] + Y_vec[1][iroot]
        xmy_b = X_vec[1][iroot] - Y_vec[1][iroot]

        # alpha excitation ket
        # a alpha and j beta exchange: alpha excitation bra
        s2[iroot] -= einsum('ia,ib,aj,bj->', xpy_a, xmy_a, s_vo, s_vo)
        # a alpha and j beta exchange: beta excitation bra
        s2[iroot] -= einsum('ia,jb,ij,ab->', xpy_a, xmy_b, s_oo, s_vv)
        # i alpha and j beta exchange: same alpha excitation bra
        s2[iroot] -= einsum('ia,ia,jk->', xpy_a, xmy_a, s_oo_sq)
        s2[iroot] += einsum('ia,ia,ik->', xpy_a, xmy_a, s_oo_sq)

        # beta excitation ket
        # i alpha and b beta exchange: beta excitation bra
        s2[iroot] -= einsum('ia,ib,ja,jb->', xpy_b, xmy_b, s_ov, s_ov)
        # i alpha and b beta exchange: alpha excitation bra
        s2[iroot] -= einsum('ia,jb,ji,ba->', xpy_b, xmy_a, s_oo, s_vv)
        # i alpha and j beta exchange: same beta excitation bra
        s2[iroot] -= einsum('ia,ia,jk->', xpy_b, xmy_b, s_oo_sq)
        s2[iroot] += einsum('ia,ia,ji->', xpy_b, xmy_b, s_oo_sq)

    return s2
class BSE(lib.StreamObject):
    def __init__(self, gw):
        """Initialize BSE object.
        The BSE object can be initialized by a restricted or unrestricted mol/Gamma GW object.

        Per-spin quantities (nocc, mo_energy, mo_coeff, Lpq) are stored with a
        leading spin axis, of length 1 in the restricted case.

        Parameters
        ----------
        gw : GWAC/UGWAC
            GW object (required). It must provide verbose, mol, _scf, nocc,
            mo_energy, mo_coeff and nmo, and either an ``Lpq`` attribute or
            an ``ao2mo`` method.

        Notes
        -----
        In the unrestricted case an integer ``gw.nmo`` is replaced in place by
        ``[gw.nmo, gw.nmo]``.
        """
        self.verbose = gw.verbose  # verbose level
        self.nspin = 1 if np.asarray(gw.mo_energy).ndim == 1 else 2  # 1 for restricted, 2 for unrestricted
        self.mol = gw.mol  # mol object
        self.mf = gw._scf  # mean-field object
        self.nocc = np.asarray(gw.nocc)  # number of occupied orbitals
        if self.nocc.ndim == 0:
            self.nocc = self.nocc[np.newaxis, ...]
        self.mo_energy = np.asarray(gw.mo_energy)  # orbital energy
        if self.mo_energy.ndim == 1:
            self.mo_energy = self.mo_energy[np.newaxis, ...]
        # orbital coefficient from AO to MO; a tuple/list of per-spin arrays
        # is converted so that .ndim is available, an ndarray is kept as is
        mo_coeff = gw.mo_coeff
        if not isinstance(mo_coeff, np.ndarray):
            mo_coeff = np.asarray(mo_coeff)
        if mo_coeff.ndim == 2:
            mo_coeff = mo_coeff[np.newaxis, ...]
        self.mo_coeff = mo_coeff
        self.nmo = self.mo_energy.shape[-1]  # number of molecular orbitals
        # initialize density-fitting matrix
        if self.nspin == 2 and isinstance(gw.nmo, int):
            gw.nmo = [gw.nmo, gw.nmo]
        Lpq = gw.Lpq if hasattr(gw, 'Lpq') else None  # three-center density-fitting matrix in MO
        if Lpq is None:
            Lpq = gw.ao2mo(gw.mo_coeff)
        if not isinstance(Lpq, np.ndarray):
            Lpq = np.asarray(Lpq)
        if Lpq.ndim == 3:
            Lpq = Lpq[np.newaxis, ...]
        self.Lpq = Lpq

        # options
        self.TDA = False  # use TDA approximation to ignore B matrix
        self.delete_lpq = False  # delete Lpq after calculation
        self.chkfile = None  # checkpoint file
        self.chk_every = 10  # checkpoint frequency

        # Davidson algorithm
        # NOTE: max_vec, max_expand and init_ntri are derived from nroot at
        # construction time and are not refreshed when nroot is changed later.
        self.multi = None  # multiplicity
        self.nroot = 10  # the number of desired roots
        self.trial = 'identity'  # mode to initialize trial vector
        self.nocc_sub = 50  # number of occupied orbitals in the trial vector subspace
        self.nvir_sub = 150  # number of virtual orbitals in the trial vector subspace
        self.max_vec = 12 * self.nroot  # max allowed subspace size
        self.max_iter = 100  # max Davidson iteration
        # max number of trial vectors to expand per iteration
        self.max_expand = min(100, self.nroot)
        self.residue_thresh = 1e-8  # threshold if the residue needs to be added as a new trial vector
        self.init_ntri = min(100, self.nroot)  # number of initial trial vectors
        self.restart_max_size = None  # max number of trial vectors to keep during a restart

        # results
        self.exci = None  # excitation energy
        self.X_vec = None  # X block of eigenvector (excitation)
        self.Y_vec = None  # Y block of eigenvector (de-excitation)
        return
+ + # options + self.TDA = False # use TDA approximation to ignore B matrix + self.delete_lpq = False # delete Lpq after calculation + self.chkfile = None # checkpoint file + self.chk_every = 10 # checkpoint frequency + + # Davidson algorithm + self.multi = None # multiplicity + self.nroot = 10 # the number of desired roots + self.trial = 'identity' # mode to initialize trial vector + self.nocc_sub = 50 # number of occpuied orbitals in the trial vector subspace + self.nvir_sub = 150 # number of virtual orbitals in the trial vector subspace + self.max_vec = 12 * self.nroot # max allowed subspace size + self.max_iter = 100 # max Davidson iteration + # max number of trial vectors to expand per iteration + self.max_expand = min(100, self.nroot) + self.residue_thresh = 1e-8 # threshold if the residue needs to be added as a new trial vector + self.init_ntri = min(100, self.nroot) + self.restart_max_size = None # max number of trial vectors to keep during a restart + + # results + self.exci = None # excitation energy + self.X_vec = None # X block of eigenvector (excitation) + self.Y_vec = None # Y block of eigenvector (de-excitation) + return + + def dump_flags(self): + """Dump BSE flags.""" + log = lib.logger.Logger(self.stdout, self.verbose) + log.info('') + log.info('******** %s ********', self.__class__) + nvir = [(self.nmo - self.nocc[i]) for i in range(self.nspin)] + dim = [(self.nocc[i] * nvir[i]) for i in range(self.nspin)] + log.info('multiplicity = %s', self.multi) + log.info('nmo = %s', self.nmo) + log.info('nocc = %s', self.nocc[0] if self.nspin == 1 else self.nocc) + log.info('nvir = %s', nvir[0] if self.nspin == 1 else nvir) + log.info('occ-vir dimension = %s', dim[0] if self.nspin == 1 else dim) + if self.nspin == 2: + log.info('BSE full dimension = %s', dim[0] + dim[1]) + log.info('Tamm-Dancoff approximation = %s', self.TDA) + log.info('number of roots = %d', self.nroot) + log.info('trial vector = %s', self.trial) + if self.trial == 'subspace': + 
log.info('subspace nocc = %d nvir = %d', self.nocc_sub, self.nvir_sub) + log.info('max subspace size = %d', self.max_vec) + log.info('max iteration = %s', self.max_iter) + log.info('convergence tolerance = %s', self.residue_thresh) + log.info('') + return + + def check_memory(self): + """Check memory needed for the BSE calculation.""" + nvir = [(self.nmo - self.nocc[i]) for i in range(self.nspin)] + dim = [(self.nocc[i] * nvir[i]) for i in range(self.nspin)] + full_dim = dim[0] + dim[1] if self.nspin == 2 else dim[0] + naux = self.Lpq.shape[1] + + # Lpq and Lpq_bar; trial vector, A+B/A-B matrix with trial vector product + mem = (naux * self.nmo * self.nmo * 2 + self.max_vec * full_dim * 3) * 8 + lib.logger.info(self, 'BSE needs at least %.1f GB memory.', mem / 1.0e9) + + return + + def kernel(self, multi, e_min=0.0, delta=0.0, **kwargs): + """Davidson algorithm for BSE. + + Parameters + ---------- + multi : str + multiplicity. "s"=singlet, "t"=triplet, "u"=unrestricted. + e_min : float, optional + minimum excitation energy, by default 0.0 + delta : float, optional + energy shift for trial vector generation, typically <=0.0, by default 0.0 + + Returns + ------- + exci : double array + excitation energy. + X_vec : list + X block of eigenvector (excitation). + Y_vec : list + Y block of eigenvector (de-excitation). + """ + # check spin and multiplicity + assert isinstance(multi, str) + multi = multi[0].lower() + assert (self.nspin == 1 and (multi == 's' or multi == 't')) or (self.nspin == 2 and multi == 'u') + self.multi = multi + + cput0 = (time.process_time(), time.perf_counter()) + self.dump_flags() + self.check_memory() + self.exci, self.X_vec, self.Y_vec = bse_davidson(bse=self, multi=multi, e_min=e_min, delta=delta, **kwargs) + lib.logger.timer(self, 'BSE', *cput0) + return self.exci, self.X_vec, self.Y_vec + + def full_diagonalization(self, multi): + """Full diagonalization. + + Parameters + ---------- + multi : str + multiplicity. 
"s"=singlet, "t"=triplet, "u"=unrestricted. + + Returns + ------- + exci : double array + excitation energy. + X_vec : list + X block of eigenvector (excitation). + Y_vec : list + Y block of eigenvector (de-excitation). + """ + cput0 = (time.process_time(), time.perf_counter()) + lib.logger.info(self, '\nBSE full diagonalization: %s', multi) + self.multi = multi + + # set nroot as full dimension for analysis + nvir = [(self.nmo - self.nocc[i]) for i in range(self.nspin)] + dim = [(self.nocc[i] * nvir[i]) for i in range(self.nspin)] + self.nroot = dim[0] + dim[1] if self.nspin == 2 else dim[0] + + # A+B, A-B, X+Y, X-Y + mem = (self.nroot * self.nroot * 4) * 8 + lib.logger.info(self, 'BSE needs at least %.1f GB memory.', mem / 1.0e9) + + self.exci, self.X_vec, self.Y_vec = bse_full_diagonalization( + multi=multi, nocc=self.nocc, mo_energy=self.mo_energy, Lpq=self.Lpq, TDA=self.TDA + ) + lib.logger.timer(self, 'BSE full diagonalization', *cput0) + return self.exci, self.X_vec, self.Y_vec + + def analyze(self, thresh=0.1, oscillator=True, s2=True, e_min=0.0): + """Analyze excitations. 
+ + Parameters + ---------- + thresh : float, optional + threshold to print dominant component, by default 0.1 + oscillator : bool, optional + calculate oscillator strength, by default True + s2 : bool, optional + calculate expectation value, by default True + e_min : float, optional + minimum excitation energy to analyze, by default 0.0 + """ + multi = self.multi + nspin = self.nspin + nmo = self.nmo + nocc = self.nocc + + emin_index = np.searchsorted(self.exci, e_min, side='left') + exci = self.exci[emin_index:] + + X_vec = [X_vec_s[emin_index:] for X_vec_s in self.X_vec] + Y_vec = [Y_vec_s[emin_index:] for Y_vec_s in self.Y_vec] + nvir = [(nmo - nocc[i]) for i in range(nspin)] + + if oscillator is True: + dipole, oscillator_strength = _get_oscillator_strength( + multi=multi, exci=exci, X_vec=X_vec, Y_vec=Y_vec, mo_coeff=self.mo_coeff, nocc=nocc, mol=self.mol + ) + + if s2 is True and nspin == 2: + s2 = _get_spin_square(nocc=nocc, X_vec=X_vec, Y_vec=Y_vec, mo_coeff=self.mo_coeff, ovlp=self.mf.get_ovlp()) + + lib.logger.info(self, '-' * 55) + if multi == 's': + lib.logger.info(self, 'restricted singlet BSE') + elif multi == 't': + lib.logger.info(self, 'restricted triplet BSE') + elif multi == 'u': + lib.logger.info(self, 'unrestricted BSE') + for r in range(exci.size): + lib.logger.info(self, '-' * 55) + lib.logger.info(self, 'excited state: %-d' % (r + 1)) + lib.logger.info(self, 'excitation energy: %15.8f AU %15.8f eV' % (exci[r], exci[r] * HARTREE2EV)) + if multi == 's': + if oscillator is True: + lib.logger.info(self, 'spin allowed, oscillator strength: %15.8f AU' % oscillator_strength[r]) + lib.logger.info( + self, + 'transition dipole: x = %15.6f , y = %15.6f , z = %15.6f' + % (dipole[0][r], dipole[1][r], dipole[2][r]), + ) + elif multi == 't': + if oscillator is True: + lib.logger.info(self, 'spin forbidden, oscillator strength and transition dipoles are not defined') + elif multi == 'u': + if s2 is True: + lib.logger.info(self, ' = %.6f' % s2[r]) + if 
oscillator is True: + lib.logger.info(self, 'oscillator strength: %15.8f AU' % oscillator_strength[r]) + lib.logger.info( + self, + 'transition dipole: x = %15.6f , y = %15.6f , z = %15.6f' + % (dipole[0][r], dipole[1][r], dipole[2][r]), + ) + + lib.logger.info(self, 'dominant component') + if nspin == 1: + for i in range(nocc[0]): + for a in range(nvir[0]): + if abs(X_vec[0][r][i][a]) > thresh: + lib.logger.info( + self, '%5d -> %5d, %15.8f, %s' % (i + 1, a + nocc[0] + 1, float(X_vec[0][r][i][a]), 'X') + ) + if abs(Y_vec[0][r][i][a]) > thresh: + lib.logger.info( + self, '%5d -> %5d, %15.8f, %s' % (i + 1, a + nocc[0] + 1, float(Y_vec[0][r][i][a]), 'Y') + ) + else: + for s in range(nspin): + for i in range(nocc[s]): + for a in range(nvir[s]): + if abs(X_vec[s][r][i][a]) > thresh: + lib.logger.info( + self, + '%5d -> %5d, spin %d, %15.8f, %s' + % (i + 1, a + nocc[s] + 1, s, float(X_vec[s][r][i][a]), 'X'), + ) + if abs(Y_vec[s][r][i][a]) > thresh: + lib.logger.info( + self, + '%5d -> %5d, spin %d, %15.8f, %s' + % (i + 1, a + nocc[s] + 1, s, float(Y_vec[s][r][i][a]), 'Y'), + ) + return + + def get_oscillator_strength(self): + """Get transition dipoles and oscillator strengths. + + Returns + ------- + dipole : double array + transition dipoles. + oscillator_strength : double array + oscillator strengths. 
+ """ + assert self.exci is not None and self.X_vec is not None and self.Y_vec is not None + assert self.mo_coeff is not None and self.mol is not None + dipole, oscillator_strength = _get_oscillator_strength( + multi=self.multi, + exci=self.exci, + X_vec=self.X_vec, + Y_vec=self.Y_vec, + mo_coeff=self.mo_coeff, + nocc=self.nocc, + mol=self.mol, + ) + + return dipole, oscillator_strength diff --git a/pyscf/gw/test/test_bse.py b/pyscf/gw/test/test_bse.py new file mode 100644 index 0000000000..1b63806f14 --- /dev/null +++ b/pyscf/gw/test/test_bse.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python + +import pytest +from pyscf import dft, gto +from pyscf.gw.bse import BSE +from pyscf.gw.gw_ac import GWAC +from pyscf.gw.ugw_ac import UGWAC + + +@pytest.fixture(scope='module') +def h2o_pbe_gw(): + mol = gto.Mole() + mol.verbose = 0 + mol.atom = [[8, (0.0, 0.0, 0.0)], [1, (0.7571, 0.0, 0.5861)], [1, (-0.7571, 0.0, 0.5861)]] + mol.basis = 'def2-svp' + mol.build() + + mf = dft.RKS(mol) + mf.xc = 'pbe' + mf.kernel() + + gw = GWAC(mf) + gw.kernel() + return gw + + +@pytest.fixture(scope='module') +def h2o_cation_pbe_ugw(): + mol = gto.Mole() + mol.verbose = 0 + mol.atom = [[8, (0.0, 0.0, 0.0)], [1, (0.7571, 0.0, 0.5861)], [1, (-0.7571, 0.0, 0.5861)]] + mol.charge = 1 + mol.spin = 1 + mol.basis = 'def2-svp' + mol.build() + + mf = dft.UKS(mol) + mf.xc = 'pbe' + mf.kernel() + + gw = UGWAC(mf) + gw.kernel() + return gw + + +def test_bse_singlet(h2o_pbe_gw): + bse = BSE(h2o_pbe_gw) + exci = bse.kernel('s')[0] + assert exci[0] == pytest.approx(0.25749397, abs=1e-5) + + +def test_bse_triplet(h2o_pbe_gw): + bse = BSE(h2o_pbe_gw) + exci = bse.kernel('t')[0] + assert exci[0] == pytest.approx(0.22299263, abs=1e-5) + + +def test_bse_unrestricted(h2o_cation_pbe_ugw): + bse = BSE(h2o_cation_pbe_ugw) + exci = bse.kernel('u')[0] + assert exci[0] == pytest.approx(0.02114003, abs=1e-5) + + +def test_bse_energy_specific_singlet(h2o_pbe_gw): + bse = BSE(h2o_pbe_gw) + exci = bse.kernel('s', e_min=0.4)[0] 
+ assert exci[0] == pytest.approx(0.42691789, abs=1e-5) + + +def test_bse_energy_specific_triplet(h2o_pbe_gw): + bse = BSE(h2o_pbe_gw) + exci = bse.kernel('t', e_min=0.4)[0] + assert exci[0] == pytest.approx(0.45195324, abs=1e-5) diff --git a/pyscf/gw/test/test_gw.py b/pyscf/gw/test/test_gw.py index 5b3a0e92be..4b53ddf751 100644 --- a/pyscf/gw/test/test_gw.py +++ b/pyscf/gw/test/test_gw.py @@ -48,8 +48,8 @@ def test_gwac_pade_frozen(self): gw_obj.ac = 'pade' gw_obj.orbs = range(nocc-3, nocc+3) gw_obj.kernel() - self.assertAlmostEqual(gw_obj.mo_energy[nocc-1], -0.4129411145067107, 8) - self.assertAlmostEqual(gw_obj.mo_energy[nocc], 0.16568737755110896, 8) + self.assertAlmostEqual(gw_obj.mo_energy[nocc-1], -0.4129411145067107, 7) + self.assertAlmostEqual(gw_obj.mo_energy[nocc], 0.16568737755110896, 7) gw_obj = gw.GW(mf, freq_int='ac') gw_obj.frozen = np.array([0]) @@ -57,8 +57,8 @@ def test_gwac_pade_frozen(self): gw_obj.ac = 'pade' gw_obj.orbs = range(nocc-3, nocc+3) gw_obj.kernel() - self.assertAlmostEqual(gw_obj.mo_energy[nocc-1], -0.4129411145067107, 8) - self.assertAlmostEqual(gw_obj.mo_energy[nocc], 0.16568737755110896, 8) + self.assertAlmostEqual(gw_obj.mo_energy[nocc-1], -0.4129411145067107, 7) + self.assertAlmostEqual(gw_obj.mo_energy[nocc], 0.16568737755110896, 7) def test_gwcd(self): nocc = mol.nelectron//2 diff --git a/pyscf/gw/test/test_gw_ac.py b/pyscf/gw/test/test_gw_ac.py index bbcec0bc16..1dd87419f4 100644 --- a/pyscf/gw/test/test_gw_ac.py +++ b/pyscf/gw/test/test_gw_ac.py @@ -2,13 +2,12 @@ import pytest import numpy as np -from pyscf import gto, scf, dft +from pyscf import gto, dft from pyscf.gw.gw_ac import GWAC @pytest.fixture def h2o_pbe0(): mol = gto.Mole() - mol.verbose = 5 mol.atom = [[8, (0.0, 0.0, 0.0)], [1, (0.0, -0.7571, 0.5861)], [1, (0.0, 0.7571, 0.5861)]] mol.basis = 'def2-svp' mol.build() diff --git a/pyscf/gw/test/test_ugw_ac.py b/pyscf/gw/test/test_ugw_ac.py index 9a3a3b60d7..8600b92004 100644 --- a/pyscf/gw/test/test_ugw_ac.py +++ 
b/pyscf/gw/test/test_ugw_ac.py @@ -6,7 +6,6 @@ @pytest.fixture def h2o_cation_uhf(): mol = gto.Mole() - mol.verbose = 5 mol.atom = [[8, (0.0, 0.0, 0.0)], [1, (0.0, -0.7571, 0.5861)], [1, (0.0, 0.7571, 0.5861)]] mol.basis = 'def2-svp' mol.charge = 1 diff --git a/pyscf/hessian/dispersion.py b/pyscf/hessian/dispersion.py index ad37289312..efa023f5fb 100644 --- a/pyscf/hessian/dispersion.py +++ b/pyscf/hessian/dispersion.py @@ -30,10 +30,12 @@ def get_dispersion(hessobj, disp=None, with_3body=None): mol = mf.mol natm = mol.natm h_disp = np.zeros([natm,natm,3,3]) - disp_version = check_disp(mf, disp) - if not disp_version: + if not check_disp(mf, disp): return h_disp + if disp is None: + disp = getattr(mf, 'disp', None) + try: from pyscf.dispersion import dftd3, dftd4 except ImportError: @@ -41,9 +43,9 @@ def get_dispersion(hessobj, disp=None, with_3body=None): raise method = getattr(mf, 'xc', 'hf') - method, _, disp_with_3body = parse_disp(method) + method, disp_version, disp_with_3body = parse_disp(method, disp) - if with_3body is not None: + if with_3body is None: with_3body = disp_with_3body if disp_version[:2].upper() == 'D3': diff --git a/pyscf/hessian/test/test_rhf.py b/pyscf/hessian/test/test_rhf.py index fc99b668ca..9a06fdac2e 100644 --- a/pyscf/hessian/test/test_rhf.py +++ b/pyscf/hessian/test/test_rhf.py @@ -19,7 +19,7 @@ from pyscf import grad, hessian try: from pyscf.dispersion import dftd3, dftd4 -except ImportError: +except (ImportError, OSError): dftd3 = dftd4 = None def setUpModule(): diff --git a/pyscf/hessian/test/test_rks.py b/pyscf/hessian/test/test_rks.py index 43634b6e8e..6c5a8d36db 100644 --- a/pyscf/hessian/test/test_rks.py +++ b/pyscf/hessian/test/test_rks.py @@ -19,7 +19,7 @@ from pyscf import grad, hessian try: from pyscf.dispersion import dftd3, dftd4 -except ImportError: +except (ImportError, OSError): dftd3 = dftd4 = None def setUpModule(): diff --git a/pyscf/hessian/test/test_uhf.py b/pyscf/hessian/test/test_uhf.py index 
8b65361b7b..ee5efceb80 100644 --- a/pyscf/hessian/test/test_uhf.py +++ b/pyscf/hessian/test/test_uhf.py @@ -19,7 +19,7 @@ from pyscf import grad, hessian try: from pyscf.dispersion import dftd3, dftd4 -except ImportError: +except (ImportError, OSError): dftd3 = dftd4 = None def setUpModule(): diff --git a/pyscf/hessian/test/test_uks.py b/pyscf/hessian/test/test_uks.py index b1c3af8b24..8ebbb7c0e8 100644 --- a/pyscf/hessian/test/test_uks.py +++ b/pyscf/hessian/test/test_uks.py @@ -19,7 +19,7 @@ from pyscf import grad, hessian try: from pyscf.dispersion import dftd3, dftd4 -except ImportError: +except (ImportError, OSError): dftd3 = dftd4 = None def setUpModule(): diff --git a/pyscf/lib/CMakeLists.txt b/pyscf/lib/CMakeLists.txt index 60f404849f..51de15a1bf 100644 --- a/pyscf/lib/CMakeLists.txt +++ b/pyscf/lib/CMakeLists.txt @@ -128,7 +128,9 @@ include_directories("${PROJECT_BINARY_DIR}") # See also https://gitlab.kitware.com/cmake/community/wikis/doc/cmake/RPATH-handling if (WIN32) - #? + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--disable-runtime-pseudo-reloc") + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}") elseif (APPLE) set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) set(CMAKE_INSTALL_RPATH "@loader_path;@loader_path/deps/lib;@loader_path/deps/lib64") diff --git a/pyscf/lib/agf2/uagf2.c b/pyscf/lib/agf2/uagf2.c index f16b97d53a..e59df223ad 100644 --- a/pyscf/lib/agf2/uagf2.c +++ b/pyscf/lib/agf2/uagf2.c @@ -317,17 +317,19 @@ void AGF2udf_vv_vev_islice_lowmem(double *qxi, do_os = j < nob; do_ss = j < noa; - // build qx_i + // build qx_i / qa_i (always indexed by i < noa) AGF2slice_01i(qxi, naux, nmo, noa, i, qx_i); - - // build qx_j - AGF2slice_01i(qxi, naux, nmo, noa, j, qx_j); - - // build qa_i AGF2slice_0i2(qja, naux, noa, nva, i, qa_i); - // build qa_j - AGF2slice_0i2(qja, naux, noa, nva, j, qa_j); + // Build qx_j / qa_j only when j is in the alpha range. 
With + // nob > noa, j ranges up to nob-1 for the cross-spin (do_os) part, + // and slicing the alpha arrays qxi/qja at j >= noa would read past + // their noa-dim. The OS path uses qx_j_b / qa_j_b instead, so the + // alpha j slice is only needed for do_ss. + if (do_ss) { + AGF2slice_01i(qxi, naux, nmo, noa, j, qx_j); + AGF2slice_0i2(qja, naux, noa, nva, j, qa_j); + } if (do_ss) { // build xija diff --git a/pyscf/lib/cc/ccsd_t.c b/pyscf/lib/cc/ccsd_t.c index 22e3e5e4ea..dbda960255 100644 --- a/pyscf/lib/cc/ccsd_t.c +++ b/pyscf/lib/cc/ccsd_t.c @@ -392,15 +392,16 @@ void CCsd_t_contract(double *e_tot, cache_row_b, cache_col_b, sizeof(double)); int *permute_idx = malloc(sizeof(int) * nocc*nocc*nocc * 6); _make_permute_indices(permute_idx, nocc); + FILE *err_fp = stderr; #pragma omp parallel default(none) \ shared(njobs, nocc, nvir, mo_energy, t1T, t2T, nirrep, o_ir_loc, \ - v_ir_loc, oo_ir_loc, orbsym, vooo, fvo, jobs, e_tot, permute_idx, stderr) + v_ir_loc, oo_ir_loc, orbsym, vooo, fvo, jobs, e_tot, permute_idx, err_fp) { int a, b, c; size_t k; double *cache1 = malloc(sizeof(double) * (nocc*nocc*nocc*3+2)); if (cache1 == NULL) { - fprintf(stderr, "malloc(%zu) failed in CCsd_t_contract\n", + fprintf(err_fp, "malloc(%zu) failed in CCsd_t_contract\n", sizeof(double) * nocc*nocc*nocc*3); exit(1); } @@ -447,15 +448,16 @@ void QCIsd_t_contract(double *e_tot, cache_row_b, cache_col_b, sizeof(double)); int *permute_idx = malloc(sizeof(int) * nocc*nocc*nocc * 6); _make_permute_indices(permute_idx, nocc); + FILE *err_fp = stderr; #pragma omp parallel default(none) \ shared(njobs, nocc, nvir, mo_energy, t1T, t2T, nirrep, o_ir_loc, \ - v_ir_loc, oo_ir_loc, orbsym, vooo, fvo, jobs, e_tot, permute_idx, stderr) + v_ir_loc, oo_ir_loc, orbsym, vooo, fvo, jobs, e_tot, permute_idx, err_fp) { int a, b, c; size_t k; double *cache1 = malloc(sizeof(double) * (nocc*nocc*nocc*3+2)); if (cache1 == NULL) { - fprintf(stderr, "malloc(%zu) failed in QCIsd_t_contract\n", + fprintf(err_fp, 
"malloc(%zu) failed in QCIsd_t_contract\n", sizeof(double) * nocc*nocc*nocc*3); exit(1); } @@ -628,15 +630,16 @@ void CCsd_t_zcontract(double complex *e_tot, int *permute_idx = malloc(sizeof(int) * nocc*nocc*nocc * 6); _make_permute_indices(permute_idx, nocc); + FILE *err_fp = stderr; #pragma omp parallel default(none) \ shared(njobs, nocc, nvir, mo_energy, t1T, t2T, nirrep, o_ir_loc, \ - v_ir_loc, oo_ir_loc, orbsym, vooo, fvo, jobs, e_tot, permute_idx, stderr) + v_ir_loc, oo_ir_loc, orbsym, vooo, fvo, jobs, e_tot, permute_idx, err_fp) { int a, b, c; size_t k; double complex *cache1 = malloc(sizeof(double complex) * (nocc*nocc*nocc*3+2)); if (cache1 == NULL) { - fprintf(stderr, "malloc(%zu) failed in CCsd_t_zcontract\n", + fprintf(err_fp, "malloc(%zu) failed in CCsd_t_zcontract\n", sizeof(double complex) * nocc*nocc*nocc*3); exit(1); } @@ -686,15 +689,16 @@ void QCIsd_t_zcontract(double complex *e_tot, int *permute_idx = malloc(sizeof(int) * nocc*nocc*nocc * 6); _make_permute_indices(permute_idx, nocc); + FILE *err_fp = stderr; #pragma omp parallel default(none) \ shared(njobs, nocc, nvir, mo_energy, t1T, t2T, nirrep, o_ir_loc, \ - v_ir_loc, oo_ir_loc, orbsym, vooo, fvo, jobs, e_tot, permute_idx, stderr) + v_ir_loc, oo_ir_loc, orbsym, vooo, fvo, jobs, e_tot, permute_idx, err_fp) { int a, b, c; size_t k; double complex *cache1 = malloc(sizeof(double complex) * (nocc*nocc*nocc*3+2)); if (cache1 == NULL) { - fprintf(stderr, "malloc(%zu) failed in QCIsd_t_zcontract\n", + fprintf(err_fp, "malloc(%zu) failed in QCIsd_t_zcontract\n", sizeof(double complex) * nocc*nocc*nocc*3); exit(1); } @@ -872,15 +876,16 @@ void MPICCsd_t_contract(double *e_tot, double *mo_energy, double *t1T, int *permute_idx = malloc(sizeof(int) * nocc*nocc*nocc * 6); _make_permute_indices(permute_idx, nocc); + FILE *err_fp = stderr; #pragma omp parallel default(none) \ shared(njobs, nocc, nvir, mo_energy, t1T, fvo, jobs, e_tot, slices, \ - data_ptrs, permute_idx, stderr) + data_ptrs, permute_idx, 
err_fp) { int a, b, c; size_t k; double *cache1 = malloc(sizeof(double) * (nocc*nocc*nocc*3+2)); if (cache1 == NULL) { - fprintf(stderr, "malloc(%zu) failed in MPICCsd_t_contract\n", + fprintf(err_fp, "malloc(%zu) failed in MPICCsd_t_contract\n", sizeof(double) * nocc*nocc*nocc*3); exit(1); } @@ -1105,15 +1110,16 @@ void CCsd_zcontract_t3T(double complex *t3Tw, double complex *t3Tv, double *mo_e int *permute_idx = malloc(sizeof(int) * nocc*nocc*nocc * 6); _make_permute_indices(permute_idx, nocc); + FILE *err_fp = stderr; #pragma omp parallel default(none) \ shared(njobs, nocc, nvir, nkpts, t3Tw, t3Tv, mo_offset, mo_energy, t1T, fvo, jobs, slices, \ - data_ptrs, permute_idx, stderr) + data_ptrs, permute_idx, err_fp) { int a, b, c; size_t k; complex double *cache1 = malloc(sizeof(double complex) * (nocc*nocc*nocc*3+2)); if (cache1 == NULL) { - fprintf(stderr, "malloc(%zu) failed in CCsd_zcontract_t3T\n", + fprintf(err_fp, "malloc(%zu) failed in CCsd_zcontract_t3T\n", sizeof(double complex) * nocc*nocc*nocc*3); exit(1); } diff --git a/pyscf/lib/cc/uccsd_t.c b/pyscf/lib/cc/uccsd_t.c index fedb09049e..19571dbb8c 100644 --- a/pyscf/lib/cc/uccsd_t.c +++ b/pyscf/lib/cc/uccsd_t.c @@ -299,16 +299,17 @@ void CCuccsd_t_aaa(double complex *e_tot, int *permute_idx = malloc(sizeof(int) * nocc*nocc*nocc * 6); _make_permute_indices(permute_idx, nocc); + FILE *err_fp = stderr; #pragma omp parallel default(none) \ shared(njobs, nocc, nvir, mo_energy, t1T, t2T, nirrep, o_ir_loc, \ v_ir_loc, oo_ir_loc, orbsym, vooo, fvohalf, jobs, e_tot, \ - permute_idx, stderr) + permute_idx, err_fp) { int a, b, c; size_t k; double *cache1 = malloc(sizeof(double) * (nocc*nocc*nocc*3+2)); if (cache1 == NULL) { - fprintf(stderr, "malloc(%zu) failed in CCuccsd_t_aaa\n", + fprintf(err_fp, "malloc(%zu) failed in CCuccsd_t_aaa\n", sizeof(double) * nocc*nocc*nocc*3); exit(1); } @@ -549,15 +550,16 @@ void CCuccsd_t_baa(double complex *e_tot, double *vs_ts[] = {mo_ea, mo_eb, fvo, fVO, vooo, vOoO, VoOo, t1aT, 
t1bT, t2aaT, t2abT}; + FILE *err_fp = stderr; #pragma omp parallel default(none) \ - shared(njobs, nocca, noccb, nvira, nvirb, vs_ts, jobs, e_tot, stderr) + shared(njobs, nocca, noccb, nvira, nvirb, vs_ts, jobs, e_tot, err_fp) { int a, b, c; size_t k; double *cache1 = malloc(sizeof(double) * (noccb*nocca*nocca*5+1 + nocca*2+noccb*2)); if (cache1 == NULL) { - fprintf(stderr, "malloc(%zu) failed in CCuccsd_t_baa\n", + fprintf(err_fp, "malloc(%zu) failed in CCuccsd_t_baa\n", sizeof(double) * noccb*nocca*nocca*5); exit(1); } @@ -705,17 +707,18 @@ void CCuccsd_t_zaaa(double complex *e_tot, int *permute_idx = malloc(sizeof(int) * nocc*nocc*nocc * 6); _make_permute_indices(permute_idx, nocc); + FILE *err_fp = stderr; #pragma omp parallel default(none) \ shared(njobs, nocc, nvir, mo_energy, t1T, t2T, nirrep, o_ir_loc, \ v_ir_loc, oo_ir_loc, orbsym, vooo, fvohalf, jobs, e_tot, \ - permute_idx, stderr) + permute_idx, err_fp) { int a, b, c; size_t k; double complex *cache1 = malloc(sizeof(double complex) * (nocc*nocc*nocc*3+2)); if (cache1 == NULL) { - fprintf(stderr, "malloc(%zu) failed in CCuccsd_t_zaaa\n", + fprintf(err_fp, "malloc(%zu) failed in CCuccsd_t_zaaa\n", sizeof(double complex) * nocc*nocc*nocc*3); exit(1); } @@ -915,8 +918,9 @@ void CCuccsd_t_zbaa(double complex *e_tot, (double complex *)mo_eb, fvo, fVO, vooo, vOoO, VoOo, t1aT, t1bT, t2aaT, t2abT}; + FILE *err_fp = stderr; #pragma omp parallel default(none) \ - shared(njobs, nocca, noccb, nvira, nvirb, vs_ts, jobs, e_tot, stderr) + shared(njobs, nocca, noccb, nvira, nvirb, vs_ts, jobs, e_tot, err_fp) { int a, b, c; size_t k; @@ -924,7 +928,7 @@ void CCuccsd_t_zbaa(double complex *e_tot, (noccb*nocca*nocca*5+1 + nocca*2+noccb*2)); if (cache1 == NULL) { - fprintf(stderr, "malloc(%zu) failed in CCuccsd_t_zbaa\n", + fprintf(err_fp, "malloc(%zu) failed in CCuccsd_t_zbaa\n", sizeof(double complex) * noccb*nocca*nocca*5); exit(1); } diff --git a/pyscf/lib/ccsdt/rccsdt.c b/pyscf/lib/ccsdt/rccsdt.c index 
30adf69aac..e2c9200c98 100644 --- a/pyscf/lib/ccsdt/rccsdt.c +++ b/pyscf/lib/ccsdt/rccsdt.c @@ -1,4 +1,4 @@ -/* Copyright 2014-2025 The PySCF Developers. All Rights Reserved. +/* Copyright 2014-2026 The PySCF Developers. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -496,6 +496,17 @@ const int64_t tp_t3[6][3] = { {2, 1, 0}, // reverse }; +static inline int64_t src_idx_from_full3(const int64_t *restrict perm, int64_t v0, int64_t v1, int64_t v2, int64_t nvir) +{ + int64_t src_abc[3]; + + src_abc[perm[0]] = v0; + src_abc[perm[1]] = v1; + src_abc[perm[2]] = v2; + + return ((src_abc[0] * nvir + src_abc[1]) * nvir + src_abc[2]); +} + // Unpack triangular-stored T3 amplitudes into a full T3 block. // // This kernel reconstructs the full permutation-expanded T3 tensor block from the compressed triangular @@ -521,6 +532,7 @@ void unpack_t3_tri2block_(const double *restrict t3_tri, { #define MAP(sym, x, y, z) map[(((sym) * nocc + (x)) * nocc + (y)) * nocc + (z)] #define MASK(sym, x, y, z) mask[(((sym) * nocc + (x)) * nocc + (y)) * nocc + (z)] +#define VIDX(a, b, c) (((a) * nvir + (b)) * nvir + (c)) #pragma omp parallel for collapse(4) schedule(dynamic) for (int64_t sym = 0; sym < 6; ++sym) @@ -549,15 +561,10 @@ void unpack_t3_tri2block_(const double *restrict t3_tri, { for (int64_t c = 0; c < nvir; ++c) { - int64_t abc[3] = {a, b, c}; - int64_t aa = abc[perm[0]]; - int64_t bb = abc[perm[1]]; - int64_t cc = abc[perm[2]]; - - int64_t src_idx = src_base + (a * nvir + b) * nvir + c; - int64_t dest_idx = dest_base + (aa * nvir + bb) * nvir + cc; + int64_t src = src_base + src_idx_from_full3(perm, a, b, c, nvir); + int64_t dest = dest_base + VIDX(a, b, c); - t3_blk[dest_idx] = t3_tri[src_idx]; + t3_blk[dest] = t3_tri[src]; } } } @@ -569,112 +576,11 @@ void unpack_t3_tri2block_(const double *restrict t3_tri, #undef MASK } -// Unpack a triangular-stored T3 (i, j, k) element 
into its 6-fold -// permutation representation for a single occupied triplet. -// -// This routine identifies the symmetry representative of (i0, j0, k0) in the triangular (i <= j <= k) index domain, -// applies the corresponding (a, b, c) permutation, and scatters the resulting amplitudes into `t3_blk`. -// In addition, a second symmetry partner (selected via `tmp_indices`) is accumulated to complete the required -// two-term contribution. Conceptually, this corresponds to reconstructing: -// -// t3_full[i0, j0, k0, :, :, :] + t3_full[j0, i0, k0, :, :, :].transpose(1, 0, 2) -// -// Input -// t3_tri : triangular-stored T3 amplitudes -// t3_blk : output buffer [nvir**3] -// map : mapping (sym, i, j, k) -> tri index -// mask : triangular-domain mask for valid (i, j, k) -// i0, j0, k0 : occupied indices for this element -// nocc : number of occupied orbitals -// nvir : number of virtual orbitals -void unpack_t3_tri2single_pair_(const double *restrict t3_tri, - double *restrict t3_blk, - const int64_t *restrict map, - const bool *restrict mask, - int64_t i0, int64_t j0, int64_t k0, - int64_t nocc, int64_t nvir) -{ - -#define MAP(sym, x, y, z) map[(((sym) * nocc + (x)) * nocc + (y)) * nocc + (z)] -#define MASK(sym, x, y, z) mask[(((sym) * nocc + (x)) * nocc + (y)) * nocc + (z)] - - int64_t sym; - for (sym = 0; sym < 6; ++sym) - { - if (MASK(sym, i0, j0, k0)) - break; - } - - const int64_t *perm = tp_t3[sym]; - int64_t idx = MAP(sym, i0, j0, k0); - -#pragma omp parallel for collapse(3) schedule(static) - for (int64_t a = 0; a < nvir; ++a) - { - for (int64_t b = 0; b < nvir; ++b) - { - for (int64_t c = 0; c < nvir; ++c) - { - int64_t abc[3] = {a, b, c}; - int64_t aa = abc[perm[0]]; - int64_t bb = abc[perm[1]]; - int64_t cc = abc[perm[2]]; - - int64_t src_idx = ((idx * nvir + a) * nvir + b) * nvir + c; - int64_t dest_idx = (aa * nvir + bb) * nvir + cc; - - t3_blk[dest_idx] = t3_tri[src_idx]; - } - } - } - - const int64_t tmp_indices[6] = {2, 4, 0, 5, 1, 3}; - - for (sym = 
0; sym < 6; ++sym) - { - if (MASK(tmp_indices[sym], i0, j0, k0)) - break; - } - - const int64_t *perm2 = tp_t3[tmp_indices[sym]]; - idx = MAP(tmp_indices[sym], i0, j0, k0); - -#pragma omp parallel for collapse(3) schedule(static) - for (int64_t a = 0; a < nvir; ++a) - { - for (int64_t b = 0; b < nvir; ++b) - { - for (int64_t c = 0; c < nvir; ++c) - { - int64_t abc[3] = {a, b, c}; - int64_t aa = abc[perm2[0]]; - int64_t bb = abc[perm2[1]]; - int64_t cc = abc[perm2[2]]; - - int64_t src_idx = ((idx * nvir + a) * nvir + b) * nvir + c; - int64_t dest_idx = (aa * nvir + bb) * nvir + cc; - t3_blk[dest_idx] += t3_tri[src_idx]; - } - } - } -#undef MAP -#undef MASK -} - -// Unpack triangular-stored T3 amplitudes into a full T3 block. +// Unpack triangular-stored T3 amplitudes directly into the final block: // -// This kernel reconstructs the full permutation-expanded T3 tensor block from the compressed triangular -// representation without forming the full tensor in memory. +// t3_tmp + t3_tmp.transpose(0, 1, 2, 4, 5, 3) // -// Input: -// t3_tri : triangular-stored T3 amplitudes -// t3_blk : output buffer [blk_i * blk_j * blk_k * nvir**3] -// map : mapping index table for (i, j, k) -> tri index -// mask : mask indicating which (i, j, k) indices are stored (triangular domain) -// [i0:i1), [j0:j1), [k0:k1) : occupied index block ranges -// nocc, nvir : number of occupied / virtual orbitals -// blk_i, blk_j, blk_k : block sizes for the destination tensor void unpack_t3_tri2block_pair_(const double *restrict t3_tri, double *restrict t3_blk, const int64_t *restrict map, @@ -688,9 +594,7 @@ void unpack_t3_tri2block_pair_(const double *restrict t3_tri, #define MAP(sym, x, y, z) map[(((sym) * nocc + (x)) * nocc + (y)) * nocc + (z)] #define MASK(sym, x, y, z) mask[(((sym) * nocc + (x)) * nocc + (y)) * nocc + (z)] - - const int64_t tmp_indices[6] = {5, 3, 4, 1, 2, 0}; - const int64_t trans_indices[6] = {1, 0, 3, 2, 5, 4}; +#define VIDX(a, b, c) (((a) * nvir + (b)) * nvir + (c)) 
#pragma omp parallel for collapse(4) schedule(dynamic) for (int64_t sym = 0; sym < 6; ++sym) @@ -719,59 +623,11 @@ void unpack_t3_tri2block_pair_(const double *restrict t3_tri, { for (int64_t c = 0; c < nvir; ++c) { - int64_t abc[3] = {a, b, c}; - int64_t aa = abc[perm[0]]; - int64_t bb = abc[perm[1]]; - int64_t cc = abc[perm[2]]; + const int64_t src0 = src_base + src_idx_from_full3(perm, a, b, c, nvir); + const int64_t src1 = src_base + src_idx_from_full3(perm, b, c, a, nvir); + const int64_t dest = dest_base + VIDX(a, b, c); - int64_t src_idx = src_base + (a * nvir + b) * nvir + c; - int64_t dest_idx = dest_base + (aa * nvir + bb) * nvir + cc; - - t3_blk[dest_idx] = t3_tri[src_idx]; - } - } - } - } - } - } - } - -#pragma omp parallel for collapse(4) schedule(dynamic) - for (int64_t sym = 0; sym < 6; ++sym) - { - for (int64_t i = i0; i < i1; ++i) - { - for (int64_t j = j0; j < j1; ++j) - { - for (int64_t k = k0; k < k1; ++k) - { - if (!MASK(tmp_indices[sym], i, j, k)) - continue; - - const int64_t *perm2 = tp_t3[trans_indices[sym]]; - - int64_t loc_i = i - i0; - int64_t loc_j = j - j0; - int64_t loc_k = k - k0; - - int64_t src_base = MAP(tmp_indices[sym], i, j, k) * nvir * nvir * nvir; - int64_t dest_base = ((loc_i * blk_j + loc_j) * blk_k + loc_k) * nvir * nvir * nvir; - - for (int64_t a = 0; a < nvir; ++a) - { - for (int64_t b = 0; b < nvir; ++b) - { - for (int64_t c = 0; c < nvir; ++c) - { - int64_t abc[3] = {a, b, c}; - int64_t aa = abc[perm2[0]]; - int64_t bb = abc[perm2[1]]; - int64_t cc = abc[perm2[2]]; - - int64_t src_idx = src_base + (a * nvir + b) * nvir + c; - int64_t dest_idx = dest_base + (aa * nvir + bb) * nvir + cc; - - t3_blk[dest_idx] += t3_tri[src_idx]; + t3_blk[dest] = t3_tri[src0] + t3_tri[src1]; } } } @@ -851,40 +707,3 @@ void accumulate_t3_block2tri_(double *restrict t3_tri, } #undef MAP } - -// Accumulate a single (i0, j0, k0) full T3 slice into the triangular 6-fold compressed T3 storage. 
-// -// Inputs -// t3_tri : triangular-stored T3 amplitudes -// t3_blk : full T3 slice [nvir**3] for (i0, j0, k0) -// map : mapping (sym, i, j, k) -> triangular index (sym = 0 used here) -// i0, j0, k0 : occupied indices -// nocc : number of occupied orbitals -// nvir : number of virtual orbitals -// alpha, beta : scaling coefficients for accumulation -void accumulate_t3_single2tri_(double *restrict t3_tri, - const double *restrict t3_blk, - const int64_t *restrict map, - int64_t i0, int64_t j0, int64_t k0, - int64_t nocc, int64_t nvir, - double alpha, double beta) -{ -#define MAP(sym, x, y, z) map[(((sym) * nocc + (x)) * nocc + (y)) * nocc + (z)] - - int64_t p = MAP(0, i0, j0, k0); - int64_t tri_base = p * nvir * nvir * nvir; - -#pragma omp parallel for collapse(3) schedule(static) - for (int64_t a = 0; a < nvir; ++a) - { - for (int64_t b = 0; b < nvir; ++b) - { - for (int64_t c = 0; c < nvir; ++c) - { - int64_t idx = ((a * nvir + b) * nvir + c); - t3_tri[tri_base + idx] = beta * t3_tri[tri_base + idx] + alpha * t3_blk[idx]; - } - } - } -#undef MAP -} diff --git a/pyscf/lib/ccsdt/rccsdtq.c b/pyscf/lib/ccsdt/rccsdtq.c index 3a25a803ea..10f916de5d 100644 --- a/pyscf/lib/ccsdt/rccsdtq.c +++ b/pyscf/lib/ccsdt/rccsdtq.c @@ -1,4 +1,4 @@ -/* Copyright 2014-2025 The PySCF Developers. All Rights Reserved. +/* Copyright 2014-2026 The PySCF Developers. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -27,8 +27,9 @@ // Apply spin summation projection to T4 amplitudes in place. 
// A: pointer to T4 tensor (size nocc4 * nvir**4) // pattern: "P4_full" : P(A) = (1 + P_c^d) (1 + P_b^c + P_b^d) (1 + P_a^b + P_a^c + P_a^d) A -// "P4_422" : P(A) = (1 + 0 * P_c^d) (1 + 0 * P_b^c + 0 * P_b^d) (2 - P_a^b - P_a^c - P_a^d) A -// "P4_201" : P(A) = (1 + 0 * P_c^d) (2 - P_b^c - P_b^d) (2 - P_a^b - P_a^c - P_a^d) A +// "P4_444" : P(A) = (2 - P_c^d) (2 - P_b^c - P_b^d) (2 - P_a^b - P_a^c - P_a^d) A +// "P4_442" : P(A) = (1 + 0 * P_c^d) (2 - P_b^c - P_b^d) (2 - P_a^b - P_a^c - P_a^d) A +// "P4_201" : P(A) = (1 + 0 * P_c^d) (1 + 0 * P_b^c + 0 * P_b^d) (2 - P_a^b - P_a^c - P_a^d) A // alpha, beta: A = beta * A + alpha * P(A) void t4_spin_summation_inplace_(double *A, int64_t nocc4, int64_t nvir, char *pattern, double alpha, double beta) { @@ -69,6 +70,18 @@ void t4_spin_summation_inplace_(double *A, int64_t nocc4, int64_t nvir, char *pa p[7] = 1.0; p[8] = 0.0; } + else if (strcmp(pattern, "P4_444") == 0) + { + p[0] = 2.0; + p[1] = -1.0; + p[2] = -1.0; + p[3] = -1.0; + p[4] = 2.0; + p[5] = -1.0; + p[6] = -1.0; + p[7] = 2.0; + p[8] = -1.0; + } else { fprintf(stderr, "Error: unrecognized pattern \"%s\"\n", pattern); @@ -530,6 +543,18 @@ void t4_spin_summation(const double *A, double *B, int64_t nocc4, int64_t nvir, p[7] = 1.0; p[8] = 0.0; } + else if (strcmp(pattern, "P4_444") == 0) + { + p[0] = 2.0; + p[1] = -1.0; + p[2] = -1.0; + p[3] = -1.0; + p[4] = 2.0; + p[5] = -1.0; + p[6] = -1.0; + p[7] = 2.0; + p[8] = -1.0; + } else { fprintf(stderr, "Error: unrecognized pattern \"%s\"\n", pattern); @@ -947,6 +972,491 @@ void t4_spin_summation(const double *A, double *B, int64_t nocc4, int64_t nvir, } } +void t4_spin_summation_single_inplace_(double *A, int64_t nvir, char *pattern, double alpha, double beta) +{ + int64_t nvv = nvir * nvir; + int64_t nvvv = nvir * nvv; + + double p[9]; + + if (strcmp(pattern, "P4_full") == 0) + { + for (int i = 0; i < 9; i++) + p[i] = 1.0; + } + else if (strcmp(pattern, "P4_201") == 0) + { + p[0] = 2.0; + p[1] = -1.0; + p[2] = -1.0; + 
p[3] = -1.0; + p[4] = 1.0; + p[5] = 0.0; + p[6] = 0.0; + p[7] = 1.0; + p[8] = 0.0; + } + else if (strcmp(pattern, "P4_442") == 0) + { + p[0] = 2.0; + p[1] = -1.0; + p[2] = -1.0; + p[3] = -1.0; + p[4] = 2.0; + p[5] = -1.0; + p[6] = -1.0; + p[7] = 1.0; + p[8] = 0.0; + } + else if (strcmp(pattern, "P4_444") == 0) + { + p[0] = 2.0; + p[1] = -1.0; + p[2] = -1.0; + p[3] = -1.0; + p[4] = 2.0; + p[5] = -1.0; + p[6] = -1.0; + p[7] = 2.0; + p[8] = -1.0; + } + else + { + fprintf(stderr, "Error: unrecognized pattern \"%s\"\n", pattern); + return; + } + + int64_t total_combinations = (nvir * (nvir + 1) * (nvir + 2) * (nvir + 3)) / 24; + +#pragma omp parallel for schedule(static) + for (int64_t idx_linear = 0; idx_linear < total_combinations; idx_linear++) + { + int64_t a, b, c, d; + int64_t remaining = idx_linear; + + a = 0; + while (a < nvir) + { + int64_t count_with_a = ((a + 1) * (a + 2) * (a + 3)) / 6; + if (remaining < count_with_a) + { + break; + } + remaining -= count_with_a; + a++; + } + + b = 0; + while (b <= a) + { + int64_t count_with_b = ((b + 1) * (b + 2)) / 2; + if (remaining < count_with_b) + { + break; + } + remaining -= count_with_b; + b++; + } + + c = 0; + while (c <= b) + { + int64_t count_with_c = c + 1; + if (remaining < count_with_c) + { + break; + } + remaining -= count_with_c; + c++; + } + + d = remaining; + + int64_t nvvv = nvir * nvir * nvir; + int64_t nvv = nvir * nvir; + if (a > b && b > c && c > d) + { + double T1_local[24]; + double T2_local[24]; + + int64_t indices[24]; + indices[0] = a * nvvv + b * nvv + c * nvir + d; + indices[1] = a * nvvv + b * nvv + d * nvir + c; + indices[2] = a * nvvv + c * nvv + b * nvir + d; + indices[3] = a * nvvv + c * nvv + d * nvir + b; + indices[4] = a * nvvv + d * nvv + b * nvir + c; + indices[5] = a * nvvv + d * nvv + c * nvir + b; + indices[6] = b * nvvv + a * nvv + c * nvir + d; + indices[7] = b * nvvv + a * nvv + d * nvir + c; + indices[8] = b * nvvv + c * nvv + a * nvir + d; + indices[9] = b * nvvv + c * nvv + 
d * nvir + a; + indices[10] = b * nvvv + d * nvv + a * nvir + c; + indices[11] = b * nvvv + d * nvv + c * nvir + a; + indices[12] = c * nvvv + a * nvv + b * nvir + d; + indices[13] = c * nvvv + a * nvv + d * nvir + b; + indices[14] = c * nvvv + b * nvv + a * nvir + d; + indices[15] = c * nvvv + b * nvv + d * nvir + a; + indices[16] = c * nvvv + d * nvv + a * nvir + b; + indices[17] = c * nvvv + d * nvv + b * nvir + a; + indices[18] = d * nvvv + a * nvv + b * nvir + c; + indices[19] = d * nvvv + a * nvv + c * nvir + b; + indices[20] = d * nvvv + b * nvv + a * nvir + c; + indices[21] = d * nvvv + b * nvv + c * nvir + a; + indices[22] = d * nvvv + c * nvv + a * nvir + b; + indices[23] = d * nvvv + c * nvv + b * nvir + a; + + T1_local[0] = p[0] * A[indices[0]] + p[1] * A[indices[6]] + p[2] * A[indices[14]] + p[3] * A[indices[21]]; + T1_local[1] = p[0] * A[indices[1]] + p[1] * A[indices[7]] + p[2] * A[indices[20]] + p[3] * A[indices[15]]; + T1_local[2] = p[0] * A[indices[2]] + p[1] * A[indices[12]] + p[2] * A[indices[8]] + p[3] * A[indices[23]]; + T1_local[3] = p[0] * A[indices[3]] + p[1] * A[indices[13]] + p[2] * A[indices[22]] + p[3] * A[indices[9]]; + T1_local[4] = p[0] * A[indices[4]] + p[1] * A[indices[18]] + p[2] * A[indices[10]] + p[3] * A[indices[17]]; + T1_local[5] = p[0] * A[indices[5]] + p[1] * A[indices[19]] + p[2] * A[indices[16]] + p[3] * A[indices[11]]; + T1_local[6] = p[0] * A[indices[6]] + p[1] * A[indices[0]] + p[2] * A[indices[12]] + p[3] * A[indices[19]]; + T1_local[7] = p[0] * A[indices[7]] + p[1] * A[indices[1]] + p[2] * A[indices[18]] + p[3] * A[indices[13]]; + T1_local[8] = p[0] * A[indices[8]] + p[1] * A[indices[14]] + p[2] * A[indices[2]] + p[3] * A[indices[22]]; + T1_local[9] = p[0] * A[indices[9]] + p[1] * A[indices[15]] + p[2] * A[indices[23]] + p[3] * A[indices[3]]; + T1_local[10] = p[0] * A[indices[10]] + p[1] * A[indices[20]] + p[2] * A[indices[4]] + p[3] * A[indices[16]]; + T1_local[11] = p[0] * A[indices[11]] + p[1] * A[indices[21]] + 
p[2] * A[indices[17]] + p[3] * A[indices[5]]; + T1_local[12] = p[0] * A[indices[12]] + p[1] * A[indices[2]] + p[2] * A[indices[6]] + p[3] * A[indices[18]]; + T1_local[13] = p[0] * A[indices[13]] + p[1] * A[indices[3]] + p[2] * A[indices[19]] + p[3] * A[indices[7]]; + T1_local[14] = p[0] * A[indices[14]] + p[1] * A[indices[8]] + p[2] * A[indices[0]] + p[3] * A[indices[20]]; + T1_local[15] = p[0] * A[indices[15]] + p[1] * A[indices[9]] + p[2] * A[indices[21]] + p[3] * A[indices[1]]; + T1_local[16] = p[0] * A[indices[16]] + p[1] * A[indices[22]] + p[2] * A[indices[5]] + p[3] * A[indices[10]]; + T1_local[17] = p[0] * A[indices[17]] + p[1] * A[indices[23]] + p[2] * A[indices[11]] + p[3] * A[indices[4]]; + T1_local[18] = p[0] * A[indices[18]] + p[1] * A[indices[4]] + p[2] * A[indices[7]] + p[3] * A[indices[12]]; + T1_local[19] = p[0] * A[indices[19]] + p[1] * A[indices[5]] + p[2] * A[indices[13]] + p[3] * A[indices[6]]; + T1_local[20] = p[0] * A[indices[20]] + p[1] * A[indices[10]] + p[2] * A[indices[1]] + p[3] * A[indices[14]]; + T1_local[21] = p[0] * A[indices[21]] + p[1] * A[indices[11]] + p[2] * A[indices[15]] + p[3] * A[indices[0]]; + T1_local[22] = p[0] * A[indices[22]] + p[1] * A[indices[16]] + p[2] * A[indices[3]] + p[3] * A[indices[8]]; + T1_local[23] = p[0] * A[indices[23]] + p[1] * A[indices[17]] + p[2] * A[indices[9]] + p[3] * A[indices[2]]; + + T2_local[0] = p[4] * T1_local[0] + p[5] * T1_local[2] + p[6] * T1_local[5]; + T2_local[1] = p[4] * T1_local[1] + p[5] * T1_local[4] + p[6] * T1_local[3]; + T2_local[2] = p[4] * T1_local[2] + p[5] * T1_local[0] + p[6] * T1_local[4]; + T2_local[3] = p[4] * T1_local[3] + p[5] * T1_local[5] + p[6] * T1_local[1]; + T2_local[4] = p[4] * T1_local[4] + p[5] * T1_local[1] + p[6] * T1_local[2]; + T2_local[5] = p[4] * T1_local[5] + p[5] * T1_local[3] + p[6] * T1_local[0]; + T2_local[6] = p[4] * T1_local[6] + p[5] * T1_local[8] + p[6] * T1_local[11]; + T2_local[7] = p[4] * T1_local[7] + p[5] * T1_local[10] + p[6] * T1_local[9]; + 
T2_local[8] = p[4] * T1_local[8] + p[5] * T1_local[6] + p[6] * T1_local[10]; + T2_local[9] = p[4] * T1_local[9] + p[5] * T1_local[11] + p[6] * T1_local[7]; + T2_local[10] = p[4] * T1_local[10] + p[5] * T1_local[7] + p[6] * T1_local[8]; + T2_local[11] = p[4] * T1_local[11] + p[5] * T1_local[9] + p[6] * T1_local[6]; + T2_local[12] = p[4] * T1_local[12] + p[5] * T1_local[14] + p[6] * T1_local[17]; + T2_local[13] = p[4] * T1_local[13] + p[5] * T1_local[16] + p[6] * T1_local[15]; + T2_local[14] = p[4] * T1_local[14] + p[5] * T1_local[12] + p[6] * T1_local[16]; + T2_local[15] = p[4] * T1_local[15] + p[5] * T1_local[17] + p[6] * T1_local[13]; + T2_local[16] = p[4] * T1_local[16] + p[5] * T1_local[13] + p[6] * T1_local[14]; + T2_local[17] = p[4] * T1_local[17] + p[5] * T1_local[15] + p[6] * T1_local[12]; + T2_local[18] = p[4] * T1_local[18] + p[5] * T1_local[20] + p[6] * T1_local[23]; + T2_local[19] = p[4] * T1_local[19] + p[5] * T1_local[22] + p[6] * T1_local[21]; + T2_local[20] = p[4] * T1_local[20] + p[5] * T1_local[18] + p[6] * T1_local[22]; + T2_local[21] = p[4] * T1_local[21] + p[5] * T1_local[23] + p[6] * T1_local[19]; + T2_local[22] = p[4] * T1_local[22] + p[5] * T1_local[19] + p[6] * T1_local[20]; + T2_local[23] = p[4] * T1_local[23] + p[5] * T1_local[21] + p[6] * T1_local[18]; + + A[indices[0]] = alpha * (p[7] * T2_local[0] + p[8] * T2_local[1]) + beta * A[indices[0]]; + A[indices[1]] = alpha * (p[7] * T2_local[1] + p[8] * T2_local[0]) + beta * A[indices[1]]; + A[indices[2]] = alpha * (p[7] * T2_local[2] + p[8] * T2_local[3]) + beta * A[indices[2]]; + A[indices[3]] = alpha * (p[7] * T2_local[3] + p[8] * T2_local[2]) + beta * A[indices[3]]; + A[indices[4]] = alpha * (p[7] * T2_local[4] + p[8] * T2_local[5]) + beta * A[indices[4]]; + A[indices[5]] = alpha * (p[7] * T2_local[5] + p[8] * T2_local[4]) + beta * A[indices[5]]; + A[indices[6]] = alpha * (p[7] * T2_local[6] + p[8] * T2_local[7]) + beta * A[indices[6]]; + A[indices[7]] = alpha * (p[7] * T2_local[7] + p[8] 
* T2_local[6]) + beta * A[indices[7]]; + A[indices[8]] = alpha * (p[7] * T2_local[8] + p[8] * T2_local[9]) + beta * A[indices[8]]; + A[indices[9]] = alpha * (p[7] * T2_local[9] + p[8] * T2_local[8]) + beta * A[indices[9]]; + A[indices[10]] = alpha * (p[7] * T2_local[10] + p[8] * T2_local[11]) + beta * A[indices[10]]; + A[indices[11]] = alpha * (p[7] * T2_local[11] + p[8] * T2_local[10]) + beta * A[indices[11]]; + A[indices[12]] = alpha * (p[7] * T2_local[12] + p[8] * T2_local[13]) + beta * A[indices[12]]; + A[indices[13]] = alpha * (p[7] * T2_local[13] + p[8] * T2_local[12]) + beta * A[indices[13]]; + A[indices[14]] = alpha * (p[7] * T2_local[14] + p[8] * T2_local[15]) + beta * A[indices[14]]; + A[indices[15]] = alpha * (p[7] * T2_local[15] + p[8] * T2_local[14]) + beta * A[indices[15]]; + A[indices[16]] = alpha * (p[7] * T2_local[16] + p[8] * T2_local[17]) + beta * A[indices[16]]; + A[indices[17]] = alpha * (p[7] * T2_local[17] + p[8] * T2_local[16]) + beta * A[indices[17]]; + A[indices[18]] = alpha * (p[7] * T2_local[18] + p[8] * T2_local[19]) + beta * A[indices[18]]; + A[indices[19]] = alpha * (p[7] * T2_local[19] + p[8] * T2_local[18]) + beta * A[indices[19]]; + A[indices[20]] = alpha * (p[7] * T2_local[20] + p[8] * T2_local[21]) + beta * A[indices[20]]; + A[indices[21]] = alpha * (p[7] * T2_local[21] + p[8] * T2_local[20]) + beta * A[indices[21]]; + A[indices[22]] = alpha * (p[7] * T2_local[22] + p[8] * T2_local[23]) + beta * A[indices[22]]; + A[indices[23]] = alpha * (p[7] * T2_local[23] + p[8] * T2_local[22]) + beta * A[indices[23]]; + } + else if (a > b && b > c && c == d) + { + double T1_local[12]; + double T2_local[12]; + + int64_t indices[12]; + indices[0] = a * nvvv + b * nvv + c * nvir + c; + indices[1] = a * nvvv + c * nvv + b * nvir + c; + indices[2] = a * nvvv + c * nvv + c * nvir + b; + indices[3] = b * nvvv + a * nvv + c * nvir + c; + indices[4] = b * nvvv + c * nvv + a * nvir + c; + indices[5] = b * nvvv + c * nvv + c * nvir + a; + indices[6] = c 
* nvvv + a * nvv + b * nvir + c; + indices[7] = c * nvvv + a * nvv + c * nvir + b; + indices[8] = c * nvvv + b * nvv + a * nvir + c; + indices[9] = c * nvvv + b * nvv + c * nvir + a; + indices[10] = c * nvvv + c * nvv + a * nvir + b; + indices[11] = c * nvvv + c * nvv + b * nvir + a; + + T1_local[0] = p[0] * A[indices[0]] + p[1] * A[indices[3]] + p[2] * A[indices[8]] + p[3] * A[indices[9]]; + T1_local[1] = p[0] * A[indices[1]] + p[1] * A[indices[6]] + p[2] * A[indices[4]] + p[3] * A[indices[11]]; + T1_local[2] = p[0] * A[indices[2]] + p[1] * A[indices[7]] + p[2] * A[indices[10]] + p[3] * A[indices[5]]; + T1_local[3] = p[0] * A[indices[3]] + p[1] * A[indices[0]] + p[2] * A[indices[6]] + p[3] * A[indices[7]]; + T1_local[4] = p[0] * A[indices[4]] + p[1] * A[indices[8]] + p[2] * A[indices[1]] + p[3] * A[indices[10]]; + T1_local[5] = p[0] * A[indices[5]] + p[1] * A[indices[9]] + p[2] * A[indices[11]] + p[3] * A[indices[2]]; + T1_local[6] = p[0] * A[indices[6]] + p[1] * A[indices[1]] + p[2] * A[indices[3]] + p[3] * A[indices[6]]; + T1_local[7] = p[0] * A[indices[7]] + p[1] * A[indices[2]] + p[2] * A[indices[7]] + p[3] * A[indices[3]]; + T1_local[8] = p[0] * A[indices[8]] + p[1] * A[indices[4]] + p[2] * A[indices[0]] + p[3] * A[indices[8]]; + T1_local[9] = p[0] * A[indices[9]] + p[1] * A[indices[5]] + p[2] * A[indices[9]] + p[3] * A[indices[0]]; + T1_local[10] = p[0] * A[indices[10]] + p[1] * A[indices[10]] + p[2] * A[indices[2]] + p[3] * A[indices[4]]; + T1_local[11] = p[0] * A[indices[11]] + p[1] * A[indices[11]] + p[2] * A[indices[5]] + p[3] * A[indices[1]]; + + T2_local[0] = p[4] * T1_local[0] + p[5] * T1_local[1] + p[6] * T1_local[2]; + T2_local[1] = p[4] * T1_local[1] + p[5] * T1_local[0] + p[6] * T1_local[1]; + T2_local[2] = p[4] * T1_local[2] + p[5] * T1_local[2] + p[6] * T1_local[0]; + T2_local[3] = p[4] * T1_local[3] + p[5] * T1_local[4] + p[6] * T1_local[5]; + T2_local[4] = p[4] * T1_local[4] + p[5] * T1_local[3] + p[6] * T1_local[4]; + T2_local[5] = p[4] * 
T1_local[5] + p[5] * T1_local[5] + p[6] * T1_local[3]; + T2_local[6] = p[4] * T1_local[6] + p[5] * T1_local[8] + p[6] * T1_local[11]; + T2_local[7] = p[4] * T1_local[7] + p[5] * T1_local[10] + p[6] * T1_local[9]; + T2_local[8] = p[4] * T1_local[8] + p[5] * T1_local[6] + p[6] * T1_local[10]; + T2_local[9] = p[4] * T1_local[9] + p[5] * T1_local[11] + p[6] * T1_local[7]; + T2_local[10] = p[4] * T1_local[10] + p[5] * T1_local[7] + p[6] * T1_local[8]; + T2_local[11] = p[4] * T1_local[11] + p[5] * T1_local[9] + p[6] * T1_local[6]; + + A[indices[0]] = alpha * (p[7] * T2_local[0] + p[8] * T2_local[0]) + beta * A[indices[0]]; + A[indices[1]] = alpha * (p[7] * T2_local[1] + p[8] * T2_local[2]) + beta * A[indices[1]]; + A[indices[2]] = alpha * (p[7] * T2_local[2] + p[8] * T2_local[1]) + beta * A[indices[2]]; + A[indices[3]] = alpha * (p[7] * T2_local[3] + p[8] * T2_local[3]) + beta * A[indices[3]]; + A[indices[4]] = alpha * (p[7] * T2_local[4] + p[8] * T2_local[5]) + beta * A[indices[4]]; + A[indices[5]] = alpha * (p[7] * T2_local[5] + p[8] * T2_local[4]) + beta * A[indices[5]]; + A[indices[6]] = alpha * (p[7] * T2_local[6] + p[8] * T2_local[7]) + beta * A[indices[6]]; + A[indices[7]] = alpha * (p[7] * T2_local[7] + p[8] * T2_local[6]) + beta * A[indices[7]]; + A[indices[8]] = alpha * (p[7] * T2_local[8] + p[8] * T2_local[9]) + beta * A[indices[8]]; + A[indices[9]] = alpha * (p[7] * T2_local[9] + p[8] * T2_local[8]) + beta * A[indices[9]]; + A[indices[10]] = alpha * (p[7] * T2_local[10] + p[8] * T2_local[11]) + beta * A[indices[10]]; + A[indices[11]] = alpha * (p[7] * T2_local[11] + p[8] * T2_local[10]) + beta * A[indices[11]]; + } + else if (a > b && b == c && c > d) + { + double T1_local[12]; + double T2_local[12]; + + int64_t indices[12]; + indices[0] = a * nvvv + b * nvv + b * nvir + d; + indices[1] = a * nvvv + b * nvv + d * nvir + b; + indices[2] = a * nvvv + d * nvv + b * nvir + b; + indices[3] = b * nvvv + a * nvv + b * nvir + d; + indices[4] = b * nvvv + a * nvv + d 
* nvir + b; + indices[5] = b * nvvv + b * nvv + a * nvir + d; + indices[6] = b * nvvv + b * nvv + d * nvir + a; + indices[7] = b * nvvv + d * nvv + a * nvir + b; + indices[8] = b * nvvv + d * nvv + b * nvir + a; + indices[9] = d * nvvv + a * nvv + b * nvir + b; + indices[10] = d * nvvv + b * nvv + a * nvir + b; + indices[11] = d * nvvv + b * nvv + b * nvir + a; + + T1_local[0] = p[0] * A[indices[0]] + p[1] * A[indices[3]] + p[2] * A[indices[5]] + p[3] * A[indices[11]]; + T1_local[1] = p[0] * A[indices[1]] + p[1] * A[indices[4]] + p[2] * A[indices[10]] + p[3] * A[indices[6]]; + T1_local[2] = p[0] * A[indices[2]] + p[1] * A[indices[9]] + p[2] * A[indices[7]] + p[3] * A[indices[8]]; + T1_local[3] = p[0] * A[indices[3]] + p[1] * A[indices[0]] + p[2] * A[indices[3]] + p[3] * A[indices[9]]; + T1_local[4] = p[0] * A[indices[4]] + p[1] * A[indices[1]] + p[2] * A[indices[9]] + p[3] * A[indices[4]]; + T1_local[5] = p[0] * A[indices[5]] + p[1] * A[indices[5]] + p[2] * A[indices[0]] + p[3] * A[indices[10]]; + T1_local[6] = p[0] * A[indices[6]] + p[1] * A[indices[6]] + p[2] * A[indices[11]] + p[3] * A[indices[1]]; + T1_local[7] = p[0] * A[indices[7]] + p[1] * A[indices[10]] + p[2] * A[indices[2]] + p[3] * A[indices[7]]; + T1_local[8] = p[0] * A[indices[8]] + p[1] * A[indices[11]] + p[2] * A[indices[8]] + p[3] * A[indices[2]]; + T1_local[9] = p[0] * A[indices[9]] + p[1] * A[indices[2]] + p[2] * A[indices[4]] + p[3] * A[indices[3]]; + T1_local[10] = p[0] * A[indices[10]] + p[1] * A[indices[7]] + p[2] * A[indices[1]] + p[3] * A[indices[5]]; + T1_local[11] = p[0] * A[indices[11]] + p[1] * A[indices[8]] + p[2] * A[indices[6]] + p[3] * A[indices[0]]; + + T2_local[0] = p[4] * T1_local[0] + p[5] * T1_local[0] + p[6] * T1_local[2]; + T2_local[1] = p[4] * T1_local[1] + p[5] * T1_local[2] + p[6] * T1_local[1]; + T2_local[2] = p[4] * T1_local[2] + p[5] * T1_local[1] + p[6] * T1_local[0]; + T2_local[3] = p[4] * T1_local[3] + p[5] * T1_local[5] + p[6] * T1_local[8]; + T2_local[4] = p[4] * 
T1_local[4] + p[5] * T1_local[7] + p[6] * T1_local[6]; + T2_local[5] = p[4] * T1_local[5] + p[5] * T1_local[3] + p[6] * T1_local[7]; + T2_local[6] = p[4] * T1_local[6] + p[5] * T1_local[8] + p[6] * T1_local[4]; + T2_local[7] = p[4] * T1_local[7] + p[5] * T1_local[4] + p[6] * T1_local[5]; + T2_local[8] = p[4] * T1_local[8] + p[5] * T1_local[6] + p[6] * T1_local[3]; + T2_local[9] = p[4] * T1_local[9] + p[5] * T1_local[10] + p[6] * T1_local[11]; + T2_local[10] = p[4] * T1_local[10] + p[5] * T1_local[9] + p[6] * T1_local[10]; + T2_local[11] = p[4] * T1_local[11] + p[5] * T1_local[11] + p[6] * T1_local[9]; + + A[indices[0]] = alpha * (p[7] * T2_local[0] + p[8] * T2_local[1]) + beta * A[indices[0]]; + A[indices[1]] = alpha * (p[7] * T2_local[1] + p[8] * T2_local[0]) + beta * A[indices[1]]; + A[indices[2]] = alpha * (p[7] * T2_local[2] + p[8] * T2_local[2]) + beta * A[indices[2]]; + A[indices[3]] = alpha * (p[7] * T2_local[3] + p[8] * T2_local[4]) + beta * A[indices[3]]; + A[indices[4]] = alpha * (p[7] * T2_local[4] + p[8] * T2_local[3]) + beta * A[indices[4]]; + A[indices[5]] = alpha * (p[7] * T2_local[5] + p[8] * T2_local[6]) + beta * A[indices[5]]; + A[indices[6]] = alpha * (p[7] * T2_local[6] + p[8] * T2_local[5]) + beta * A[indices[6]]; + A[indices[7]] = alpha * (p[7] * T2_local[7] + p[8] * T2_local[8]) + beta * A[indices[7]]; + A[indices[8]] = alpha * (p[7] * T2_local[8] + p[8] * T2_local[7]) + beta * A[indices[8]]; + A[indices[9]] = alpha * (p[7] * T2_local[9] + p[8] * T2_local[9]) + beta * A[indices[9]]; + A[indices[10]] = alpha * (p[7] * T2_local[10] + p[8] * T2_local[11]) + beta * A[indices[10]]; + A[indices[11]] = alpha * (p[7] * T2_local[11] + p[8] * T2_local[10]) + beta * A[indices[11]]; + } + else if (a == b && b > c && c > d) + { + double T1_local[12]; + double T2_local[12]; + + int64_t indices[12]; + indices[0] = a * nvvv + a * nvv + c * nvir + d; + indices[1] = a * nvvv + a * nvv + d * nvir + c; + indices[2] = a * nvvv + c * nvv + a * nvir + d; + 
indices[3] = a * nvvv + c * nvv + d * nvir + a; + indices[4] = a * nvvv + d * nvv + a * nvir + c; + indices[5] = a * nvvv + d * nvv + c * nvir + a; + indices[6] = c * nvvv + a * nvv + a * nvir + d; + indices[7] = c * nvvv + a * nvv + d * nvir + a; + indices[8] = c * nvvv + d * nvv + a * nvir + a; + indices[9] = d * nvvv + a * nvv + a * nvir + c; + indices[10] = d * nvvv + a * nvv + c * nvir + a; + indices[11] = d * nvvv + c * nvv + a * nvir + a; + + T1_local[0] = p[0] * A[indices[0]] + p[1] * A[indices[0]] + p[2] * A[indices[6]] + p[3] * A[indices[10]]; + T1_local[1] = p[0] * A[indices[1]] + p[1] * A[indices[1]] + p[2] * A[indices[9]] + p[3] * A[indices[7]]; + T1_local[2] = p[0] * A[indices[2]] + p[1] * A[indices[6]] + p[2] * A[indices[2]] + p[3] * A[indices[11]]; + T1_local[3] = p[0] * A[indices[3]] + p[1] * A[indices[7]] + p[2] * A[indices[11]] + p[3] * A[indices[3]]; + T1_local[4] = p[0] * A[indices[4]] + p[1] * A[indices[9]] + p[2] * A[indices[4]] + p[3] * A[indices[8]]; + T1_local[5] = p[0] * A[indices[5]] + p[1] * A[indices[10]] + p[2] * A[indices[8]] + p[3] * A[indices[5]]; + T1_local[6] = p[0] * A[indices[6]] + p[1] * A[indices[2]] + p[2] * A[indices[0]] + p[3] * A[indices[9]]; + T1_local[7] = p[0] * A[indices[7]] + p[1] * A[indices[3]] + p[2] * A[indices[10]] + p[3] * A[indices[1]]; + T1_local[8] = p[0] * A[indices[8]] + p[1] * A[indices[11]] + p[2] * A[indices[5]] + p[3] * A[indices[4]]; + T1_local[9] = p[0] * A[indices[9]] + p[1] * A[indices[4]] + p[2] * A[indices[1]] + p[3] * A[indices[6]]; + T1_local[10] = p[0] * A[indices[10]] + p[1] * A[indices[5]] + p[2] * A[indices[7]] + p[3] * A[indices[0]]; + T1_local[11] = p[0] * A[indices[11]] + p[1] * A[indices[8]] + p[2] * A[indices[3]] + p[3] * A[indices[2]]; + + T2_local[0] = p[4] * T1_local[0] + p[5] * T1_local[2] + p[6] * T1_local[5]; + T2_local[1] = p[4] * T1_local[1] + p[5] * T1_local[4] + p[6] * T1_local[3]; + T2_local[2] = p[4] * T1_local[2] + p[5] * T1_local[0] + p[6] * T1_local[4]; + T2_local[3] = 
p[4] * T1_local[3] + p[5] * T1_local[5] + p[6] * T1_local[1]; + T2_local[4] = p[4] * T1_local[4] + p[5] * T1_local[1] + p[6] * T1_local[2]; + T2_local[5] = p[4] * T1_local[5] + p[5] * T1_local[3] + p[6] * T1_local[0]; + T2_local[6] = p[4] * T1_local[6] + p[5] * T1_local[6] + p[6] * T1_local[8]; + T2_local[7] = p[4] * T1_local[7] + p[5] * T1_local[8] + p[6] * T1_local[7]; + T2_local[8] = p[4] * T1_local[8] + p[5] * T1_local[7] + p[6] * T1_local[6]; + T2_local[9] = p[4] * T1_local[9] + p[5] * T1_local[9] + p[6] * T1_local[11]; + T2_local[10] = p[4] * T1_local[10] + p[5] * T1_local[11] + p[6] * T1_local[10]; + T2_local[11] = p[4] * T1_local[11] + p[5] * T1_local[10] + p[6] * T1_local[9]; + + A[indices[0]] = alpha * (p[7] * T2_local[0] + p[8] * T2_local[1]) + beta * A[indices[0]]; + A[indices[1]] = alpha * (p[7] * T2_local[1] + p[8] * T2_local[0]) + beta * A[indices[1]]; + A[indices[2]] = alpha * (p[7] * T2_local[2] + p[8] * T2_local[3]) + beta * A[indices[2]]; + A[indices[3]] = alpha * (p[7] * T2_local[3] + p[8] * T2_local[2]) + beta * A[indices[3]]; + A[indices[4]] = alpha * (p[7] * T2_local[4] + p[8] * T2_local[5]) + beta * A[indices[4]]; + A[indices[5]] = alpha * (p[7] * T2_local[5] + p[8] * T2_local[4]) + beta * A[indices[5]]; + A[indices[6]] = alpha * (p[7] * T2_local[6] + p[8] * T2_local[7]) + beta * A[indices[6]]; + A[indices[7]] = alpha * (p[7] * T2_local[7] + p[8] * T2_local[6]) + beta * A[indices[7]]; + A[indices[8]] = alpha * (p[7] * T2_local[8] + p[8] * T2_local[8]) + beta * A[indices[8]]; + A[indices[9]] = alpha * (p[7] * T2_local[9] + p[8] * T2_local[10]) + beta * A[indices[9]]; + A[indices[10]] = alpha * (p[7] * T2_local[10] + p[8] * T2_local[9]) + beta * A[indices[10]]; + A[indices[11]] = alpha * (p[7] * T2_local[11] + p[8] * T2_local[11]) + beta * A[indices[11]]; + } + else if (a > b && b == c && c == d) + { + double T1_local[4]; + double T2_local[4]; + + int64_t indices[4]; + indices[0] = a * nvvv + b * nvv + b * nvir + b; + indices[1] = b * nvvv + a 
* nvv + b * nvir + b; + indices[2] = b * nvvv + b * nvv + a * nvir + b; + indices[3] = b * nvvv + b * nvv + b * nvir + a; + + T1_local[0] = p[0] * A[indices[0]] + p[1] * A[indices[1]] + p[2] * A[indices[2]] + p[3] * A[indices[3]]; + T1_local[1] = p[0] * A[indices[1]] + p[1] * A[indices[0]] + p[2] * A[indices[1]] + p[3] * A[indices[1]]; + T1_local[2] = p[0] * A[indices[2]] + p[1] * A[indices[2]] + p[2] * A[indices[0]] + p[3] * A[indices[2]]; + T1_local[3] = p[0] * A[indices[3]] + p[1] * A[indices[3]] + p[2] * A[indices[3]] + p[3] * A[indices[0]]; + + T2_local[0] = p[4] * T1_local[0] + p[5] * T1_local[0] + p[6] * T1_local[0]; + T2_local[1] = p[4] * T1_local[1] + p[5] * T1_local[2] + p[6] * T1_local[3]; + T2_local[2] = p[4] * T1_local[2] + p[5] * T1_local[1] + p[6] * T1_local[2]; + T2_local[3] = p[4] * T1_local[3] + p[5] * T1_local[3] + p[6] * T1_local[1]; + + A[indices[0]] = alpha * (p[7] * T2_local[0] + p[8] * T2_local[0]) + beta * A[indices[0]]; + A[indices[1]] = alpha * (p[7] * T2_local[1] + p[8] * T2_local[1]) + beta * A[indices[1]]; + A[indices[2]] = alpha * (p[7] * T2_local[2] + p[8] * T2_local[3]) + beta * A[indices[2]]; + A[indices[3]] = alpha * (p[7] * T2_local[3] + p[8] * T2_local[2]) + beta * A[indices[3]]; + } + else if (a == b && b == c && c > d) + { + double T1_local[4]; + double T2_local[4]; + + int64_t indices[4]; + indices[0] = a * nvvv + a * nvv + a * nvir + d; + indices[1] = a * nvvv + a * nvv + d * nvir + a; + indices[2] = a * nvvv + d * nvv + a * nvir + a; + indices[3] = d * nvvv + a * nvv + a * nvir + a; + + T1_local[0] = p[0] * A[indices[0]] + p[1] * A[indices[0]] + p[2] * A[indices[0]] + p[3] * A[indices[3]]; + T1_local[1] = p[0] * A[indices[1]] + p[1] * A[indices[1]] + p[2] * A[indices[3]] + p[3] * A[indices[1]]; + T1_local[2] = p[0] * A[indices[2]] + p[1] * A[indices[3]] + p[2] * A[indices[2]] + p[3] * A[indices[2]]; + T1_local[3] = p[0] * A[indices[3]] + p[1] * A[indices[2]] + p[2] * A[indices[1]] + p[3] * A[indices[0]]; + + T2_local[0] = 
p[4] * T1_local[0] + p[5] * T1_local[0] + p[6] * T1_local[2]; + T2_local[1] = p[4] * T1_local[1] + p[5] * T1_local[2] + p[6] * T1_local[1]; + T2_local[2] = p[4] * T1_local[2] + p[5] * T1_local[1] + p[6] * T1_local[0]; + T2_local[3] = p[4] * T1_local[3] + p[5] * T1_local[3] + p[6] * T1_local[3]; + + A[indices[0]] = alpha * (p[7] * T2_local[0] + p[8] * T2_local[1]) + beta * A[indices[0]]; + A[indices[1]] = alpha * (p[7] * T2_local[1] + p[8] * T2_local[0]) + beta * A[indices[1]]; + A[indices[2]] = alpha * (p[7] * T2_local[2] + p[8] * T2_local[2]) + beta * A[indices[2]]; + A[indices[3]] = alpha * (p[7] * T2_local[3] + p[8] * T2_local[3]) + beta * A[indices[3]]; + } + else if (a == b && b > c && c == d) + { + double T1_local[6]; + double T2_local[6]; + + int64_t indices[6]; + indices[0] = b * nvvv + b * nvv + c * nvir + c; + indices[1] = b * nvvv + c * nvv + b * nvir + c; + indices[2] = b * nvvv + c * nvv + c * nvir + b; + indices[3] = c * nvvv + b * nvv + b * nvir + c; + indices[4] = c * nvvv + b * nvv + c * nvir + b; + indices[5] = c * nvvv + c * nvv + b * nvir + b; + + T1_local[0] = p[0] * A[indices[0]] + p[1] * A[indices[0]] + p[2] * A[indices[3]] + p[3] * A[indices[4]]; + T1_local[1] = p[0] * A[indices[1]] + p[1] * A[indices[3]] + p[2] * A[indices[1]] + p[3] * A[indices[5]]; + T1_local[2] = p[0] * A[indices[2]] + p[1] * A[indices[4]] + p[2] * A[indices[5]] + p[3] * A[indices[2]]; + T1_local[3] = p[0] * A[indices[3]] + p[1] * A[indices[1]] + p[2] * A[indices[0]] + p[3] * A[indices[3]]; + T1_local[4] = p[0] * A[indices[4]] + p[1] * A[indices[2]] + p[2] * A[indices[4]] + p[3] * A[indices[0]]; + T1_local[5] = p[0] * A[indices[5]] + p[1] * A[indices[5]] + p[2] * A[indices[2]] + p[3] * A[indices[1]]; + + T2_local[0] = p[4] * T1_local[0] + p[5] * T1_local[1] + p[6] * T1_local[2]; + T2_local[1] = p[4] * T1_local[1] + p[5] * T1_local[0] + p[6] * T1_local[1]; + T2_local[2] = p[4] * T1_local[2] + p[5] * T1_local[2] + p[6] * T1_local[0]; + T2_local[3] = p[4] * T1_local[3] + 
p[5] * T1_local[3] + p[6] * T1_local[5]; + T2_local[4] = p[4] * T1_local[4] + p[5] * T1_local[5] + p[6] * T1_local[4]; + T2_local[5] = p[4] * T1_local[5] + p[5] * T1_local[4] + p[6] * T1_local[3]; + + A[indices[0]] = alpha * (p[7] * T2_local[0] + p[8] * T2_local[0]) + beta * A[indices[0]]; + A[indices[1]] = alpha * (p[7] * T2_local[1] + p[8] * T2_local[2]) + beta * A[indices[1]]; + A[indices[2]] = alpha * (p[7] * T2_local[2] + p[8] * T2_local[1]) + beta * A[indices[2]]; + A[indices[3]] = alpha * (p[7] * T2_local[3] + p[8] * T2_local[4]) + beta * A[indices[3]]; + A[indices[4]] = alpha * (p[7] * T2_local[4] + p[8] * T2_local[3]) + beta * A[indices[4]]; + A[indices[5]] = alpha * (p[7] * T2_local[5] + p[8] * T2_local[5]) + beta * A[indices[5]]; + } + else if (a == b && b == c && c == d) + { + double T1_local[1]; + double T2_local[1]; + + int64_t indices[1]; + indices[0] = a * nvvv + a * nvv + a * nvir + a; + + T1_local[0] = p[0] * A[indices[0]] + p[1] * A[indices[0]] + p[2] * A[indices[0]] + p[3] * A[indices[0]]; + + T2_local[0] = p[4] * T1_local[0] + p[5] * T1_local[0] + p[6] * T1_local[0]; + + A[indices[0]] = alpha * (p[7] * T2_local[0] + p[8] * T2_local[0]) + beta * A[indices[0]]; + } + } +} + // Apply permutation-symmetry projection to T4 amplitudes in place. // A = beta * A + alpha * P(A) // where P(A) ijklabcd = ijklabcd + ijlkabdc + ... 
@@ -1256,6 +1766,38 @@ void eijkl_division_(double *r4, const double *eia, const int64_t nocc, const in } } +void eijkl_division_single_(double *r4, const double *e_occ, const double *e_vir, + const int64_t i, const int64_t j, const int64_t k, const int64_t l, const int64_t nvir) +{ + double eijkl = e_occ[i] + e_occ[j] + e_occ[k] + e_occ[l]; + +#pragma omp parallel for collapse(4) schedule(static) + for (int64_t a = 0; a < nvir; a++) + { + for (int64_t b = 0; b < nvir; b++) + { + for (int64_t c = 0; c < nvir; c++) + { + for (int64_t d = 0; d < nvir; d++) + { + int64_t r4_idx = ((a * nvir + b) * nvir + c) * nvir + d; + + double eijklabcd = eijkl - e_vir[a] - e_vir[b] - e_vir[c] - e_vir[d]; + + if (fabs(eijklabcd) > 1e-15) + { + r4[r4_idx] /= eijklabcd; + } + else + { + r4[r4_idx] = 0.0; + } + } + } + } + } +} + void t4_add_(double *t4, const double *r4, const int64_t nocc4, const int64_t nvir) { const int64_t total_size = nocc4 * nvir * nvir * nvir * nvir; @@ -1294,6 +1836,18 @@ const int64_t tp_t4[24][4] = { {3, 2, 1, 0}, }; +static inline int64_t src_idx_from_full4(const int64_t *restrict perm, int64_t v0, int64_t v1, int64_t v2, int64_t v3, int64_t nvir) +{ + int64_t src_abcd[4]; + + src_abcd[perm[0]] = v0; + src_abcd[perm[1]] = v1; + src_abcd[perm[2]] = v2; + src_abcd[perm[3]] = v3; + + return (((src_abcd[0] * nvir + src_abcd[1]) * nvir + src_abcd[2]) * nvir + src_abcd[3]); +} + // Unpack triangular-stored T4 amplitudes into a full T4 block. 
// // This kernel reconstructs the full permutation-expanded T4 tensor block from the compressed triangular @@ -1320,6 +1874,7 @@ void unpack_t4_tri2block_(const double *restrict t4_tri, { #define MAP(sym, w, x, y, z) map[((((sym) * nocc + (w)) * nocc + (x)) * nocc + (y)) * nocc + (z)] #define MASK(sym, w, x, y, z) mask[((((sym) * nocc + (w)) * nocc + (x)) * nocc + (y)) * nocc + (z)] +#define VIDX(a, b, c, d) ((((a) * nvir + (b)) * nvir + (c)) * nvir + (d)) #pragma omp parallel for collapse(5) schedule(dynamic) for (int64_t sym = 0; sym < 24; ++sym) @@ -1353,16 +1908,82 @@ void unpack_t4_tri2block_(const double *restrict t4_tri, { for (int64_t d = 0; d < nvir; ++d) { - int64_t abcd[4] = {a, b, c, d}; - int64_t aa = abcd[perm[0]]; - int64_t bb = abcd[perm[1]]; - int64_t cc = abcd[perm[2]]; - int64_t dd = abcd[perm[3]]; + int64_t src = src_base + src_idx_from_full4(perm, a, b, c, d, nvir); + int64_t dest = dest_base + VIDX(a, b, c, d); + + t4_blk[dest] = t4_tri[src]; + } + } + } + } + } + } + } + } + } +#undef MAP +#undef MASK +} + +// Unpack triangular-stored T4 amplitudes directly into the final block: +// +// t4_tmp + t4_tmp.transpose(0, 1, 2, 3, 5, 6, 4, 7) + t4_tmp.transpose(0, 1, 2, 3, 5, 7, 6, 4) +// +void unpack_t4_tri2block_triples_(const double *restrict t4_tri, + double *restrict t4_blk, + const int64_t *restrict map, + const bool *restrict mask, + int64_t i0, int64_t i1, + int64_t j0, int64_t j1, + int64_t k0, int64_t k1, + int64_t l0, int64_t l1, + int64_t nocc, int64_t nvir, + int64_t blk_i, int64_t blk_j, int64_t blk_k, int64_t blk_l) +{ +#define MAP(sym, w, x, y, z) map[((((sym) * nocc + (w)) * nocc + (x)) * nocc + (y)) * nocc + (z)] +#define MASK(sym, w, x, y, z) mask[((((sym) * nocc + (w)) * nocc + (x)) * nocc + (y)) * nocc + (z)] +#define VIDX(a, b, c, d) ((((a) * nvir + (b)) * nvir + (c)) * nvir + (d)) + + const int64_t nvir4 = nvir * nvir * nvir * nvir; - int64_t src_idx = src_base + ((a * nvir + b) * nvir + c) * nvir + d; - int64_t dest_idx = 
dest_base + ((aa * nvir + bb) * nvir + cc) * nvir + dd; +#pragma omp parallel for collapse(5) schedule(dynamic) + for (int64_t sym = 0; sym < 24; ++sym) + { + for (int64_t i = i0; i < i1; ++i) + { + for (int64_t j = j0; j < j1; ++j) + { + for (int64_t k = k0; k < k1; ++k) + { + for (int64_t l = l0; l < l1; ++l) + { + if (!MASK(sym, i, j, k, l)) + continue; + + const int64_t *perm = tp_t4[sym]; - t4_blk[dest_idx] = t4_tri[src_idx]; + const int64_t loc_i = i - i0; + const int64_t loc_j = j - j0; + const int64_t loc_k = k - k0; + const int64_t loc_l = l - l0; + + const int64_t src_base = MAP(sym, i, j, k, l) * nvir4; + + const int64_t dest_base = (((loc_i * blk_j + loc_j) * blk_k + loc_k) * blk_l + loc_l) * nvir4; + + for (int64_t a = 0; a < nvir; ++a) + { + for (int64_t b = 0; b < nvir; ++b) + { + for (int64_t c = 0; c < nvir; ++c) + { + for (int64_t d = 0; d < nvir; ++d) + { + const int64_t src0 = src_base + src_idx_from_full4(perm, a, b, c, d, nvir); + const int64_t src1 = src_base + src_idx_from_full4(perm, c, a, b, d, nvir); + const int64_t src2 = src_base + src_idx_from_full4(perm, d, a, c, b, nvir); + const int64_t dest = dest_base + VIDX(a, b, c, d); + t4_blk[dest] = t4_tri[src0] + t4_tri[src1] + t4_tri[src2]; } } } @@ -1372,6 +1993,7 @@ void unpack_t4_tri2block_(const double *restrict t4_tri, } } } +#undef VIDX #undef MAP #undef MASK } @@ -1452,3 +2074,324 @@ void accumulate_t4_block2tri_(double *restrict t4_tri, } #undef MAP } + +const int64_t swap_pairs[6][2] = { + {0, 1}, // ab + {0, 2}, // ac + {0, 3}, // ad + {1, 2}, // bc + {1, 3}, // bd + {2, 3} // cd +}; + +static inline int64_t idx4(int64_t a, int64_t b, int64_t c, int64_t d, int64_t nvir, int64_t nvv, int64_t nvvv) +{ + return a * nvvv + b * nvv + c * nvir + d; +} + +static inline void swap4(int64_t in[4], int p, int q, int64_t out[4]) +{ + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + + int64_t tmp = out[p]; + out[p] = out[q]; + out[q] = tmp; +} + +static inline int 
same_tuple4(int64_t x[4], int64_t y[4]) +{ + return x[0] == y[0] && x[1] == y[1] && x[2] == y[2] && x[3] == y[3]; +} + +static int find_tuple4(int64_t tuples[24][4], int ntuples, int64_t target[4]) +{ + for (int i = 0; i < ntuples; i++) + { + if (same_tuple4(tuples[i], target)) + return i; + } + + fprintf(stderr, "Error: tuple not found in orbit.\n"); + return -1; +} + +static void apply_omega_local(double *y, const double *x, int64_t tuples[24][4], int ntuples) +{ + for (int i = 0; i < ntuples; i++) + { + y[i] = 0.0; + + for (int s = 0; s < 6; s++) + { + int64_t target[4]; + swap4(tuples[i], swap_pairs[s][0], swap_pairs[s][1], target); + + int j = find_tuple4(tuples, ntuples, target); + y[i] += x[j]; + } + } +} + +static int build_unique_orbit(int64_t a, int64_t b, int64_t c, int64_t d, int64_t tuples[24][4]) +{ + int64_t base[4] = {a, b, c, d}; + int ntuples = 0; + + for (int p = 0; p < 24; p++) + { + int64_t cand[4] = {base[tp_t4[p][0]], base[tp_t4[p][1]], base[tp_t4[p][2]], base[tp_t4[p][3]]}; + + int duplicate = 0; + for (int q = 0; q < ntuples; q++) + { + if (same_tuple4(tuples[q], cand)) + { + duplicate = 1; + break; + } + } + + if (!duplicate) + { + tuples[ntuples][0] = cand[0]; + tuples[ntuples][1] = cand[1]; + tuples[ntuples][2] = cand[2]; + tuples[ntuples][3] = cand[3]; + ntuples++; + } + } + return ntuples; +} + +static int s_omega_action_24[24][6]; +static int s_omega_action_24_ready = 0; + +static void init_omega_action_24(void) +{ + if (s_omega_action_24_ready) + return; + + int rev[4][4][4][4]; + for (int p = 0; p < 24; p++) + rev[tp_t4[p][0]][tp_t4[p][1]][tp_t4[p][2]][tp_t4[p][3]] = p; + + for (int p = 0; p < 24; p++) + for (int s = 0; s < 6; s++) + { + int t[4] = {tp_t4[p][0], tp_t4[p][1], tp_t4[p][2], tp_t4[p][3]}; + int i = swap_pairs[s][0], j = swap_pairs[s][1]; + int tmp = t[i]; + t[i] = t[j]; + t[j] = tmp; + s_omega_action_24[p][s] = rev[t[0]][t[1]][t[2]][t[3]]; + } + s_omega_action_24_ready = 1; +} + +static inline void apply_omega_24(double 
*restrict y, const double *restrict x) +{ + for (int p = 0; p < 24; p++) + y[p] = x[s_omega_action_24[p][0]] + x[s_omega_action_24[p][1]] + x[s_omega_action_24[p][2]] + x[s_omega_action_24[p][3]] + x[s_omega_action_24[p][4]] + x[s_omega_action_24[p][5]]; +} + +static inline void project_orbit_(double *restrict A, int64_t h, int64_t a, int64_t b, int64_t c, int64_t d, + int64_t nvir, int64_t nvv, int64_t nvvv, double alpha, double beta) +{ + if (a > b && b > c && c > d) + { + int64_t idx[24]; + idx[0] = a * nvvv + b * nvv + c * nvir + d; + idx[1] = a * nvvv + b * nvv + d * nvir + c; + idx[2] = a * nvvv + c * nvv + b * nvir + d; + idx[3] = a * nvvv + c * nvv + d * nvir + b; + idx[4] = a * nvvv + d * nvv + b * nvir + c; + idx[5] = a * nvvv + d * nvv + c * nvir + b; + idx[6] = b * nvvv + a * nvv + c * nvir + d; + idx[7] = b * nvvv + a * nvv + d * nvir + c; + idx[8] = b * nvvv + c * nvv + a * nvir + d; + idx[9] = b * nvvv + c * nvv + d * nvir + a; + idx[10] = b * nvvv + d * nvv + a * nvir + c; + idx[11] = b * nvvv + d * nvv + c * nvir + a; + idx[12] = c * nvvv + a * nvv + b * nvir + d; + idx[13] = c * nvvv + a * nvv + d * nvir + b; + idx[14] = c * nvvv + b * nvv + a * nvir + d; + idx[15] = c * nvvv + b * nvv + d * nvir + a; + idx[16] = c * nvvv + d * nvv + a * nvir + b; + idx[17] = c * nvvv + d * nvv + b * nvir + a; + idx[18] = d * nvvv + a * nvv + b * nvir + c; + idx[19] = d * nvvv + a * nvv + c * nvir + b; + idx[20] = d * nvvv + b * nvv + a * nvir + c; + idx[21] = d * nvvv + b * nvv + c * nvir + a; + idx[22] = d * nvvv + c * nvv + a * nvir + b; + idx[23] = d * nvvv + c * nvv + b * nvir + a; + + double x[24], x1[24], x2[24], x3[24], x4[24], y[24]; + for (int p = 0; p < 24; p++) + x[p] = A[h + idx[p]]; + + apply_omega_24(x1, x); + for (int p = 0; p < 24; p++) + x1[p] -= 6.0 * x[p]; + + apply_omega_24(x2, x1); + for (int p = 0; p < 24; p++) + x2[p] -= 2.0 * x1[p]; + + apply_omega_24(x3, x2); + apply_omega_24(x4, x3); + + for (int p = 0; p < 24; p++) + y[p] = (2.0 * x4[p] 
+ 19.0 * x3[p] + 48.0 * x2[p]) / 576.0; + + for (int p = 0; p < 24; p++) + A[h + idx[p]] = beta * x[p] + alpha * y[p]; + } + else + { + int64_t tuples[24][4]; + int64_t indices[24]; + int perm_map[24][6]; + double x[24], x1[24], x2[24], x3[24], x4[24], y[24]; + static const int swaps[6][2] = {{0, 1}, {0, 2}, {0, 3}, {1, 2}, {1, 3}, {2, 3}}; + + int ntuples = build_unique_orbit(a, b, c, d, tuples); + + for (int p = 0; p < ntuples; p++) + for (int s = 0; s < 6; s++) + { + int64_t target[4]; + swap4(tuples[p], swaps[s][0], swaps[s][1], target); + perm_map[p][s] = find_tuple4(tuples, ntuples, target); + } + for (int p = 0; p < ntuples; p++) + { + indices[p] = idx4(tuples[p][0], tuples[p][1], tuples[p][2], tuples[p][3], nvir, nvv, nvvv); + x[p] = A[h + indices[p]]; + } + for (int p = 0; p < ntuples; p++) + { + double om = 0.0; + for (int s = 0; s < 6; s++) + om += x[perm_map[p][s]]; + x1[p] = om - 6.0 * x[p]; + } + for (int p = 0; p < ntuples; p++) + { + double om = 0.0; + for (int s = 0; s < 6; s++) + om += x1[perm_map[p][s]]; + x2[p] = om - 2.0 * x1[p]; + } + for (int p = 0; p < ntuples; p++) + { + double om = 0.0; + for (int s = 0; s < 6; s++) + om += x2[perm_map[p][s]]; + x3[p] = om; + } + for (int p = 0; p < ntuples; p++) + { + double om = 0.0; + for (int s = 0; s < 6; s++) + om += x3[perm_map[p][s]]; + x4[p] = om; + } + for (int p = 0; p < ntuples; p++) + y[p] = (2.0 * x4[p] + 19.0 * x3[p] + 48.0 * x2[p]) / 576.0; + for (int p = 0; p < ntuples; p++) + A[h + indices[p]] = beta * x[p] + alpha * y[p]; + } +} + +void t4_project_1_minus_p4_p31_inplace_(double *A, int64_t nocc4, int64_t nvir, double alpha, double beta) +{ + init_omega_action_24(); + int64_t nvv = nvir * nvir; + int64_t nvvv = nvir * nvv; + int64_t nvvvv = nvir * nvvv; + const int64_t bl = 8; + +#pragma omp parallel for schedule(static) + for (int64_t ijkl = 0; ijkl < nocc4; ijkl++) + { + int64_t h = ijkl * nvvvv; + for (int64_t a0 = 0; a0 < nvir; a0 += bl) + for (int64_t b0 = 0; b0 <= a0; b0 += bl) + 
for (int64_t c0 = 0; c0 <= b0; c0 += bl) + for (int64_t d0 = 0; d0 <= c0; d0 += bl) + for (int64_t a = a0; a < a0 + bl && a < nvir; a++) + for (int64_t b = b0; b < b0 + bl && b <= a; b++) + for (int64_t c = c0; c < c0 + bl && c <= b; c++) + for (int64_t d = d0; d < d0 + bl && d <= c; d++) + project_orbit_(A, h, a, b, c, d, nvir, nvv, nvvv, alpha, beta); + } +} + +void r4_tri_divide_e_(double *restrict r4_tri, const double *restrict eia, int64_t nocc, int64_t nvir) +{ + const int64_t nvir2 = nvir * nvir; + const int64_t nvir3 = nvir2 * nvir; + const int64_t nvir4 = nvir3 * nvir; + + int64_t *i_start = (int64_t *)malloc((size_t)(nocc + 1) * sizeof(int64_t)); + if (!i_start) + { + fprintf(stderr, "r4_tri_divide_e_: malloc failed\n"); + return; + } + i_start[0] = 0; + for (int64_t i = 1; i <= nocc; i++) + { + int64_t m = nocc - i + 1; + i_start[i] = i_start[i - 1] + m * (m + 1) * (m + 2) / 6; + } + +#pragma omp parallel for schedule(dynamic) + for (int64_t i = 0; i < nocc; i++) + { + int64_t idx = i_start[i]; + const double *eia_i = eia + i * nvir; + for (int64_t j = i; j < nocc; j++) + { + const double *eia_j = eia + j * nvir; + for (int64_t k = j; k < nocc; k++) + { + const double *eia_k = eia + k * nvir; + for (int64_t l = k; l < nocc; l++, idx++) + { + const double *eia_l = eia + l * nvir; + double *blk = r4_tri + idx * nvir4; + for (int64_t a = 0; a < nvir; a++) + { + double eia_ia = eia_i[a]; + for (int64_t b = 0; b < nvir; b++) + { + double eijab = eia_ia + eia_j[b]; + for (int64_t c = 0; c < nvir; c++) + { + double eijkabc = eijab + eia_k[c]; + double *ptr = blk + a * nvir3 + b * nvir2 + c * nvir; + for (int64_t d = 0; d < nvir; d++) + { + if (fabs(eijkabc + eia_l[d]) > 1e-15) + { + ptr[d] /= eijkabc + eia_l[d]; + } + else + { + ptr[d] = 0.0; + } + } + } + } + } + } + } + } + } + + free(i_start); +} diff --git a/pyscf/lib/dft/libxc_itrf.c b/pyscf/lib/dft/libxc_itrf.c index 02ebfbf91f..2c1d03df94 100644 --- a/pyscf/lib/dft/libxc_itrf.c +++ 
b/pyscf/lib/dft/libxc_itrf.c @@ -1036,15 +1036,19 @@ int LIBXC_max_deriv_order(const int nfunc, const xc_func_type *func) }; for (i = 0; i < nfunc; i++) { - /* find the minimum order of all functionals */ + /* find the highest order this functional supports, then keep + * the minimum across all functionals. Iterate o>=0 to also + * cover order-0 (EXC-only) functionals. */ const int flag = func[i].info->flags; - for (o = ord; o > 0; o--) { + int found = 0; + for (o = ord; o >= 0; o--) { if (flag & DERIV_FLAGS_TABLE[o]) { ord = o; + found = 1; break; } } - if (o == -1) return -1; + if (!found) return -1; } return ord; diff --git a/pyscf/lib/dft/multigrid.c b/pyscf/lib/dft/multigrid.c index 11344295bb..76c118f101 100644 --- a/pyscf/lib/dft/multigrid.c +++ b/pyscf/lib/dft/multigrid.c @@ -94,7 +94,10 @@ void init_rs_grid(RS_Grid** rs_grid, GridLevel_Info** gridlevel_info, int comp) int *mesh = gl_info->mesh; rg->data = (double**)malloc(sizeof(double*) * nlevels); for (i = 0; i < nlevels; i++) { - ngrid = mesh[i*3] * mesh[i*3+1] * mesh[i*3+2]; + // Cast to size_t before multiplying so very fine meshes + // (mesh > ~1024 on a side) do not overflow int and silently + // under-size the FFT-grid allocation. + ngrid = (size_t)mesh[i*3] * mesh[i*3+1] * mesh[i*3+2]; (rg->data)[i] = calloc(comp*ngrid, sizeof(double)); } *rs_grid = rg; @@ -171,7 +174,11 @@ double pgfpair_radius(int la, int lb, double zeta, double zetb, double* ra, doub double zetp = zeta + zetb; double eps = precision * precision; - if (rab[0] < RZERO && rab[1] < RZERO && rab[2] < RZERO) { + // Same-atom shortcut: compare the magnitude of the displacement vector, + // not the signed components. The previous "rab[0] < RZERO && ..." test + // wrongly fired on any all-negative displacement (~1/8 of periodic-image + // shifted pairs) and returned the wrong radius. 
+ if (SQUARE(rab) < RZERO*RZERO) { radius = pgf_rcut(la+lb, zetp, 1., eps, radius); return radius; } diff --git a/pyscf/lib/dft/test/test_sparse_dot.py b/pyscf/lib/dft/test/test_sparse_dot.py index 8b948c5a61..feb216d8ab 100644 --- a/pyscf/lib/dft/test/test_sparse_dot.py +++ b/pyscf/lib/dft/test/test_sparse_dot.py @@ -162,7 +162,7 @@ def test_dot_ao_ao_case1(self): ctypes.c_int(nbins), s_index.ctypes.data_as(ctypes.c_void_p), pair_mask.ctypes.data_as(ctypes.c_void_p), ao_loc.ctypes.data_as(ctypes.c_void_p)) - self.assertAlmostEqual(abs(ref - out).max(), 0, 24) + self.assertAlmostEqual(abs(ref - out).max(), 0, 23) def test_dot_ao_ao_case2(self): np.random.seed(1) diff --git a/pyscf/lib/gto/deriv2.c b/pyscf/lib/gto/deriv2.c index e9fa27e6bf..55a02d1c91 100644 --- a/pyscf/lib/gto/deriv2.c +++ b/pyscf/lib/gto/deriv2.c @@ -46,15 +46,15 @@ void GTOshell_eval_grid_cart_deriv2(double *cgto, double *ri, double *exps, const size_t bgrids0 = (bgrids >= SIMDD) ? (bgrids+1-SIMDD) : 0; int lx, ly, lz; size_t i, j, j1, k, l1, n; - double fx0[SIMDD*16]; - double fy0[SIMDD*16]; - double fz0[SIMDD*16]; - double fx1[SIMDD*16]; - double fy1[SIMDD*16]; - double fz1[SIMDD*16]; - double fx2[SIMDD*16]; - double fy2[SIMDD*16]; - double fz2[SIMDD*16]; + double fx0[SIMDD*(LMAX+5)]; + double fy0[SIMDD*(LMAX+5)]; + double fz0[SIMDD*(LMAX+5)]; + double fx1[SIMDD*(LMAX+5)]; + double fy1[SIMDD*(LMAX+5)]; + double fz1[SIMDD*(LMAX+5)]; + double fx2[SIMDD*(LMAX+5)]; + double fy2[SIMDD*(LMAX+5)]; + double fz2[SIMDD*(LMAX+5)]; double buf[SIMDD*10]; double *gridx = coord; double *gridy = coord+BLKSIZE; @@ -220,18 +220,18 @@ void GTOshell_eval_grid_cart_deriv3(double *cgto, double *ri, double *exps, const size_t bgrids0 = (bgrids >= SIMDD) ? 
(bgrids+1-SIMDD) : 0; int lx, ly, lz; size_t i, j, j1, k, l1, n; - double fx0[SIMDD*16]; - double fy0[SIMDD*16]; - double fz0[SIMDD*16]; - double fx1[SIMDD*16]; - double fy1[SIMDD*16]; - double fz1[SIMDD*16]; - double fx2[SIMDD*16]; - double fy2[SIMDD*16]; - double fz2[SIMDD*16]; - double fx3[SIMDD*16]; - double fy3[SIMDD*16]; - double fz3[SIMDD*16]; + double fx0[SIMDD*(LMAX+5)]; + double fy0[SIMDD*(LMAX+5)]; + double fz0[SIMDD*(LMAX+5)]; + double fx1[SIMDD*(LMAX+5)]; + double fy1[SIMDD*(LMAX+5)]; + double fz1[SIMDD*(LMAX+5)]; + double fx2[SIMDD*(LMAX+5)]; + double fy2[SIMDD*(LMAX+5)]; + double fz2[SIMDD*(LMAX+5)]; + double fx3[SIMDD*(LMAX+5)]; + double fy3[SIMDD*(LMAX+5)]; + double fz3[SIMDD*(LMAX+5)]; double buf[SIMDD*20]; double *gridx = coord; double *gridy = coord+BLKSIZE; @@ -447,21 +447,21 @@ void GTOshell_eval_grid_cart_deriv4(double *cgto, double *ri, double *exps, const size_t bgrids0 = (bgrids >= SIMDD) ? (bgrids+1-SIMDD) : 0; int lx, ly, lz; size_t i, j, j1, k, l1, n; - double fx0[SIMDD*16]; - double fy0[SIMDD*16]; - double fz0[SIMDD*16]; - double fx1[SIMDD*16]; - double fy1[SIMDD*16]; - double fz1[SIMDD*16]; - double fx2[SIMDD*16]; - double fy2[SIMDD*16]; - double fz2[SIMDD*16]; - double fx3[SIMDD*16]; - double fy3[SIMDD*16]; - double fz3[SIMDD*16]; - double fx4[SIMDD*16]; - double fy4[SIMDD*16]; - double fz4[SIMDD*16]; + double fx0[SIMDD*(LMAX+5)]; + double fy0[SIMDD*(LMAX+5)]; + double fz0[SIMDD*(LMAX+5)]; + double fx1[SIMDD*(LMAX+5)]; + double fy1[SIMDD*(LMAX+5)]; + double fz1[SIMDD*(LMAX+5)]; + double fx2[SIMDD*(LMAX+5)]; + double fy2[SIMDD*(LMAX+5)]; + double fz2[SIMDD*(LMAX+5)]; + double fx3[SIMDD*(LMAX+5)]; + double fy3[SIMDD*(LMAX+5)]; + double fz3[SIMDD*(LMAX+5)]; + double fx4[SIMDD*(LMAX+5)]; + double fy4[SIMDD*(LMAX+5)]; + double fz4[SIMDD*(LMAX+5)]; double buf[SIMDD*35]; double *gridx = coord; double *gridy = coord+BLKSIZE; diff --git a/pyscf/lib/gto/fill_grids_int2c.c b/pyscf/lib/gto/fill_grids_int2c.c index b0e9c83b2f..76b21b9b21 100644 
--- a/pyscf/lib/gto/fill_grids_int2c.c +++ b/pyscf/lib/gto/fill_grids_int2c.c @@ -18,6 +18,7 @@ #include #include +#include #include "config.h" #include "cint.h" #include "np_helper/np_helper.h" @@ -75,6 +76,11 @@ void GTOgrids_int2c(int (*intor)(), double *mat, int comp, int hermi, const int njsh = jsh1 - jsh0; const size_t naoi = ao_loc[ish1] - ao_loc[ish0]; const size_t naoj = ao_loc[jsh1] - ao_loc[jsh0]; + // The hermi-mode symmetrization loop below decodes (i, j) via + // ig/naoj and then writes to both mat[j*naoi+i] and mat[i*naoi+j], + // which only addresses the right cells when naoi == naoj and the + // slice is square. + assert(hermi == PLAIN || (ish0 == jsh0 && naoi == naoj)); const size_t cache_size = _max_cache_size(intor, shls_slice, 2, atm, natm, bas, nbas, env); const int dims[] = {naoi, naoj, ngrids}; @@ -92,17 +98,17 @@ void GTOgrids_int2c(int (*intor)(), double *mat, int comp, int hermi, // fill up only upper triangle of F-array continue; } + ish += ish0; + jsh += jsh0; + shls[0] = ish; + shls[1] = jsh; + i0 = ao_loc[ish] - ao_loc[ish0]; + j0 = ao_loc[jsh] - ao_loc[jsh0]; for (grid0 = 0; grid0 < ngrids; grid0 += BLKSIZE) { grid1 = MIN(grid0 + BLKSIZE, ngrids); - ish += ish0; - jsh += jsh0; - shls[0] = ish; - shls[1] = jsh; shls[2] = grid0; shls[3] = grid1; - i0 = ao_loc[ish] - ao_loc[ish0]; - j0 = ao_loc[jsh] - ao_loc[jsh0]; (*intor)(mat+ngrids*(j0*naoi+i0)+grid0, dims, shls, atm, natm, bas, nbas, env, opt, cache); } @@ -153,6 +159,9 @@ void GTOgrids_int2c_spinor(int (*intor)(), double complex *mat, int comp, int he const int njsh = jsh1 - jsh0; const size_t naoi = ao_loc[ish1] - ao_loc[ish0]; const size_t naoj = ao_loc[jsh1] - ao_loc[jsh0]; + // Hermi-mode symmetrization assumes a square layout (same reasoning + // as the real variant above). 
+ assert(hermi == PLAIN || (ish0 == jsh0 && naoi == naoj)); const size_t cache_size = _max_cache_size(intor, shls_slice, 2, atm, natm, bas, nbas, env); int dims[] = {naoi, naoj, ngrids}; @@ -169,17 +178,17 @@ void GTOgrids_int2c_spinor(int (*intor)(), double complex *mat, int comp, int he if (hermi != PLAIN && ish > jsh) { continue; } + ish += ish0; + jsh += jsh0; + shls[0] = ish; + shls[1] = jsh; + i0 = ao_loc[ish] - ao_loc[ish0]; + j0 = ao_loc[jsh] - ao_loc[jsh0]; for (grid0 = 0; grid0 < ngrids; grid0 += BLKSIZE) { grid1 = MIN(grid0 + BLKSIZE, ngrids); - ish += ish0; - jsh += jsh0; - shls[0] = ish; - shls[1] = jsh; shls[2] = grid0; shls[3] = grid1; - i0 = ao_loc[ish] - ao_loc[ish0]; - j0 = ao_loc[jsh] - ao_loc[jsh0]; (*intor)(mat+ngrids*(j0*naoi+i0)+grid0, dims, shls, atm, natm, bas, nbas, env, opt, cache); } diff --git a/pyscf/lib/gto/fill_int2c.c b/pyscf/lib/gto/fill_int2c.c index 137fe8c15a..e0a5706d2b 100644 --- a/pyscf/lib/gto/fill_int2c.c +++ b/pyscf/lib/gto/fill_int2c.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "config.h" #include "cint.h" #include "np_helper/np_helper.h" @@ -44,6 +45,12 @@ void GTOint2c(int (*intor)(), double *mat, int comp, int hermi, const int njsh = jsh1 - jsh0; const size_t naoi = ao_loc[ish1] - ao_loc[ish0]; const size_t naoj = ao_loc[jsh1] - ao_loc[jsh0]; + // Hermi-mode symmetrization (NPdsymm_triu below) and the upper- + // triangle skip (ish > jsh in slice-relative indices) both assume a + // square layout with ish0 == jsh0. The per-component stride + // ic*naoi*naoi also assumes that. Calling with hermi != PLAIN on a + // rectangular or offset slice silently corrupts the matrix. 
+ assert(hermi == PLAIN || (ish0 == jsh0 && naoi == naoj)); const int cache_size = GTOmax_cache_size(intor, shls_slice, 2, atm, natm, bas, nbas, env); #pragma omp parallel @@ -92,6 +99,9 @@ void GTOint2c_spinor(int (*intor)(), double complex *mat, int comp, int hermi, const int njsh = jsh1 - jsh0; const size_t naoi = ao_loc[ish1] - ao_loc[ish0]; const size_t naoj = ao_loc[jsh1] - ao_loc[jsh0]; + // Hermi-mode symmetrization assumes a square layout (same reasoning + // as the real variant above). + assert(hermi == PLAIN || (ish0 == jsh0 && naoi == naoj)); const int cache_size = GTOmax_cache_size(intor, shls_slice, 2, atm, natm, bas, nbas, env); diff --git a/pyscf/lib/gto/ft_ao.c b/pyscf/lib/gto/ft_ao.c index 04ba219360..76b536b86c 100644 --- a/pyscf/lib/gto/ft_ao.c +++ b/pyscf/lib/gto/ft_ao.c @@ -549,7 +549,7 @@ void GTO_Gv_orth(double *gzR, double *gzI, double fac, double aij, int *idy = idx + nGv; int *idz = idy + nGv; - double cutoff = EXPCUTOFF * aij * 4; + double cutoff = envs->expcutoff * aij * 4; double aij4 = .25 / aij; double complex fac1 = fac * envs->common_factor; int n, ix, iy, iz; @@ -636,7 +636,7 @@ void GTO_Gv_nonorth(double *gzR, double *gzI, double fac, double aij, int *idy = idx + nGv; int *idz = idy + nGv; - double cutoff = EXPCUTOFF * aij * 4; + double cutoff = envs->expcutoff * aij * 4; double aij4 = -.25 / aij; double complex fac1 = fac * envs->common_factor; int ix, iy, iz; @@ -1094,7 +1094,7 @@ if (ioff == joff) { for (i = 0; i < di; i++) { pbufR = bufR + ic * dijg + dg * (j*di+i); pbufI = bufI + ic * dijg + dg * (j*di+i); - ij = j * nj + i; + ij = j * ni + i; ji = i * nj + j; for (n = 0; n < dg; n++) { pout_ij[(ij*NGv+n)*OF_CMPLX ] += pbufR[n]; diff --git a/pyscf/lib/gto/grid_ao_drv.c b/pyscf/lib/gto/grid_ao_drv.c index fde945f325..ca9a8fa31c 100644 --- a/pyscf/lib/gto/grid_ao_drv.c +++ b/pyscf/lib/gto/grid_ao_drv.c @@ -17,6 +17,7 @@ */ #include +#include #include #include #include @@ -32,8 +33,10 @@ void GTO_screen_index(uint8_t 
*screen_index, int nbins, double cutoff, double *coords, int ngrids, int blksize, int *atm, int natm, int *bas, int nbas, double *env) { + // Keep nbins < 120 so si = nbins - arr*scale + 1 fits in uint8_t + // without saturating the screen_index = 255 cap below. + assert(nbins < 120); double scale = -nbins / log(MIN(cutoff, .1)); - nbins = MIN(127, nbins); #pragma omp parallel { const int nblk = (ngrids+blksize-1) / blksize; @@ -101,8 +104,15 @@ void GTO_screen_index(uint8_t *screen_index, int nbins, double cutoff, - log_coeff; } si = nbins - arr * scale; + /* screen_index is uint8: 0 = screened out, otherwise stored value + * is (raw_si + 1). Cap at 254 to keep the +1 from wrapping mod + * 256, which would silently demote a very-significant entry + * (large -arr from extremely tight AOs) to 0 = "screened out". + * Behavior for si <= 254 is unchanged. */ if (si <= 0) { screen_index[ib*nbas+bas_id] = 0; + } else if (si > 254) { + screen_index[ib*nbas+bas_id] = 255; } else { screen_index[ib*nbas+bas_id] = (uint8_t)(si + 1); } diff --git a/pyscf/lib/gto/nr_ecp.c b/pyscf/lib/gto/nr_ecp.c index 652033323a..f59ca31caa 100644 --- a/pyscf/lib/gto/nr_ecp.c +++ b/pyscf/lib/gto/nr_ecp.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include "cint.h" @@ -4686,7 +4687,11 @@ static const double _j_inv[] = { // 1/j }; void ECPsph_ine_opt(double *out, int order, double z) { - if (z < 1e-7 || z > 16) { + // The default branch below reads/writes k0[0..order+K_TAYLOR_MAX] which + // would overrun the K_TAB_COL-wide _sph_ine_tab row (and the K_TAB_COL + // local buf) when order > K_TAB_COL-K_TAYLOR_MAX-1 = 16. Fall back to + // the slow general routine for high-l basis combined with high-l ECP. 
+ if (z < 1e-7 || z > 16 || order > K_TAB_COL - K_TAYLOR_MAX - 1) { return ECPsph_ine(out, order, z); } else { /* @@ -5457,7 +5462,7 @@ int ECPtype2_cart(double *gctr, int *shls, int *ecpbas, int necpbas, pradi = radi + ic * nrs * lilc1; pradj = radj + jc * nrs * ljlc1; for (lab = 0; lab <= li+lj; lab++, ijl++) { - if (!converged[ijl]) { + if (converged[ijl] < 2) { prur = rur + lab * nrs; prad = rad_all + ijl*d2; for (i = 0; i < d2; i++) { @@ -5474,11 +5479,20 @@ int ECPtype2_cart(double *gctr, int *shls, int *ecpbas, int necpbas, prad[i*ljlc1+j] = s; } } - for (i = 0; i < d2; i++) { - if (!CLOSE_ENOUGH(plast[i],prad[i])) { + { + int _pair_close = 1; + for (i = 0; i < d2; i++) { + if (!CLOSE_ENOUGH(plast[i],prad[i])) { + _pair_close = 0; + break; + } + } + if (_pair_close) { + converged[ijl] += 1; + if (converged[ijl] < 2) { all_conv = 0; } + } else { converged[ijl] = 0; all_conv = 0; - break; } } } @@ -5571,7 +5585,6 @@ int ECPtype_so_cart(double *gctr, int *shls, int *ecpbas, int necpbas, MARK_STACK; MALLOC_INSTACK(angi, (li+1)*nfi*(ECP_LMAX*2+1)*(li+ECP_LMAX+1)); MALLOC_INSTACK(angj, (lj+1)*nfj*(ECP_LMAX*2+1)*(lj+ECP_LMAX+1)); - MALLOC_INSTACK(buf, nfi*(ECP_LMAX*2+1)*(lj+ECP_LMAX+1)); MALLOC_INSTACK(jmm_angj, (lj+1)*nfj*(ECP_LMAX*2+1)*(lj+ECP_LMAX+1)*3); MALLOC_INSTACK(buf, nfi*(ECP_LMAX*2+1)*(lj+ECP_LMAX+1)); @@ -5613,6 +5626,19 @@ int ECPtype_so_cart(double *gctr, int *shls, int *ecpbas, int necpbas, n = ecpbas[ATOM_OF+ecploc[iloc]*BAS_SLOTS]; lc = ecp_lmax[n] + 1; } + // _angular_moment_matrix[] only has entries for lc in 0..4 + // (s..g). Higher-l SO-ECP projectors (or Ul fallbacks that push + // lc past 4) would read past the table in transform_angj and + // overflow the angi/angj/jmm_angj allocations sized assuming + // lc <= ECP_LMAX. Skip rather than crash silently. 
+ if (lc > 4) { + fprintf(stderr, + "ECPtype_so_cart: SO-ECP projector with lc=%d " + "(atom %d) is not supported (max lc=4); " + "skipping.\n", + lc, ecpbas[ATOM_OF+ecploc[iloc]*BAS_SLOTS]); + continue; + } atm_id = ecpbas[ATOM_OF+ecploc[iloc]*BAS_SLOTS]; rc = env + atm[PTR_COORD+atm_id*ATM_SLOTS]; ecpshls = ecploc + iloc; @@ -5672,7 +5698,7 @@ int ECPtype_so_cart(double *gctr, int *shls, int *ecpbas, int necpbas, pradi = radi + ic * nrs * lilc1; pradj = radj + jc * nrs * ljlc1; for (lab = 0; lab <= li+lj; lab++, ijl++) { - if (!converged[ijl]) { + if (converged[ijl] < 2) { prur = rur + lab * nrs; prad = rad_all + ijl*d2; for (i = 0; i < d2; i++) { @@ -5689,11 +5715,20 @@ int ECPtype_so_cart(double *gctr, int *shls, int *ecpbas, int necpbas, prad[i*ljlc1+j] = s; } } - for (i = 0; i < d2; i++) { - if (!CLOSE_ENOUGH(plast[i], prad[i])) { + { + int _pair_close = 1; + for (i = 0; i < d2; i++) { + if (!CLOSE_ENOUGH(plast[i], prad[i])) { + _pair_close = 0; + break; + } + } + if (_pair_close) { + converged[ijl] += 1; + if (converged[ijl] < 2) { all_conv = 0; } + } else { converged[ijl] = 0; all_conv = 0; - break; } } } @@ -5919,7 +5954,7 @@ int ECPtype1_cart(double *gctr, int *shls, int *ecpbas, int necpbas, all_conv = 1; for (ip = 0; ip < npi; ip++) { for (jp = 0; jp < npj; jp++) { - if (!converged[ip*npj+jp]) { + if (converged[ip*npj+jp] < 2) { prad = rad_all + (ip*npj+jp)*d2; for (i = 0; i < d2; i++) { plast[i] = prad[i]; @@ -5930,12 +5965,20 @@ int ECPtype1_cart(double *gctr, int *shls, int *ecpbas, int necpbas, rij[2] = ai[ip] * rca[2] + aj[jp] * rcb[2]; type1_rad_part(prad, li+lj, sqrt(SQUARE(rij))*2, ai[ip]+aj[jp], ur, rs+start, nrs, step, cache); - converged[ip*npj+jp] = 1; - for (i = 0; i < d2; i++) { - if (!CLOSE_ENOUGH(plast[i],prad[i])) { + { + int _pair_close = 1; + for (i = 0; i < d2; i++) { + if (!CLOSE_ENOUGH(plast[i],prad[i])) { + _pair_close = 0; + break; + } + } + if (_pair_close) { + converged[ip*npj+jp] += 1; + if (converged[ip*npj+jp] < 2) { 
all_conv = 0; } + } else { converged[ip*npj+jp] = 0; all_conv = 0; - break; } } } @@ -6389,7 +6432,7 @@ void ECPdel_optimizer(ECPOpt **opt) free(opt0->u_ecp); } free(opt0); - opt = NULL; + *opt = NULL; } diff --git a/pyscf/lib/gto/nr_ecp.h b/pyscf/lib/gto/nr_ecp.h index 98bd122dd7..68da22e4a0 100644 --- a/pyscf/lib/gto/nr_ecp.h +++ b/pyscf/lib/gto/nr_ecp.h @@ -13,7 +13,7 @@ #define SIM_ZERO 1e-50 #define EXPCUTOFF 39 // 1e-17 #define CUTOFF 460 // ~ 1e200 -#define CLOSE_ENOUGH(x, y) (fabs(x-y) < 1e-12*fabs(y) || fabs(x-y) < 1e-12) +#define CLOSE_ENOUGH(x, y) (fabs(x-y) <= 1e-12 * fmax(fabs(x), fabs(y))) #define SQUARE(r) (r[0]*r[0]+r[1]*r[1]+r[2]*r[2]) #define CART_CUM (455+1) // upto l = 12 #define K_TAYLOR_MAX 7 diff --git a/pyscf/lib/mcscf/fci_rdm.c b/pyscf/lib/mcscf/fci_rdm.c index ef7ea2aa5d..9b81156b0b 100644 --- a/pyscf/lib/mcscf/fci_rdm.c +++ b/pyscf/lib/mcscf/fci_rdm.c @@ -149,7 +149,8 @@ static void tril_particle_symm(double *rdm2, double *tbra, double *tket, const char TRANS_T = 'T'; int nnorb = norb * norb; int i, j, k, m, n; - int blk = MIN(((int)(48/norb))*norb, nnorb); + int blk_units = 48 / norb; + int blk = MIN(MAX(blk_units, 1) * norb, nnorb); double *buf = malloc(sizeof(double) * nnorb*bcount); double *p1; @@ -309,7 +310,7 @@ void FCIrdm12kern_sf(double *rdm1, double *rdm2, double *bra, double *ket, clink_indexa, clink_indexb); if (csum > CSUMTHR) { dgemv_(&TRANS_N, &nnorb, &bcount, &D1, buf, &nnorb, - ket+stra_id*nb+strb_id, &INC1, &D1, rdm1, &INC1); + ket+stra_id*(size_t)nb+strb_id, &INC1, &D1, rdm1, &INC1); switch (symm) { case BRAKETSYM: dsyrk_(&UP, &TRANS_N, &nnorb, &bcount, @@ -366,7 +367,7 @@ void FCIrdm12kern_spin0(double *rdm1, double *rdm2, double *bra, double *ket, } if (csum > CSUMTHR) { dgemv_(&TRANS_N, &nnorb, &fill1, &D2, buf, &nnorb, - ket+stra_id*na+strb_id, &INC1, &D1, rdm1, &INC1); + ket+stra_id*(size_t)na+strb_id, &INC1, &D1, rdm1, &INC1); for (i = fill0*nnorb; i < fill1*nnorb; i++) { buf[i] *= SQRT2; @@ -417,7 +418,7 @@ void 
FCItdm12kern_sf(double *tdm1, double *tdm2, double *bra, double *ket, clink_indexa, clink_indexb); if (csum < CSUMTHR) { goto _normal_end; } dgemv_(&TRANS_N, &nnorb, &bcount, &D1, buf0, &nnorb, - bra+stra_id*nb+strb_id, &INC1, &D1, tdm1, &INC1); + bra+stra_id*(size_t)nb+strb_id, &INC1, &D1, tdm1, &INC1); switch (symm) { case PARTICLESYM: tril_particle_symm(tdm2, buf1, buf0, bcount, norb, D1, D1); @@ -456,7 +457,7 @@ void FCIrdm12kern_a(double *rdm1, double *rdm2, double *bra, double *ket, norb, nb, nlinka, clink_indexa); if (csum > CSUMTHR) { dgemv_(&TRANS_N, &nnorb, &bcount, &D1, buf, &nnorb, - ket+stra_id*nb+strb_id, &INC1, &D1, rdm1, &INC1); + ket+stra_id*(size_t)nb+strb_id, &INC1, &D1, rdm1, &INC1); switch (symm) { case BRAKETSYM: dsyrk_(&UP, &TRANS_N, &nnorb, &bcount, @@ -494,7 +495,7 @@ void FCIrdm12kern_b(double *rdm1, double *rdm2, double *bra, double *ket, norb, nb, nlinkb, clink_indexb); if (csum > CSUMTHR) { dgemv_(&TRANS_N, &nnorb, &bcount, &D1, buf, &nnorb, - ket+stra_id*nb+strb_id, &INC1, &D1, rdm1, &INC1); + ket+stra_id*(size_t)nb+strb_id, &INC1, &D1, rdm1, &INC1); switch (symm) { case BRAKETSYM: dsyrk_(&UP, &TRANS_N, &nnorb, &bcount, @@ -533,7 +534,7 @@ void FCItdm12kern_a(double *tdm1, double *tdm2, double *bra, double *ket, norb, nb, nlinka, clink_indexa); if (csum < CSUMTHR) { goto _normal_end; } dgemv_(&TRANS_N, &nnorb, &bcount, &D1, buf0, &nnorb, - bra+stra_id*nb+strb_id, &INC1, &D1, tdm1, &INC1); + bra+stra_id*(size_t)nb+strb_id, &INC1, &D1, tdm1, &INC1); switch (symm) { case PARTICLESYM: tril_particle_symm(tdm2, buf1, buf0, bcount, norb, D1, D1); @@ -568,7 +569,7 @@ void FCItdm12kern_b(double *tdm1, double *tdm2, double *bra, double *ket, norb, nb, nlinkb, clink_indexb); if (csum < CSUMTHR) { goto _normal_end; } dgemv_(&TRANS_N, &nnorb, &bcount, &D1, buf0, &nnorb, - bra+stra_id*nb+strb_id, &INC1, &D1, tdm1, &INC1); + bra+stra_id*(size_t)nb+strb_id, &INC1, &D1, tdm1, &INC1); switch (symm) { case PARTICLESYM: tril_particle_symm(tdm2, buf1, 
buf0, bcount, norb, D1, D1); diff --git a/pyscf/lib/mcscf/fci_string.c b/pyscf/lib/mcscf/fci_string.c index 5ef53b5161..e04e90314b 100644 --- a/pyscf/lib/mcscf/fci_string.c +++ b/pyscf/lib/mcscf/fci_string.c @@ -187,7 +187,9 @@ void FCIaddrs2str(uint64_t *strings, int *addrs, int count, int norb, int nelec) for (i = 0; i < count; i++) { addr = addrs[i]; if (addr == 0 || nelec == norb || nelec == 0) { - strings[i] = (1UL << nelec) - 1UL; + // 1ULL not 1UL: on LLP64 (Windows) "unsigned long" is + // 32 bits, so 1UL << nelec wraps for nelec >= 32. + strings[i] = (1ULL << nelec) - 1ULL; continue; } @@ -199,10 +201,10 @@ void FCIaddrs2str(uint64_t *strings, int *addrs, int count, int norb, int nelec) if (nelec_left == 0) { break; } else if (addr == 0) { - str1 |= (1UL << nelec_left) - 1UL; + str1 |= (1ULL << nelec_left) - 1ULL; break; } else if (nextaddr <= addr) { - str1 |= 1UL << norb_left; + str1 |= 1ULL << norb_left; addr -= nextaddr; nextaddr *= nelec_left; nextaddr /= norb_left; diff --git a/pyscf/lib/mcscf/select_ci.c b/pyscf/lib/mcscf/select_ci.c index 9486c68821..4652de6e80 100644 --- a/pyscf/lib/mcscf/select_ci.c +++ b/pyscf/lib/mcscf/select_ci.c @@ -355,6 +355,11 @@ void SCIcontract_2e_bbaa(double *eri, double *ci0, double *ci1, FCIcompress_link_tril(clinka, link_indexa, na, nlinka); FCIcompress_link_tril(clinkb, link_indexb, nb, nlinkb); + // NOTE: ci1 is intentionally NOT zeroed here. The Python wrappers + // (selected_ci.py / selected_ci_spin0.py) call this after the + // (aa|aa) and (bb|bb) SCIcontract_2e_aaaa kernels and rely on the + // (bb|aa) contribution being accumulated on top. + #pragma omp parallel { int strk, ib, blen; @@ -573,6 +578,11 @@ void SCIcontract_2e_bbaa_symm(double *eri, double *ci0, double *ci1, FCIcompress_link_tril(clinka, link_indexa, na, nlinka); FCIcompress_link_tril(clinkb, link_indexb, nb, nlinkb); + // NOTE: ci1 is intentionally NOT zeroed here. 
The Python wrappers + // (selected_ci_symm.py / selected_ci_spin0_symm.py) call this after + // the (aa|aa) and (bb|bb) SCIcontract_2e_aaaa_symm kernels and + // rely on accumulation. + #pragma omp parallel { int strk, ib, blen; diff --git a/pyscf/lib/misc.py b/pyscf/lib/misc.py index 746e8583e8..6b8236ffe6 100644 --- a/pyscf/lib/misc.py +++ b/pyscf/lib/misc.py @@ -22,6 +22,7 @@ import os import sys +import atexit import time import random import platform @@ -91,36 +92,89 @@ def _ldd(so_file): c_int_p = ctypes.POINTER(ctypes.c_int) c_null_ptr = ctypes.POINTER(ctypes.c_void_p) +_dll_deps = { + 'libcgto': ['libcint'], + 'libcvhf': ['libcint'], + 'libao2mo': ['libcint', 'libcvhf'], + 'libdft': ['libcvhf', 'libcgto', 'libcint'], + 'libpbc': ['libcint', 'libcgto'], + 'libri': ['libao2mo', 'libcvhf', 'libcgto', 'libcint'], + 'libxc_itrf': ['xc'], + 'libxcfun_itrf': ['xcfun'], +} + @functools.lru_cache(128) def load_library(libname): + lib = None try: _loaderpath = os.path.dirname(__file__) - return numpy.ctypeslib.load_library(libname, _loaderpath) + lib = numpy.ctypeslib.load_library(libname, _loaderpath) except OSError: + pass + + if lib is None and sys.platform == 'win32': + for env_path in [os.path.join(sys.prefix, 'Library', 'bin'), + os.path.join(sys.prefix, 'Library', 'lib')]: + try: + lib = numpy.ctypeslib.load_library(libname, env_path) + break + except OSError: + pass + + if lib is None: from pyscf import __path__ as ext_modules for path in ext_modules: libpath = os.path.join(path, 'lib') if os.path.isdir(libpath): for files in os.listdir(libpath): if files.startswith(libname): - return numpy.ctypeslib.load_library(libname, libpath) - raise + lib = numpy.ctypeslib.load_library(libname, libpath) + break + if lib is not None: + break + if lib is None: + raise OSError(f'Library {libname} not found') + + if sys.platform == 'win32' and libname in _dll_deps: + deps = [load_library(d) for d in _dll_deps[libname]] + lib = make_dll_wrapper(lib, *deps) + return lib + + 
+def make_dll_wrapper(lib, *fallbacks): + if sys.platform != 'win32': + return lib + class _DllWrapper: + def __init__(self, primary, *fallbacks): + object.__setattr__(self, '_primary', primary) + object.__setattr__(self, '_fallbacks', fallbacks) + def __getattr__(self, name): + for dll in (self._primary,) + self._fallbacks: + try: + return getattr(dll, name) + except AttributeError: + pass + raise AttributeError(f"function '{name}' not found") + return _DllWrapper(lib, *fallbacks) #Fixme, the standard resource module gives wrong number when objects are released # http://fa.bianp.net/blog/2013/different-ways-to-get-memory-consumption-or-lessons-learned-from-memory_profiler/#fn:1 #or use slow functions as memory_profiler._get_memory did -CLOCK_TICKS = os.sysconf("SC_CLK_TCK") -PAGESIZE = os.sysconf("SC_PAGE_SIZE") def current_memory(): '''Return the size of used memory and allocated virtual memory (in MB)''' - #import resource - #return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000 if sys.platform.startswith('linux'): + pagesize = os.sysconf("SC_PAGE_SIZE") with open("/proc/%s/statm" % os.getpid()) as f: - vms, rss = [int(x)*PAGESIZE for x in f.readline().split()[:2]] + vms, rss = [int(x) * pagesize for x in f.readline().split()[:2]] return rss/1e6, vms/1e6 else: - return 0, 0 + try: + import psutil + process = psutil.Process(os.getpid()) + mem_info = process.memory_info() + return mem_info.rss/1e6, mem_info.vms/1e6 + except (ImportError, Exception): + return 0, 0 def num_threads(n=None): '''Set the number of OMP threads. 
If argument is not specified, the @@ -492,7 +546,7 @@ def __enter__(self): self._contents = None self.old_stdout_fileno = sys.stdout.fileno() self.bak_stdout_fd = os.dup(self.old_stdout_fileno) - self.ftmp = tempfile.NamedTemporaryFile(dir=param.TMPDIR) + self.ftmp = NamedTemporaryFile(dir=param.TMPDIR) os.dup2(self.ftmp.file.fileno(), self.old_stdout_fileno) return self def __exit__(self, type, value, traceback): @@ -1257,6 +1311,35 @@ def __exit__(self, type, value, traceback): self.close() +def NamedTemporaryFile(*args, **kwargs): + '''Create a named temporary file object. This function wraps + `tempfile.NamedTemporaryFile`. On Windows, `delete=False` is forced + to prevent permission errors when the file is reopened by another + handle. + + Examples: + + >>> from pyscf import lib + >>> ftmp = lib.NamedTemporaryFile() + >>> ftmp.name + ''' + if sys.platform == 'win32': + kwargs['delete'] = False + f = tempfile.NamedTemporaryFile(*args, **kwargs) + if sys.platform == 'win32': + def _close_and_unlink(): + try: + f.close() + except Exception: + pass + try: + os.unlink(f.name) + except OSError: + pass + atexit.register(_close_and_unlink) + return f + + def fingerprint(a): '''Fingerprint of numpy array''' a = numpy.asarray(a) diff --git a/pyscf/lib/np_helper/condense.c b/pyscf/lib/np_helper/condense.c index 586ca80e07..56051bbd91 100644 --- a/pyscf/lib/np_helper/condense.c +++ b/pyscf/lib/np_helper/condense.c @@ -251,6 +251,9 @@ int8_t NP_all(int8_t *a, int nd, int di, int dj) uint8_t NP_Bmax(uint8_t *a, int nd, int di, int dj) { + if (di == 0 || dj == 0) { + return 0; + } int i, j; uint8_t out = a[0]; for (i = 0; i < di; i++) { @@ -262,6 +265,9 @@ uint8_t NP_Bmax(uint8_t *a, int nd, int di, int dj) int NP_imax(int *a, int nd, int di, int dj) { + if (di == 0 || dj == 0) { + return 0; + } int i, j; int out = a[0]; for (i = 0; i < di; i++) { @@ -273,6 +279,9 @@ int NP_imax(int *a, int nd, int di, int dj) float NP_fmax(float *a, int nd, int di, int dj) { + if (di == 0 
|| dj == 0) { + return 0.f; + } int i, j; float out = a[0]; for (i = 0; i < di; i++) { diff --git a/pyscf/lib/np_helper/pack_tril.c b/pyscf/lib/np_helper/pack_tril.c index 0859c44305..f855f09c03 100644 --- a/pyscf/lib/np_helper/pack_tril.c +++ b/pyscf/lib/np_helper/pack_tril.c @@ -217,8 +217,8 @@ void NPdunpack_tril_2d(int count, int n, double *tril, double *mat, int hermi) shared(count, n, tril, mat, hermi) { int ic; - size_t nn = n * n; - size_t n2 = n*(n+1)/2; + size_t nn = (size_t)n * n; + size_t n2 = (size_t)n*(n+1)/2; #pragma omp for schedule (static) for (ic = 0; ic < count; ic++) { NPdunpack_tril(n, tril+n2*ic, mat+nn*ic, hermi); @@ -233,8 +233,8 @@ void NPzunpack_tril_2d(int count, int n, shared(count, n, tril, mat, hermi) { int ic; - size_t nn = n * n; - size_t n2 = n*(n+1)/2; + size_t nn = (size_t)n * n; + size_t n2 = (size_t)n*(n+1)/2; #pragma omp for schedule (static) for (ic = 0; ic < count; ic++) { NPzunpack_tril(n, tril+n2*ic, mat+nn*ic, hermi); @@ -248,8 +248,8 @@ void NPdpack_tril_2d(int count, int n, double *tril, double *mat) shared(count, n, tril, mat) { int ic; - size_t nn = n * n; - size_t n2 = n*(n+1)/2; + size_t nn = (size_t)n * n; + size_t n2 = (size_t)n*(n+1)/2; #pragma omp for schedule (static) for (ic = 0; ic < count; ic++) { NPdpack_tril(n, tril+n2*ic, mat+nn*ic); @@ -263,8 +263,8 @@ void NPzpack_tril_2d(int count, int n, double complex *tril, double complex *mat shared(count, n, tril, mat) { int ic; - size_t nn = n * n; - size_t n2 = n*(n+1)/2; + size_t nn = (size_t)n * n; + size_t n2 = (size_t)n*(n+1)/2; #pragma omp for schedule (static) for (ic = 0; ic < count; ic++) { NPzpack_tril(n, tril+n2*ic, mat+nn*ic); diff --git a/pyscf/lib/np_helper/transpose.c b/pyscf/lib/np_helper/transpose.c index cb4ba042d5..265debc3f9 100644 --- a/pyscf/lib/np_helper/transpose.c +++ b/pyscf/lib/np_helper/transpose.c @@ -56,7 +56,7 @@ void NPdtranspose_021(int *shape, double *a, double *at) shared(shape, a, at) { int ic; - size_t nm = shape[1] * 
shape[2]; + size_t nm = (size_t)shape[1] * shape[2]; #pragma omp for schedule (static) for (ic = 0; ic < shape[0]; ic++) { NPdtranspose(shape[1], shape[2], a+ic*nm, at+ic*nm); @@ -70,7 +70,7 @@ void NPztranspose_021(int *shape, double complex *a, double complex *at) shared(shape, a, at) { int ic; - size_t nm = shape[1] * shape[2]; + size_t nm = (size_t)shape[1] * shape[2]; #pragma omp for schedule (static) for (ic = 0; ic < shape[0]; ic++) { NPztranspose(shape[1], shape[2], a+ic*nm, at+ic*nm); @@ -132,7 +132,7 @@ void NPdsymm_021_sum(int *shape, double *a, double *out, int hermi) shared(shape, a, out, hermi) { int ic; - size_t nn = shape[1] * shape[1]; + size_t nn = (size_t)shape[1] * shape[1]; #pragma omp for schedule (static) for (ic = 0; ic < shape[0]; ic++) { NPdsymm_sum(shape[1], a+ic*nn, out+ic*nn, hermi); @@ -146,7 +146,7 @@ void NPzhermi_021_sum(int *shape, double complex *a, double complex *out, int he shared(shape, a, out, hermi) { int ic; - size_t nn = shape[1] * shape[1]; + size_t nn = (size_t)shape[1] * shape[1]; #pragma omp for schedule (static) for (ic = 0; ic < shape[0]; ic++) { NPzhermi_sum(shape[1], a+ic*nn, out+ic*nn, hermi); diff --git a/pyscf/lib/numpy_helper.py b/pyscf/lib/numpy_helper.py index 276f0288cf..fd90ff4070 100644 --- a/pyscf/lib/numpy_helper.py +++ b/pyscf/lib/numpy_helper.py @@ -153,10 +153,16 @@ def contract(subscripts, A, B, alpha=1, beta=0, out=None, **kwargs): if A.size < EINSUM_MAX_SIZE or B.size < EINSUM_MAX_SIZE: return _numpy_einsum(idx_str, A, B, alpha=alpha, beta=beta, out=out) + C_dtype = numpy.result_type(A, B) if EINSUM_BACKEND == 'pytblis': + # pytblis cannot apply alpha/beta when it has to fall back to numpy + # tensordot, and it requires the output to share the IEEE type of the + # inputs. Route these cases through numpy instead. 
+ if ((out is not None and out.dtype != C_dtype) or + numpy.result_type(C_dtype, alpha, beta) != C_dtype): + return _numpy_einsum(idx_str, A, B, alpha=alpha, beta=beta, out=out) return pytblis.contract(idx_str, A, B, alpha=alpha, beta=beta, out=out) - C_dtype = numpy.result_type(A, B) if EINSUM_BACKEND =='pyscf-tblis' and C_dtype == numpy.double: # tblis is slow for complex type return tblis_einsum.contract(idx_str, A, B, alpha=alpha, beta=beta, out=out) diff --git a/pyscf/lib/pbc/fill_ints.c b/pyscf/lib/pbc/fill_ints.c index 8c1a30fb2e..4e76098263 100644 --- a/pyscf/lib/pbc/fill_ints.c +++ b/pyscf/lib/pbc/fill_ints.c @@ -1350,7 +1350,8 @@ static int _nr2c_fill(int (*intor)(), double complex *out, int ishloc[ish1-ish0+1]; int nishloc = shloc_partition(ishloc, ao_loc, ish0, ish1, dimax); - int m, msh0, msh1, dmjc, ish, di, empty; + int m, msh0, msh1, dmjc, ish, di; + int empty = 1; int jL; int shls[2]; double *bufk_r = buf; diff --git a/pyscf/lib/pbc/fill_ints_screened.c b/pyscf/lib/pbc/fill_ints_screened.c index 5d100c7ae3..63c648daae 100644 --- a/pyscf/lib/pbc/fill_ints_screened.c +++ b/pyscf/lib/pbc/fill_ints_screened.c @@ -265,7 +265,7 @@ static void _nr3c_screened_sum_auxbas_fill_g(int (*intor)(), void (*fsort)(), do for (ksh = ksh0; ksh < ksh1; ksh++){ dk = ao_loc[ksh+1] - ao_loc[ksh]; - assert(dk < dkmax); + assert(dk <= dkmax); dijk = dij * dk; shls[2] = ksh; ksh_off = ksh - nshij; @@ -646,7 +646,7 @@ static void _nr3c1e_screened_nuc_grad_fill_g(int (*intor)(), void (*fcontract)() for (ksh = ksh0; ksh < ksh1; ksh++){ dk = ao_loc[ksh+1] - ao_loc[ksh]; - assert(dk < dkmax); + assert(dk <= dkmax); dijk = dij * dk; shls[2] = ksh; ksh_off = ksh - nbas*2; @@ -894,7 +894,8 @@ static int _nr2c_screened_fill( int ishloc[ish1-ish0+1]; int nishloc = shloc_partition(ishloc, ao_loc, ish0, ish1, dimax); - int m, msh0, msh1, dijc, dmjc, ish, di, empty; + int m, msh0, msh1, dijc, dmjc, ish, di; + int empty = 1; int jL, idx_j; int shls[2]; double *bufk_r = buf; diff --git 
a/pyscf/lib/pbc/hf_grad.c b/pyscf/lib/pbc/hf_grad.c index 7c781fba19..aa3ca32ede 100644 --- a/pyscf/lib/pbc/hf_grad.c +++ b/pyscf/lib/pbc/hf_grad.c @@ -67,7 +67,10 @@ void contract_vhf_dm(double* out, double* vhf, double* dm, jsh = ij % njsh + jsh0; if (nl0 != NULL) { - nimgs = ((nl0->pairs)[ish*nbas + jsh])->nimgs; + // Use nl0->njsh, not nbas: the neighbor list may have been built + // with a narrower shls_slice and indexing with nbas would read + // past the pairs allocation. + nimgs = ((nl0->pairs)[ish*nl0->njsh + jsh])->nimgs; } if (nimgs > 0) { // this shell pair has contribution p0 = ao_loc[ish] - ao_loc[ish0]; @@ -80,7 +83,10 @@ void contract_vhf_dm(double* out, double* vhf, double* dm, pdm = dm + (p0 * naoj + q0); for (ic = 0; ic < comp; ic++) { for (i = 0; i < ni; i++) { - buf[iatm*3+ic] += ddot_(&nj, pvhf+i*naoj, &I1, pdm+i*naoj, &I1); + // Stride is comp, not 3: comp is the gradient/derivative + // component count and is 3 for first derivatives but can + // be larger for higher-order routines. 
+ buf[iatm*comp+ic] += ddot_(&nj, pvhf+i*naoj, &I1, pdm+i*naoj, &I1); } pvhf += naoi * naoj; } diff --git a/pyscf/lib/pbc/inner_dot.c b/pyscf/lib/pbc/inner_dot.c index a44458957c..4d9cd5ad99 100644 --- a/pyscf/lib/pbc/inner_dot.c +++ b/pyscf/lib/pbc/inner_dot.c @@ -252,13 +252,13 @@ void PBC_zdot_CNC_s1(double *outR, double *outI, double *aR, double *aI, } dg = ig1 - ig0; dgemm_(&TRANS_T, &TRANS_N, &nc, &dab, &dg, - &D1, cR+ig0, &ng, bufR, &gsize, &D1, outR, &nc); + &D1, cR+ig0, &ng, bufR, &gsize, &D1, poutR, &nc); dgemm_(&TRANS_T, &TRANS_N, &nc, &dab, &dg, - &D1, cI+ig0, &ng, bufI, &gsize, &D1, outR, &nc); + &D1, cI+ig0, &ng, bufI, &gsize, &D1, poutR, &nc); dgemm_(&TRANS_T, &TRANS_N, &nc, &dab, &dg, - &D1, cR+ig0, &ng, bufI, &gsize, &D1, outI, &nc); + &D1, cR+ig0, &ng, bufI, &gsize, &D1, poutI, &nc); dgemm_(&TRANS_T, &TRANS_N, &nc, &dab, &dg, - &ND1, cI+ig0, &ng, bufR, &gsize, &D1, outI, &nc); + &ND1, cI+ig0, &ng, bufR, &gsize, &D1, poutI, &nc); } } free(bufR); @@ -378,13 +378,13 @@ void PBC_zdot_CNN_s1(double *outR, double *outI, double *aR, double *aI, } dg = ig1 - ig0; dgemm_(&TRANS_T, &TRANS_N, &nc, &dab, &dg, - &D1, cR+ig0, &ng, bufR, &gsize, &D1, outR, &nc); + &D1, cR+ig0, &ng, bufR, &gsize, &D1, poutR, &nc); dgemm_(&TRANS_T, &TRANS_N, &nc, &dab, &dg, - &ND1, cI+ig0, &ng, bufI, &gsize, &D1, outR, &nc); + &ND1, cI+ig0, &ng, bufI, &gsize, &D1, poutR, &nc); dgemm_(&TRANS_T, &TRANS_N, &nc, &dab, &dg, - &D1, cR+ig0, &ng, bufI, &gsize, &D1, outI, &nc); + &D1, cR+ig0, &ng, bufI, &gsize, &D1, poutI, &nc); dgemm_(&TRANS_T, &TRANS_N, &nc, &dab, &dg, - &D1, cI+ig0, &ng, bufR, &gsize, &D1, outI, &nc); + &D1, cI+ig0, &ng, bufR, &gsize, &D1, poutI, &nc); } } free(bufR); diff --git a/pyscf/lib/pbc/nr_direct.c b/pyscf/lib/pbc/nr_direct.c index 299340186d..6ed6711b87 100644 --- a/pyscf/lib/pbc/nr_direct.c +++ b/pyscf/lib/pbc/nr_direct.c @@ -954,8 +954,6 @@ void PBCVHF_direct_drv_nodddd( k = kl / nlsh + ksh0; l = kl % nlsh + lsh0; } - k = kl / nlsh + ksh0; - l = kl % nlsh + 
lsh0; qklkl_max = _max_qindex(qindex, Nbas, seg2sh[seg_loc[k]], seg2sh[seg_loc[k+1]], seg2sh[seg_loc[l]], seg2sh[seg_loc[l+1]]); diff --git a/pyscf/lib/pbc/optimizer.c b/pyscf/lib/pbc/optimizer.c index a37494ca0a..366815da80 100644 --- a/pyscf/lib/pbc/optimizer.c +++ b/pyscf/lib/pbc/optimizer.c @@ -43,7 +43,7 @@ void PBCdel_optimizer(PBCOpt **opt) if (opt0->rrcut != NULL) { free(opt0->rrcut); } - if (!opt0->rcut) { + if (opt0->rcut != NULL) { free(opt0->rcut); } free(opt0); diff --git a/pyscf/lib/pdft/nr_numint.c b/pyscf/lib/pdft/nr_numint.c index 6a14771b11..ae8cf94b2a 100644 --- a/pyscf/lib/pdft/nr_numint.c +++ b/pyscf/lib/pdft/nr_numint.c @@ -79,7 +79,7 @@ static void dot_ao_mo(double *vv, double *ao, double *mo, lenj = MIN(nmo-b0j, BOXSIZE); dgemm_(&TRANS_T, &TRANS_N, &lenj, &leni, &bgrids, &D1, mo+b0j*ngrids, &ngrids, ao+b0i*ngrids, &ngrids, - &D1, vv+b0i*nao+b0j, &nmo); + &D1, vv+b0i*nmo+b0j, &nmo); } } } } else { diff --git a/pyscf/lib/test/test_chkfile.py b/pyscf/lib/test/test_chkfile.py index 06101f3e4b..fe136177fa 100644 --- a/pyscf/lib/test/test_chkfile.py +++ b/pyscf/lib/test/test_chkfile.py @@ -15,14 +15,13 @@ import unittest import numpy -import tempfile from pyscf import lib, gto class KnownValues(unittest.TestCase): def test_save_load_mol(self): mol = gto.M(atom=[['H', (0,0,i)] for i in range(8)], basis='sto3g') - fchk = tempfile.NamedTemporaryFile() + fchk = lib.NamedTemporaryFile() lib.chkfile.save_mol(mol, fchk.name) mol1 = lib.chkfile.load_mol(fchk.name) self.assertTrue(numpy.all(mol1._atm == mol._atm)) @@ -30,7 +29,7 @@ def test_save_load_mol(self): self.assertTrue(numpy.all(mol1._env == mol._env)) def test_save_load_arrays(self): - fchk = tempfile.NamedTemporaryFile() + fchk = lib.NamedTemporaryFile() a = numpy.eye(3) lib.chkfile.save(fchk.name, 'a', a) self.assertTrue(numpy.all(a == lib.chkfile.load(fchk.name, 'a'))) diff --git a/pyscf/lib/test/test_diis.py b/pyscf/lib/test/test_diis.py index c568e84b65..0abce1377b 100644 --- 
a/pyscf/lib/test/test_diis.py +++ b/pyscf/lib/test/test_diis.py @@ -15,7 +15,6 @@ import unittest import numpy -import tempfile from pyscf import lib, gto def make_ab(n): @@ -51,7 +50,7 @@ def test_without_errvec(self): def test_restore(self): a, b, adiag, arest, x = make_ab(16) lib.diis.INCORE_SIZE, bak = 4, lib.diis.INCORE_SIZE - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() ad = lib.diis.DIIS(filename=ftmp.name) for i in range(8): x = (b - arest.dot(x)) / adiag diff --git a/pyscf/lib/test/test_einsum.py b/pyscf/lib/test/test_einsum.py index 4109a6b854..6d247cc2ea 100644 --- a/pyscf/lib/test/test_einsum.py +++ b/pyscf/lib/test/test_einsum.py @@ -3,6 +3,11 @@ from pyscf import lib einsum = lib.einsum +# pytblis does not support contractions between operands of different +# floating-point precision (e.g. float64 vs float32); real/complex mixing at +# the same precision is fine +_pytblis = lib.numpy_helper.EINSUM_BACKEND == 'pytblis' + def setUpModule(): global bak lib.numpy_helper.EINSUM_MAX_SIZE, bak = 0, lib.numpy_helper.EINSUM_MAX_SIZE @@ -92,6 +97,7 @@ def test_dslice_dslice1(self): self.assertTrue(c0.dtype == c1.dtype) self.assertTrue(abs(c0-c1).max() < 1e-14) + @unittest.skipIf(_pytblis, 'pytblis does not support operands of different precision') def test_d_cslice(self): a = numpy.random.random((7,1,3,4)) b = numpy.random.random((2,4,5,7)).astype(numpy.float32) @@ -100,6 +106,7 @@ def test_d_cslice(self): self.assertTrue(c0.dtype == c1.dtype) self.assertTrue(abs(c0-c1).max() < 1e-14) + @unittest.skipIf(_pytblis, 'pytblis does not support operands of different precision') def test_z_cslice(self): a = numpy.random.random((7,1,3,4)).astype(numpy.float32) + 0j b = numpy.random.random((2,4,5,7)) @@ -108,6 +115,7 @@ def test_z_cslice(self): self.assertTrue(c0.dtype == c1.dtype) self.assertTrue(abs(c0-c1).max() < 1e-14) + @unittest.skipIf(_pytblis, 'pytblis does not support operands of different precision') def test_cslice_dslice(self): a = 
numpy.random.random((7,1,3,4)).astype(numpy.float32) + 0j b = numpy.random.random((2,4,5,7)) diff --git a/pyscf/lib/vhf/fill_nr_s8.c b/pyscf/lib/vhf/fill_nr_s8.c index 4a8b1747e6..368799f661 100644 --- a/pyscf/lib/vhf/fill_nr_s8.c +++ b/pyscf/lib/vhf/fill_nr_s8.c @@ -92,7 +92,7 @@ static void store_ij(int (*intor)(), double *eri, double *buf, int ish, int jsh, for (i0 = ao_loc[ish], i = 0; i < di; i++, i0++) { for (j0 = ao_loc[jsh], j = 0; j < dj; j++, j0++) { if (i0 >= j0) { - ij0 = i0*(i0+1)/2 + j0; + ij0 = (size_t)i0*(i0+1)/2 + j0; peri = eri + ij0*(ij0+1)/2; pbuf = buf + nao2 * (i*dj+j); for (kl = 0, k = 0; k < i0; k++) { @@ -123,11 +123,12 @@ void GTO2e_cart_or_sph(int (*intor)(), CINTOpt *cintopt, double *eri, int *ao_lo #pragma omp parallel { - int i, j, ij; - double *buf = malloc(sizeof(double) * (di*di*nao*nao + cache_size)); + size_t i, j, ij; + double *buf = malloc(sizeof(double) * ((size_t)di*di*nao*nao + cache_size)); + size_t nshell_pairs = (size_t)nbas*(nbas+1)/2; #pragma omp for nowait schedule(dynamic, 2) - for (ij = 0; ij < nbas*(nbas+1)/2; ij++) { - i = (int)(sqrt(2*ij+.25) - .5 + 1e-7); + for (ij = 0; ij < nshell_pairs; ij++) { + i = (size_t)(sqrt(2.*ij+.25) - .5 + 1e-7); j = ij - (i*(i+1)/2); store_ij(intor, eri, buf, i, j, vhfopt, &envs); } diff --git a/pyscf/lib/vhf/hessian_screen.c b/pyscf/lib/vhf/hessian_screen.c index 94af78a770..010cb4a6a1 100644 --- a/pyscf/lib/vhf/hessian_screen.c +++ b/pyscf/lib/vhf/hessian_screen.c @@ -120,7 +120,7 @@ void CVHFnr_int2e_pp_q_cond(int (*intor)(), CINTOpt *cintopt, double *q_cond, int *ao_loc, int *atm, int natm, int *bas, int nbas, double *env) { - int nbas2 = nbas * nbas; + size_t nbas2 = (size_t)nbas * nbas; int shls_slice[] = {0, nbas}; const int cache_size = GTOmax_cache_size(intor, shls_slice, 1, atm, natm, bas, nbas, env); @@ -137,7 +137,7 @@ void CVHFnr_int2e_pp_q_cond(int (*intor)(), CINTOpt *cintopt, double *q_cond, dj = ao_loc[ish+1] - ao_loc[ish]; di = MAX(di, dj); } - double *buf = 
malloc(sizeof(double) * 9 * di*di*di*di); + double *buf = malloc(sizeof(double) * 9 * (size_t)di*di*di*di); double *bufx = buf; double *bufy, *bufz; #pragma omp for schedule(dynamic, 4) @@ -202,7 +202,8 @@ void CVHFgrad_jk_direct_scf_dm(CVHFOpt *opt, double *dm, int nset, int *ao_loc, free(opt->dm_cond); } nbas = opt->nbas; - opt->dm_cond = (double *)malloc(sizeof(double) * nbas*nbas); + size_t Nbas = nbas; + opt->dm_cond = (double *)malloc(sizeof(double) * Nbas*Nbas); CVHFnr_dm_cond1(opt->dm_cond, dm, nset, ao_loc, atm, natm, bas, nbas, env); } @@ -288,7 +289,7 @@ void CVHFnr_int2e_pppp_q_cond(int (*intor)(), CINTOpt *cintopt, double *q_cond, int *ao_loc, int *atm, int natm, int *bas, int nbas, double *env) { - int nbas2 = nbas * nbas; + size_t nbas2 = (size_t)nbas * nbas; int shls_slice[] = {0, nbas}; const int cache_size = GTOmax_cache_size(intor, shls_slice, 1, atm, natm, bas, nbas, env); @@ -305,7 +306,7 @@ void CVHFnr_int2e_pppp_q_cond(int (*intor)(), CINTOpt *cintopt, double *q_cond, dj = ao_loc[ish+1] - ao_loc[ish]; di = MAX(di, dj); } - double *buf = malloc(sizeof(double) * 256 * di*di*di*di); + double *buf = malloc(sizeof(double) * 256 * (size_t)di*di*di*di); double *bufxx = buf; double *bufxy, *bufxz, *bufyx, *bufyy, *bufyz, *bufzx, *bufzy, *bufzz; #pragma omp for schedule(dynamic, 4) @@ -363,7 +364,7 @@ void CVHFipip1_direct_scf(CVHFOpt *opt, int (*intor)(), CINTOpt *cintopt, size_t Nbas = nbas; size_t Nbas2 = Nbas * Nbas; // First n*n elements for derivatives, the next n*n elements for regular ERIs - opt->q_cond = (double *)malloc(sizeof(double) * nbas*nbas*2); + opt->q_cond = (double *)malloc(sizeof(double) * Nbas2*2); if (ao_loc[nbas] == CINTtot_cgto_spheric(bas, nbas)) { CVHFnr_int2e_q_cond(int2e_sph, NULL, opt->q_cond+Nbas2, ao_loc, diff --git a/pyscf/lib/vhf/nr_direct.c b/pyscf/lib/vhf/nr_direct.c index 130b40b507..c783583927 100644 --- a/pyscf/lib/vhf/nr_direct.c +++ b/pyscf/lib/vhf/nr_direct.c @@ -18,6 +18,7 @@ #include #include +#include 
#include #include //#include @@ -252,6 +253,11 @@ JKArray *CVHFallocate_JKArray(JKOperator *op, int *shls_slice, int *ao_loc, } jkarray->stack_size = 0; jkarray->data = malloc(sizeof(double) * (size_limit + 136*136)); + if (jkarray->data == NULL) { + fprintf(stderr, "malloc(%zu) failed in CVHFallocate_JKArray\n", + sizeof(double) * (size_limit + 136*136)); + exit(1); + } jkarray->ncomp = ncomp; int keys_max = size_limit / (AO_BLOCK_SIZE*AO_BLOCK_SIZE*ncomp); jkarray->keys_cache = malloc(sizeof(int) * keys_max); @@ -368,9 +374,9 @@ void CVHFnr_direct_drv(int (*intor)(), void (*fdot)(), JKOperator **jkop, shls_slice, ao_loc); } - size_t di = GTOmax_shell_dim(ao_loc, shls_slice, 4); - size_t cache_size = GTOmax_cache_size(intor, shls_slice, 4, - atm, natm, bas, nbas, env); + int64_t di = GTOmax_shell_dim(ao_loc, shls_slice, 4); + int64_t cache_size = GTOmax_cache_size(intor, shls_slice, 4, + atm, natm, bas, nbas, env); int ish0 = shls_slice[0]; int ish1 = shls_slice[1]; int jsh0 = shls_slice[2]; @@ -387,17 +393,23 @@ void CVHFnr_direct_drv(int (*intor)(), void (*fdot)(), JKOperator **jkop, int *block_jloc = block_iloc + nish + 1; int *block_kloc = block_jloc + njsh + 1; int *block_lloc = block_kloc + nksh + 1; - uint32_t nblock_i = CVHFshls_block_partition(block_iloc, shls_slice+0, ao_loc, AO_BLOCK_SIZE); - uint32_t nblock_j = CVHFshls_block_partition(block_jloc, shls_slice+2, ao_loc, AO_BLOCK_SIZE); - uint32_t nblock_k = CVHFshls_block_partition(block_kloc, shls_slice+4, ao_loc, AO_BLOCK_SIZE); - uint32_t nblock_l = CVHFshls_block_partition(block_lloc, shls_slice+6, ao_loc, AO_BLOCK_SIZE); - uint32_t nblock_kl = nblock_k * nblock_l; - uint32_t nblock_jkl = nblock_j * nblock_kl; + // size_t to keep nblock^3 from overflowing for large molecules. The + // same fix was applied to nr_sr_vhf.c. 
+ size_t nblock_i = CVHFshls_block_partition(block_iloc, shls_slice+0, ao_loc, AO_BLOCK_SIZE); + size_t nblock_j = CVHFshls_block_partition(block_jloc, shls_slice+2, ao_loc, AO_BLOCK_SIZE); + size_t nblock_k = CVHFshls_block_partition(block_kloc, shls_slice+4, ao_loc, AO_BLOCK_SIZE); + size_t nblock_l = CVHFshls_block_partition(block_lloc, shls_slice+6, ao_loc, AO_BLOCK_SIZE); + size_t nblock_kl = nblock_k * nblock_l; + size_t nblock_jkl = nblock_j * nblock_kl; int nblock_max = MAX(nblock_i, nblock_j); nblock_max = MAX(nblock_max, nblock_k); nblock_max = MAX(nblock_max, nblock_l); - // up to 1.6 GB per thread - int size_limit = (200000000 - di*di*di*di*ncomp - cache_size) / n_dm; + // up to 3.2 GB per thread. + int64_t size_limit = (400000000 - di*di*di*di*ncomp - cache_size) / n_dm; + if (size_limit < 0) { + fprintf(stderr, "Insufficient memory for caching CVHFnr_direct_drv intermediates\n"); + exit(1); + } #pragma omp parallel { @@ -405,7 +417,8 @@ void CVHFnr_direct_drv(int (*intor)(), void (*fdot)(), JKOperator **jkop, int joff = ao_loc[jsh0]; int koff = ao_loc[ksh0]; int loff = ao_loc[lsh0]; - int i, j, k, l, n, r, blk_id; + int i, j, k, l, n; + size_t r, blk_id; JKArray *v_priv[n_dm]; for (i = 0; i < n_dm; i++) { v_priv[i] = CVHFallocate_JKArray(jkop[i], shls_slice, ao_loc, @@ -516,9 +529,9 @@ void CVHFnr_direct_ex_drv(int (*intor)(), void (*fdot)(), JKOperator **jkop, shls_slice, ao_loc); } - size_t di = GTOmax_shell_dim(ao_loc, shls_slice, 4); - size_t cache_size = GTOmax_cache_size(intor, shls_slice, 4, - atm, natm, bas, nbas, env); + int64_t di = GTOmax_shell_dim(ao_loc, shls_slice, 4); + int64_t cache_size = GTOmax_cache_size(intor, shls_slice, 4, + atm, natm, bas, nbas, env); int ish0 = shls_slice[0]; int ish1 = shls_slice[1]; int jsh0 = shls_slice[2]; @@ -545,8 +558,12 @@ void CVHFnr_direct_ex_drv(int (*intor)(), void (*fdot)(), JKOperator **jkop, int nblock_max = MAX(nblock_i, nblock_j); nblock_max = MAX(nblock_max, nblock_k); nblock_max = 
MAX(nblock_max, nblock_l); - // up to 1.6 GB per thread - int size_limit = (200000000 - di*di*di*di*ncomp - cache_size) / n_dm; + // up to 3.2 GB per thread. + int64_t size_limit = (400000000 - di*di*di*di*ncomp - cache_size) / n_dm; + if (size_limit < 0) { + fprintf(stderr, "Insufficient memory for caching CVHFnr_direct_ex_drv intermediates\n"); + exit(1); + } #pragma omp parallel { @@ -554,7 +571,8 @@ void CVHFnr_direct_ex_drv(int (*intor)(), void (*fdot)(), JKOperator **jkop, int joff = ao_loc[jsh0]; int koff = ao_loc[ksh0]; int loff = ao_loc[lsh0]; - int i, j, k, l, n, r, blk_id; + int i, j, k, l, n; + size_t r, blk_id; JKArray *v_priv[n_dm]; for (i = 0; i < n_dm; i++) { v_priv[i] = CVHFallocate_JKArray(jkop[i], shls_slice, ao_loc, diff --git a/pyscf/lib/vhf/nr_incore.c b/pyscf/lib/vhf/nr_incore.c index b3472f0262..ffcfea53fe 100644 --- a/pyscf/lib/vhf/nr_incore.c +++ b/pyscf/lib/vhf/nr_incore.c @@ -629,8 +629,8 @@ void CVHFnrs8_incore_drv(double *eri, double **dms, double **vjk, { int i, j, ic; size_t ij, off; - size_t npair = nao*(nao+1)/2; - size_t nn = nao * nao; + size_t npair = (size_t)nao*(nao+1)/2; + size_t nn = (size_t)nao * nao; double *v_priv = calloc(nn*n_dm, sizeof(double)); FjkPtr pf; double *pv; @@ -666,8 +666,8 @@ void CVHFnrs4_incore_drv(double *eri, double **dms, double **vjk, { int i, j, ic; size_t ij, off; - size_t npair = nao*(nao+1)/2; - size_t nn = nao * nao; + size_t npair = (size_t)nao*(nao+1)/2; + size_t nn = (size_t)nao * nao; double *v_priv = calloc(nn*n_dm, sizeof(double)); FjkPtr pf; double *pv; @@ -703,8 +703,8 @@ void CVHFnrs2ij_incore_drv(double *eri, double **dms, double **vjk, { int i, j, ic; size_t ij, off; - size_t npair = nao*(nao+1)/2; - size_t nn = nao * nao; + size_t npair = (size_t)nao*(nao+1)/2; + size_t nn = (size_t)nao * nao; double *v_priv = calloc(nn*n_dm, sizeof(double)); FjkPtr pf; double *pv; @@ -740,8 +740,8 @@ void CVHFnrs2kl_incore_drv(double *eri, double **dms, double **vjk, { int i, j, ic; size_t ij, off; - 
size_t npair = nao*(nao+1)/2; - size_t nn = nao * nao; + size_t npair = (size_t)nao*(nao+1)/2; + size_t nn = (size_t)nao * nao; double *v_priv = calloc(nn*n_dm, sizeof(double)); FjkPtr pf; double *pv; @@ -777,7 +777,7 @@ void CVHFnrs1_incore_drv(double *eri, double **dms, double **vjk, { int i, j, ic; size_t ij, off; - size_t nn = nao * nao; + size_t nn = (size_t)nao * nao; double *v_priv = calloc(nn*n_dm, sizeof(double)); FjkPtr pf; double *pv; diff --git a/pyscf/lib/vhf/nr_sgx_direct.c b/pyscf/lib/vhf/nr_sgx_direct.c index 054feed685..6570069296 100644 --- a/pyscf/lib/vhf/nr_sgx_direct.c +++ b/pyscf/lib/vhf/nr_sgx_direct.c @@ -840,19 +840,10 @@ void SGXdiagonal_ints(int (*intor)(), double *m_bi, int *ao_loc, CINTOpt *cintop double *widths, double *norms, double *vals, int nrad, double *atm_coords) { - int shls_slice[] = {0, nbas, 0, nbas}; - int di = GTOmax_shell_dim(ao_loc, shls_slice, 2); - int cache_size = _max_cache_size_sgx(intor, shls_slice, 2, - atm, natm, bas, nbas, env, - SGX_BLKSIZE); #pragma omp parallel { int ig0, ig1, dg; int ish; - int ncomp = 1; - double *buf = calloc(sizeof(double), SGX_BLKSIZE*di*di*ncomp); - double *cache = malloc(sizeof(double) * cache_size); - double *dists = malloc(sizeof(int) * SGX_BLKSIZE); const double omega = env[PTR_RANGE_OMEGA]; double *grids = env + (int) env[PTR_GRIDS]; double r; @@ -887,9 +878,6 @@ void SGXdiagonal_ints(int (*intor)(), double *m_bi, int *ao_loc, CINTOpt *cintop m_bi[ibatch * nbas + ish] = maxint; } } - free(buf); - free(cache); - free(dists); } } diff --git a/pyscf/lib/vhf/nr_sr_vhf.c b/pyscf/lib/vhf/nr_sr_vhf.c index bd4a93fda1..e80caa4c03 100644 --- a/pyscf/lib/vhf/nr_sr_vhf.c +++ b/pyscf/lib/vhf/nr_sr_vhf.c @@ -1,4 +1,5 @@ #include +#include #include #include //#include @@ -698,9 +699,9 @@ void CVHFnr_sr_direct_drv(int (*intor)(), void (*fdot)(), JKOperator **jkop, shls_slice, ao_loc); } - size_t di = GTOmax_shell_dim(ao_loc, shls_slice, 4); - size_t cache_size = GTOmax_cache_size(intor, 
shls_slice, 4, - atm, natm, bas, nbas, env); + int64_t di = GTOmax_shell_dim(ao_loc, shls_slice, 4); + int64_t cache_size = GTOmax_cache_size(intor, shls_slice, 4, + atm, natm, bas, nbas, env); int ish0 = shls_slice[0]; int ish1 = shls_slice[1]; int jsh0 = shls_slice[2]; @@ -717,11 +718,17 @@ void CVHFnr_sr_direct_drv(int (*intor)(), void (*fdot)(), JKOperator **jkop, assert(nksh == nish); assert(nlsh == nish); int *block_loc = malloc(sizeof(int) * (nish+1)); - uint32_t nblock = CVHFshls_block_partition(block_loc, shls_slice, ao_loc, AO_BLOCK_SIZE); - uint32_t nblock2 = nblock * nblock; - uint32_t nblock3 = nblock2 * nblock; - // up to 1.6 GB per thread - int size_limit = (200000000 - di*di*di*di*ncomp - cache_size) / n_dm; + // size_t to keep nblock3 = nblock^3 from overflowing for large molecules + // (nblock can reach ~10^3-10^4 with AO_BLOCK_SIZE=56). + size_t nblock = CVHFshls_block_partition(block_loc, shls_slice, ao_loc, AO_BLOCK_SIZE); + size_t nblock2 = nblock * nblock; + size_t nblock3 = nblock2 * nblock; + // up to 3.2 GB per thread. 
+ int64_t size_limit = (400000000 - di*di*di*di*ncomp - cache_size) / n_dm; + if (size_limit < 0) { + fprintf(stderr, "Insufficient memory for caching CVHFnr_sr_direct_drv intermediates\n"); + exit(1); + } size_t Nbas = nbas; size_t Nbas2 = Nbas * Nbas; @@ -744,7 +751,8 @@ void CVHFnr_sr_direct_drv(int (*intor)(), void (*fdot)(), JKOperator **jkop, float log_cutoff = vhfopt->log_cutoff; float ij_cutoff, ik_cutoff, il_cutoff; float dm_max0, dm_max, log_dm; - int i, j, k, l, n, r, blk_id; + int i, j, k, l, n; + size_t r, blk_id; JKArray *v_priv[n_dm]; for (i = 0; i < n_dm; i++) { v_priv[i] = CVHFallocate_JKArray(jkop[i], shls_slice, ao_loc, diff --git a/pyscf/lib/vhf/rkb_screen.c b/pyscf/lib/vhf/rkb_screen.c index 223d1a64bb..613c55bfed 100644 --- a/pyscf/lib/vhf/rkb_screen.c +++ b/pyscf/lib/vhf/rkb_screen.c @@ -485,8 +485,13 @@ void CVHFrkbssll_direct_scf_dm(CVHFOpt *opt, double complex *dm, int nset, exit(1); } nset = nset / 4; - size_t nbas2 = nbas * nbas; + size_t nbas2 = (size_t)nbas * nbas; opt->dm_cond = (double *)malloc(sizeof(double)*nbas2*4*(1+nset)); + // CVHFrkbssll_dm_cond only writes the LL/SS/SL diagonal blocks for + // jsh<=ish; the strict upper triangle of the master slots would + // otherwise be read uninitialised by CVHFrkbssll_prescreen. Match + // the NPdset0() that the sibling CVHFrkbllll_direct_scf_dm uses. 
+ NPdset0(opt->dm_cond, nbas2*4*(1+nset)); CVHFrkbssll_dm_cond(opt->dm_cond, dm, nset, ao_loc, atm, natm, bas, nbas, env); } diff --git a/pyscf/lib/vhf/test/test_nr_direct.py b/pyscf/lib/vhf/test/test_nr_direct.py index 0e5894a25e..c4e1c2530f 100644 --- a/pyscf/lib/vhf/test/test_nr_direct.py +++ b/pyscf/lib/vhf/test/test_nr_direct.py @@ -15,7 +15,6 @@ import os import ctypes -import _ctypes import unittest import numpy from pyscf import lib @@ -68,7 +67,7 @@ def runjk(dm1, ncomp, intorname, filldot, *namejk): dmsptr = (ctypes.c_void_p*(njk*n_dm))() vjkptr = (ctypes.c_void_p*(njk*n_dm))() for i, symb in enumerate(namejk): - f1 = ctypes.c_void_p(_ctypes.dlsym(libcvhf2._handle, symb)) + f1 = ctypes.cast(getattr(libcvhf2, symb), ctypes.c_void_p) for j in range(n_dm): dmsptr[i*n_dm+j] = dm1[j].ctypes.data_as(ctypes.c_void_p) vjkptr[i*n_dm+j] = vjk[i,j*ncomp].ctypes.data_as(ctypes.c_void_p) diff --git a/pyscf/lo/orth.py b/pyscf/lo/orth.py index 4cfe331906..5c623420c9 100644 --- a/pyscf/lo/orth.py +++ b/pyscf/lo/orth.py @@ -240,7 +240,16 @@ def ecp_ano_det_ovlp(atm_ecp, atm_ano, ecpcore): if symb in mol._basis: ano = aos[symb] else: - ano = aos[mol.atom_pure_symbol(ia)] + try: + ano = aos[mol.atom_pure_symbol(ia)] + except KeyError: + if symb in mol._ecp and mol._ecp[symb][0] == 0: + # Skip ECPs with nelec=0 (typically used for ghost atoms or custom ECPs + # in QM/MM embedding); see PR #3243 for details. 
+ continue + else: + # Any other missing ANO entry is a real error: re-raise instead of falling through with a stale or undefined ano. + raise p0, p1 = p1, p1 + ano.shape[1] c[p0:p1,p0:p1] = ano return c diff --git a/pyscf/lo/test/test_orth.py b/pyscf/lo/test/test_orth.py index ef8ba90bd8..1e58319b5b 100644 --- a/pyscf/lo/test/test_orth.py +++ b/pyscf/lo/test/test_orth.py @@ -118,6 +118,52 @@ def test_pre_orth_ao_with_ecp(self): c0 = orth.pre_orth_ao(mol, method='ano') self.assertAlmostEqual(numpy.linalg.norm(c0), 5.9621174285790959, 9) + def test_pre_orth_ao_with_coreless_ecp(self): + mol = gto.M(atom = ''' + ghost-Cu2 2.33770 1.38257 -2.24106 + Cu0 -0.51523 1.42830 -3.17698 + Cu0 0.39700 3.33838 -1.04703 + Cu0 1.33168 3.68956 -3.87904 + O0 0.88779 2.45970 -2.58603 + O0 3.78761 0.30544 -1.89609 + O0 -0.09379 4.21706 0.49196 + O0 -1.91824 0.39690 -3.76793 + O0 1.77557 4.91941 -5.17205 + X-Cu1 5.19063 1.33684 -1.30514 + X-Cu1 4.27840 -0.57324 -3.43509 + X-Cu1 3.34372 -0.92441 -0.60308 + X-Cu1 1.30922 5.24846 1.08291 + X-Cu1 3.17859 5.95081 -4.58110 + X-Cu1 -1.42745 -0.48178 -5.30692 + X-Cu1 2.26636 4.04073 -6.71105 + X-Cu1 -0.53769 2.98721 1.78498 + X-Cu1 -2.36214 -0.83295 -2.47491 + X-Cu1 0.32566 5.99654 -5.51702 + X-Cu1 -1.54371 5.29420 0.14700 + X-Cu1 -3.36816 1.47403 -4.11289 + ''', + verbose=0, + spin=1, + charge=-6, + basis={'Cu0': 'cc-pvdz', 'O0': 'cc-pvdz', 'ghost-Cu2': gto.basis.load('cc-pvdz', 'Cu')}, + ecp={'Cu': 'cc-pvdz-pp', + 'X-Cu1': gto.basis.parse_ecp(''' + Cu nelec 0 + Cu ul + 2 1.000000000 0.000000000 + Cu S + 2 30.220000000 355.770158000 + 2 13.190000000 70.865357000 + Cu P + 2 33.130000000 233.891976000 + 2 13.220000000 53.947299000 + Cu D + 2 38.420000000 -31.272165000 + 2 13.260000000 -2.741104000 + ''')}) + + c = orth.pre_orth_ao(mol, method='ano') + self.assertAlmostEqual(numpy.linalg.norm(c), 35.14205617894, 9) if __name__ == "__main__": print("Test orth") diff --git a/pyscf/mcpdft/lpdft.py b/pyscf/mcpdft/lpdft.py index 
34db9432e3..a04985f348 100644 --- a/pyscf/mcpdft/lpdft.py +++ b/pyscf/mcpdft/lpdft.py @@ -686,7 +686,7 @@ def linear_multi_state(mc, weights=(0.5, 0.5), 
**kwargs): mc = mc.state_average(weights=weights, **kwargs) else: - base_name = mc.__class__.bases__[0].__name__ + base_name = mc.__class__.__bases__[0].__name__ mcbase_class = mc.__class__ @@ -724,7 +724,7 @@ def linear_multi_state_mix(mc, fcisolvers, weights=(0.5, 0.5), **kwargs): raise RuntimeError("already a StateAverageMCSCF solver") else: - base_name = mc.__class__.bases__[0].__name__ + base_name = mc.__class__.__bases__[0].__name__ mcbase_class = mc.__class__ diff --git a/pyscf/mcpdft/otfnal.py b/pyscf/mcpdft/otfnal.py index a24483f812..406030be7e 100644 --- a/pyscf/mcpdft/otfnal.py +++ b/pyscf/mcpdft/otfnal.py @@ -35,6 +35,8 @@ OT_ALIAS = { 'MC23': 'tMC23', 'MC25': 'tMC25', + 'MC26': 'tMC26', + 'COF26': 'tCOF26', } OT_HYB_ALIAS = {'PBE0' : '0.25*HF + 0.75*PBE, 0.25*HF + 0.75*PBE', } @@ -86,6 +88,82 @@ 'hyb': (0.28, 0.28, 0), 'facs': (0.72, 0.72) }, + + # MC26 = a0*E_CAS + E_xc[rep-M06L] + # Y. Chen, D. Zhang, D. G. Truhlar, and X. He + # Pushing the accuracy of on-top functionals with agent-driven + # supervised learning, arXiv:2605.06215 (2026). 
+ # https://arxiv.org/abs/2605.06215 + 'MC26': { + 'xc_base': 'M06L', + 'ext_params': { + 203: np.array([12.793598175048828, 1.0464407205581665, -1.1021970510482788, + -1.4680061340332031, 1.0868027210235596, 11.653898239135742, + -3.4057228565216064, -20.206926345825195, -1.7893168926239014, + 14.40688705444336, 1.7784547805786133, -0.3958134949207306, + -12.139795303344727, -0.0605972521007061, 0.016891608014702797, + -7.153533806558698e-05, 0.0001199805992655456, 0.0]), + 233: np.array([0.06, 0.0031, 0.00515088, 0.00304966, + -0.6178147196769714, 8.792010307312012, -8.655962944030762, + 15.397195816040039, -9.685625076293945, 2.904688835144043, + -0.982710599899292, 1.7047909498214722, -1.9396733045578003, + -5.875694274902344, 1.1270228624343872, -0.29264968633651733, + 0.10097602754831314, 0.002418402349576354, + -0.0004997584619559348, 0.0, -1.0493528842926025, + -0.03480437397956848, 0.01626494713127613, + 7.84311632742174e-05, 0.000405816943384707, 0.0, 1e-10]), + }, + 'hyb': (0.278090700064691, 0.278090700064691, 0), + }, + + # COF26 = a0*E_CAS + E_xc[rep-M06L + rep-MN15L] + # Y. Chen, D. Zhang, D. G. Truhlar, and X. He + # Pushing the accuracy of on-top functionals with agent-driven + # supervised learning, arXiv:2605.06215 (2026). 
+ # https://arxiv.org/abs/2605.06215 + 'COF26': { + 'xc_base': 'MGGA_X_M06_L + MGGA_X_MN15_L, MGGA_C_M06_L + MGGA_C_MN15_L', + 'ext_params': { + 203: np.array([4.46751594543457, -0.620290219783783, -0.02489340677857399, + -1.9508483409881592, 3.8321266174316406, -4.5821146965026855, + -5.959300518035889, -0.26544812321662903, -1.444387435913086, + 0.7572097778320312, 3.510108470916748, -1.1088151931762695, + -3.569631576538086, -0.06943392008543015, 0.042370155453681946, + 7.512031879741699e-05, -0.000407030078349635, 0.0]), + 233: np.array([0.06, 0.0031, 0.00515088, 0.00304966, + -4.060972213745117, 8.054978370666504, 0.16315306723117828, + 0.20903074741363525, 1.67588472366333, 0.837023913860321, + -1.3942575454711914, -2.884153366088867, -0.7865201830863953, + 5.253849029541016, -6.900444984436035, -0.07099238783121109, + -0.9084649085998535, -2.1175485017010942e-05, + 0.011801144108176231, 0.0, 1.2753486633300781, + -0.022736486047506332, 0.09527082741260529, + 0.000708779611159116, -0.0018802996492013335, 0.0, 1e-10]), + 260: np.array([1.5309321880340576, -0.5386894345283508, 0.2505153715610504, + 4.978420257568359, -5.5219902992248535, 6.497469425201416, + 3.688972234725952, -0.6701527833938599, -0.7988651394844055, + -7.4512176513671875, 10.058389663696289, 2.617449998855591, + -4.1134748458862305, -4.58927059173584, 2.2586185932159424, + -8.232332229614258, 4.996926307678223, -4.7641282081604, + -2.3733041286468506, 4.265657424926758, -6.0180840492248535, + -6.202260494232178, 6.2710113525390625, 5.919536590576172, + -0.17825216054916382, -7.480823516845703, 6.210508823394775, + 3.045118570327759, -1.476043462753296, -6.93911075592041, + 1.2295597791671753, -5.026687145233154, 11.215118408203125, + 2.8131494522094727, 5.998229503631592, -2.111699104309082, + -10.391032218933105, -0.4673156142234802, 3.2028167247772217, + -8.067900657653809]), + 261: np.array([-0.642463207244873, -0.9184160828590393, 6.772172451019287, + -9.329075813293457, 
0.7022364139556885, -1.3836524486541748, + 11.549406051635742, -0.8307218551635742, 5.020711421966553, + -0.16478510200977325, 1.7352665662765503, -1.243597149848938, + 4.824436187744141, -3.134183645248413, 0.6350889801979065, + -7.111184597015381, 3.5491936206817627, -2.827716112136841, + 5.681900501251221, -4.908012866973877, 6.956517696380615, + -4.321927070617676, 4.578726768493652, -1.5277433395385742]), + }, + 'hyb': (0.30959611760805744, 0.30959611760805744, 0), + }, } def register_otfnal(xc_code, preset): diff --git a/pyscf/mcpdft/test/test_lpdft.py b/pyscf/mcpdft/test/test_lpdft.py index 71be3fb345..655d9c897c 100644 --- a/pyscf/mcpdft/test/test_lpdft.py +++ b/pyscf/mcpdft/test/test_lpdft.py @@ -15,7 +15,7 @@ # # Author: Matthew Hennefarth -import tempfile, h5py +import h5py import numpy as np from pyscf import gto, scf, dft, fci, lib from pyscf import mcpdft @@ -57,9 +57,9 @@ def get_water(functional='tpbe', basis='6-31g'): solver2.spin = 2 mc = mcpdft.CASSCF(mf, functional, 4, 4, grids_level=1) - mc.chkfile = tempfile.NamedTemporaryFile().name # mc.chk_ci = True mc = mc.multi_state_mix([solver1, solver2], weights, "lin") + mc.chkfile = lib.NamedTemporaryFile().name mc.run() return mc @@ -82,9 +82,9 @@ def get_water_triplet(functional='tPBE', basis="6-31G"): solver2.nroots = 2 mc = mcpdft.CASSCF(mf, functional, 4, 4, grids_level=1) - mc.chkfile = tempfile.NamedTemporaryFile().name # mc.chk_ci = True mc = mc.multi_state_mix([solver1, solver2], weights, "lin") + mc.chkfile = lib.NamedTemporaryFile().name mc.run() return mc diff --git a/pyscf/mcpdft/test/test_mcpdft.py b/pyscf/mcpdft/test/test_mcpdft.py index 1219d3b278..f96c66a4ef 100644 --- a/pyscf/mcpdft/test/test_mcpdft.py +++ b/pyscf/mcpdft/test/test_mcpdft.py @@ -30,7 +30,7 @@ # Some assertAlmostTrue thresholds are loose because we are only # trying to test the API here; we need tight convergence and grids # to reproduce well when OMP is on. 
-import tempfile, h5py +import h5py import numpy as np from pyscf import gto, scf, mcscf, lib, fci, dft from pyscf import mcpdft @@ -46,13 +46,16 @@ def auto_setup(xyz="Li 0 0 0\nH 1.5 0 0", fnal="tPBE"): atom=xyz, basis="sto3g", symmetry=True, verbose=0, output="/dev/null" ) mf_nosym = scf.RHF(mol_nosym).run(conv_tol=1e-12) - mc_nosym = mcscf.CASSCF(mf_nosym, 5, 2).run(conv_tol=1e-8) + mc_nosym = mcscf.CASSCF(mf_nosym, 5, 2) mf_sym = scf.RHF(mol_sym).run() mc_sym = mcscf.CASSCF(mf_sym, 5, 2).run(conv_tol=1e-8) + mc_nosym.run (mo_coeff=mc_sym.mo_coeff, + ci=mc_sym.ci, + conv_tol=1e-8) mcp_ss_nosym = mcpdft.CASSCF(mc_nosym, fnal, 5, 2).run(conv_tol=1e-8) mcp_ss_sym = ( mcpdft.CASSCF(mc_sym, fnal, 5, 2) - .set(chkfile=tempfile.NamedTemporaryFile().name)#, chk_ci=True) + .set(chkfile=lib.NamedTemporaryFile().name, chk_ci=True) .run(conv_tol=1e-8) ) mcp_sa_0 = mcp_ss_nosym.state_average( @@ -60,7 +63,7 @@ def auto_setup(xyz="Li 0 0 0\nH 1.5 0 0", fnal="tPBE"): 1.0 / 5, ] * 5 - ).run(conv_tol=1e-8) + ) solver_S = fci.solver(mol_nosym, singlet=True).set(spin=0, nroots=2) solver_T = fci.solver(mol_nosym, singlet=False).set(spin=2, nroots=3) mcp_sa_1 = ( @@ -72,7 +75,6 @@ def auto_setup(xyz="Li 0 0 0\nH 1.5 0 0", fnal="tPBE"): * 5, ) .set(ci=None) - .run(conv_tol=1e-8) ) solver_A1 = fci.solver(mol_sym).set(wfnsym="A1", nroots=3) solver_E1x = fci.solver(mol_sym).set(wfnsym="E1x", nroots=1, spin=2) @@ -85,9 +87,13 @@ def auto_setup(xyz="Li 0 0 0\nH 1.5 0 0", fnal="tPBE"): ] * 5, ) - .set(ci=None, chkfile=tempfile.NamedTemporaryFile().name)#, chk_ci=True) + .set(ci=None, chkfile=lib.NamedTemporaryFile().name, chk_ci=True) .run(conv_tol=1e-8) ) + mcp_sa_1.run (mo_coeff=mcp_sa_2.mo_coeff, + conv_tol=1e-8) + mcp_sa_0.run (mo_coeff=mcp_sa_2.mo_coeff, + conv_tol=1e-8) mcp = [[mcp_ss_nosym, mcp_ss_sym], [mcp_sa_0, mcp_sa_1, mcp_sa_2]] nosym = [mol_nosym, mf_nosym, mc_nosym] sym = [mol_sym, mf_sym, mc_sym] diff --git a/pyscf/mcscf/test/test_h2o.py b/pyscf/mcscf/test/test_h2o.py 
index b91fa08739..f226f05e32 100644 --- a/pyscf/mcscf/test/test_h2o.py +++ b/pyscf/mcscf/test/test_h2o.py @@ -14,7 +14,6 @@ # limitations under the License. import unittest -import tempfile import numpy from pyscf import gto from pyscf import scf @@ -200,7 +199,7 @@ def test_chkfile_mixed(self): * 4, ) mo = mc.sort_mo([4, 5, 6, 10], base=1) - mc.chkfile = tempfile.NamedTemporaryFile().name + mc.chkfile = lib.NamedTemporaryFile().name mc.chk_ci = True mc.kernel(mo) self.assertAlmostEqual(mc.e_tot, mc_ref.e_tot, 8) diff --git a/pyscf/mcscf/test/test_mc1step.py b/pyscf/mcscf/test/test_mc1step.py index a89b8f08c5..9cb694ea23 100644 --- a/pyscf/mcscf/test/test_mc1step.py +++ b/pyscf/mcscf/test/test_mc1step.py @@ -14,7 +14,6 @@ # limitations under the License. import unittest -import tempfile import numpy import h5py from pyscf import lib @@ -37,7 +36,7 @@ def setUpModule(): ) m = scf.RHF(mol) m.conv_tol = 1e-10 - m.chkfile = tempfile.NamedTemporaryFile().name + m.chkfile = lib.NamedTemporaryFile().name m.scf() mc0 = mcscf.CASSCF(m, 4, 4).run() @@ -51,8 +50,8 @@ def setUpModule(): symmetry = True ) msym = scf.RHF(molsym) - msym.chkfile = tempfile.NamedTemporaryFile().name msym.conv_tol = 1e-10 + msym.chkfile = lib.NamedTemporaryFile().name msym.scf() def tearDownModule(): diff --git a/pyscf/mcscf/test/test_umc1step.py b/pyscf/mcscf/test/test_umc1step.py index afbca92483..37d3cded85 100644 --- a/pyscf/mcscf/test/test_umc1step.py +++ b/pyscf/mcscf/test/test_umc1step.py @@ -14,7 +14,6 @@ # limitations under the License. 
import unittest -import tempfile import numpy from pyscf import lib from pyscf import gto @@ -48,8 +47,8 @@ def tearDownModule(): class KnownValues(unittest.TestCase): def test_ucasscf(self): - with tempfile.NamedTemporaryFile() as f: - mc = mcscf.UCASSCF(m, 4, 4) + mc = mcscf.UCASSCF(m, 4, 4) + with lib.NamedTemporaryFile() as f: mc.chkfile = f.name mc.run() self.assertAlmostEqual(mc.e_tot, -75.7460662487894, 6) diff --git a/pyscf/mp/__init__.py b/pyscf/mp/__init__.py index 33b1ee1522..023ca79f37 100644 --- a/pyscf/mp/__init__.py +++ b/pyscf/mp/__init__.py @@ -23,6 +23,7 @@ from pyscf.mp import dfump2 from pyscf.mp import gmp2 from pyscf.mp import dfgmp2 +from pyscf.mp import cabs def MP2(mf, frozen=None, mo_coeff=None, mo_occ=None): if mf.istype('UHF'): diff --git a/pyscf/mp/cabs.py b/pyscf/mp/cabs.py new file mode 100644 index 0000000000..4122730444 --- /dev/null +++ b/pyscf/mp/cabs.py @@ -0,0 +1,246 @@ +#!/usr/bin/env python +# Copyright 2026 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Complementary auxiliary basis set (CABS). 
+ +Refs: +* JCP 127, 221106 (2007); DOI:10.1063/1.2817618 +* JCP 128, 154103 (2008); DOI:10.1063/1.2889388 +""" + +import numpy +import scipy.linalg + +from pyscf import gto, scf +from pyscf.data import elements +from pyscf.lib import logger +from pyscf.scf import hf + + +def find_cabs(mol, auxmol, lindep=1e-8): + """Project an auxiliary basis to the complement of the orbital basis.""" + cabs_mol = gto.conc_mol(mol, auxmol) + nao = mol.nao_nr() + s = cabs_mol.intor_symmetric('int1e_ovlp') + + ls12 = scipy.linalg.solve(s[:nao, :nao], s[:nao, nao:], assume_a='pos') + s[nao:, nao:] -= s[nao:, :nao].dot(ls12) + w, v = scipy.linalg.eigh(s[nao:, nao:]) + c2 = v[:, w > lindep] / numpy.sqrt(w[w > lindep]) + c1 = ls12.dot(c2) + return cabs_mol, numpy.vstack((-c1, c2)) + + +def make_cabs_auxmol(mol, auxbasis): + """Build a basis-only Mole object for the CABS basis. + + The auxiliary functions must sit on the molecular centers, but they should + not add another copy of the nuclear attraction operator when the OBS and + CABS spaces are concatenated for one-electron matrix elements. 
+ """ + auxmol = mol.copy() + auxmol.basis = auxbasis + auxmol.build(False, False) + auxmol._atm[:, gto.CHARGE_OF] = 0 + auxmol._ecpbas = auxmol._ecpbas[:0] + return auxmol + + +def _as_cabs_auxmol(mol, auxmol_or_basis): + if isinstance(auxmol_or_basis, gto.MoleBase): + auxmol = auxmol_or_basis + if not auxmol._built: + auxmol.build(False, False) + if ( + auxmol.natm == mol.natm + and numpy.linalg.norm(auxmol.atom_coords() - mol.atom_coords()) < 1e-10 + and numpy.linalg.norm(auxmol.atom_charges()) > 1e-12 + ): + return make_cabs_auxmol(mol, auxmol._basis) + return auxmol + return make_cabs_auxmol(mol, auxmol_or_basis) + + +def _frozen_mask(mol, mo_occ, frozen): + mask = numpy.zeros(mo_occ.size, dtype=bool) + if frozen is None: + return mask + + if isinstance(frozen, str): + scheme = frozen.lower() + if scheme == 'chemcore': + frozen = elements.chemcore(mol) + elif scheme == 'none': + frozen = 0 + else: + raise ValueError(f'Unsupported CABS frozen orbital scheme {frozen!r}') + + if isinstance(frozen, (bool, numpy.bool_)): + raise TypeError('CABS frozen orbitals must be specified as an int, sequence, tuple, or named scheme') + if isinstance(frozen, (int, numpy.integer)): + mask[:frozen] = True + else: + mask[numpy.asarray(frozen, dtype=int)] = True + return mask + + +def _active_masks(mol, mo_occ, frozen): + frozen_mask = _frozen_mask(mol, mo_occ, frozen) + occidx = (mo_occ > 0) & ~frozen_mask + viridx = (mo_occ == 0) & ~frozen_mask + return occidx, viridx + + +def _spin_masks(mol, spin_occ, frozen): + if isinstance(frozen, (tuple, list)) and len(frozen) == 2 and not isinstance(frozen[0], (int, numpy.integer)): + return tuple(_active_masks(mol, occ, frz) for occ, frz in zip(spin_occ, frozen)) + return tuple(_active_masks(mol, occ, frozen) for occ in spin_occ) + + +def _embed_dm(dm, nao, nca): + dm = numpy.asarray(dm) + dm_ext = numpy.zeros(dm.shape[:-2] + (nca, nca), dtype=dm.dtype) + dm_ext[..., :nao, :nao] = dm + return dm_ext + + +def _get_jk(mf, cabs_mol, dm): + 
if getattr(mf, 'with_df', None) is not None: + dfmf = scf.RHF(cabs_mol).density_fit(auxbasis=mf.with_df.auxbasis) + dfmf.with_df.max_memory = mf.with_df.max_memory + dfmf.with_df.stdout = mf.with_df.stdout + dfmf.with_df.verbose = mf.with_df.verbose + return dfmf.get_jk(cabs_mol, dm, hermi=1) + return hf.get_jk(cabs_mol, dm, hermi=1) + + +def _unrestricted_focks(mf, cabs_mol, dm): + vj, vk = _get_jk(mf, cabs_mol, dm) + hcore = mf.get_hcore(cabs_mol) + vj_tot = vj[0] + vj[1] + return hcore + vj_tot - vk[0], hcore + vj_tot - vk[1] + + +def _extended_projector(mo_coeff, cabs_coeff): + nao, nmo = mo_coeff.shape + nca = cabs_coeff.shape[0] + pcoeff = numpy.zeros((nca, nmo + cabs_coeff.shape[1])) + pcoeff[:nao, :nmo] = mo_coeff + pcoeff[:, nmo:] = cabs_coeff + return pcoeff + + +def _cabs_singles_from_fock(fock, pcoeff, mo_occ, mo_energy, occidx, viridx): + nmo = mo_occ.size + if not numpy.any(occidx): + return 0.0 + + # Diagonalize the external space formed by orbital-basis virtual MOs and CABS. + # The MO-virtual block is zero for canonical RHF/UHF, but gives the ROHF/non-canonical singles contribution, + # and MolPro separates those contributions. + extidx = numpy.r_[numpy.where(viridx)[0], numpy.arange(nmo, pcoeff.shape[1])] + + fock_p = pcoeff.T.dot(fock).dot(pcoeff) + e_cabs, u_cabs = scipy.linalg.eigh(fock_p[numpy.ix_(extidx, extidx)]) + fia = fock_p[numpy.ix_(occidx, extidx)].dot(u_cabs) + denom = mo_energy[occidx, None] - e_cabs + return numpy.einsum('i,ia,ia,ia->', mo_occ[occidx], fia, fia, 1.0 / denom) + + +def energy_singles(mf, auxbasis, *, frozen='chemcore', lindep=1e-8): + r"""CABS singles correction to the Hartree-Fock reference energy. + + For a closed-shell reference this evaluates + + .. math:: + E_\mathrm{CABS} = 2 \sum_{iA} + \frac{|F_{iA}|^2}{\epsilon_i - \epsilon_A} + + where ``A`` denotes canonical orbitals in the external space formed by + the virtual MOs of the orbital basis and CABS. 
For UHF and ROHF references + the same expression is evaluated for the alpha and beta Fock matrices with + spin occupations as prefactors. + + Args: + mf : SCF object + Converged molecular HF object. + auxbasis : Mole, str, list, tuple, or dict + CABS/OptRI basis as a Mole object or in the usual Mole.basis format. + If a normal charged Mole is supplied on the same centers as ``mf.mol``, + only its basis is used; its nuclear charges and ECPs are discarded. + frozen : None, int, sequence, tuple, or str + Frozen orbital selection. ``'chemcore'`` (default) freezes the chemical + core. ``None`` or ``0`` includes all orbitals. An integer freezes + the lowest orbitals and a sequence freezes explicit MO indices. For + UHF, a flat sequence is applied to both spins; a nested two-item + sequence gives separate alpha and beta frozen orbitals. + lindep : float + Linear-dependence threshold in the CABS projection. + """ + mol = mf.mol + mo_coeff = mf.mo_coeff + mo_occ = mf.mo_occ + mo_energy = mf.mo_energy + is_uhf = isinstance(mo_coeff, (tuple, list)) or getattr(mo_coeff, 'ndim', 0) == 3 + is_rohf = not is_uhf and numpy.any(mo_occ == 1) + + if not is_uhf: + valid_occ = (mo_occ == 0) | (mo_occ == 2) + if is_rohf: + valid_occ |= mo_occ == 1 + if numpy.any(~valid_occ): + raise NotImplementedError('CABS singles for general fractional-occupation references is not implemented.') + + if is_rohf: + spin_coeff = (mo_coeff, mo_coeff) + spin_occ = (mo_occ > 0, mo_occ == 2) + spin_energy = (mo_energy.mo_ea, mo_energy.mo_eb) + spin_masks = _spin_masks(mol, spin_occ, frozen) + elif is_uhf: + spin_coeff = mo_coeff + spin_occ = mo_occ + spin_energy = mo_energy + spin_masks = _spin_masks(mol, spin_occ, frozen) + else: + occidx, viridx = _active_masks(mol, mo_occ, frozen) + + auxmol = _as_cabs_auxmol(mol, auxbasis) + cabs_mol, cabs_coeff = find_cabs(mol, auxmol, lindep) + if cabs_coeff.shape[1] == 0: + logger.note(mf, 'CABS singles correction = 0.0') + return 0.0 + nao = mol.nao_nr() + nca = 
cabs_mol.nao_nr() + + if is_rohf or is_uhf: + dm = _embed_dm(mf.make_rdm1(), nao, nca) + focks = _unrestricted_focks(mf, cabs_mol, dm) + e_cabs = 0.0 + for fock, coeff, occ, energy, (occidx, viridx) in zip(focks, spin_coeff, spin_occ, spin_energy, spin_masks): + pcoeff = _extended_projector(coeff, cabs_coeff) + e_cabs += _cabs_singles_from_fock(fock, pcoeff, occ, energy, occidx, viridx) + else: + pcoeff = _extended_projector(mo_coeff, cabs_coeff) + dm = _embed_dm(mf.make_rdm1(), nao, nca) + vj, vk = _get_jk(mf, cabs_mol, dm) + fock = mf.get_hcore(cabs_mol) + vj - vk * 0.5 + e_cabs = _cabs_singles_from_fock(fock, pcoeff, mo_occ, mo_energy, occidx, viridx) + + logger.note(mf, 'CABS singles correction = %.15g', e_cabs) + return e_cabs + + +energy_cabs_singles = energy_singles diff --git a/pyscf/mp/dfmp2.py b/pyscf/mp/dfmp2.py index c81eed2bb9..fc4dca11a0 100644 --- a/pyscf/mp/dfmp2.py +++ b/pyscf/mp/dfmp2.py @@ -445,7 +445,7 @@ def _init_mp_df_eris_direct(with_df, occ_coeff, vir_coeff, max_memory, h5obj=Non # precompute for fitting j2c = fill_2c2e(mol, auxmol) try: - m2c = scipy.linalg.cholesky(j2c, lower=True) + m2c = np.asfortranarray(scipy.linalg.cholesky(j2c, lower=True)) tag = 'cd' except scipy.linalg.LinAlgError: e, u = np.linalg.eigh(j2c) diff --git a/pyscf/mp/dfump2.py b/pyscf/mp/dfump2.py index 1835f68dd3..430922d9e1 100644 --- a/pyscf/mp/dfump2.py +++ b/pyscf/mp/dfump2.py @@ -468,7 +468,7 @@ def _init_mp_df_eris_direct(with_df, occ_coeff, vir_coeff, max_memory, h5obj=Non # precompute for fitting j2c = fill_2c2e(mol, auxmol) try: - m2c = scipy.linalg.cholesky(j2c, lower=True) + m2c = np.asfortranarray(scipy.linalg.cholesky(j2c, lower=True)) tag = 'cd' except scipy.linalg.LinAlgError: e, u = np.linalg.eigh(j2c) diff --git a/pyscf/mp/mp2f12_slow.py b/pyscf/mp/mp2f12_slow.py index 83decc6c6c..4f7ab3f335 100644 --- a/pyscf/mp/mp2f12_slow.py +++ b/pyscf/mp/mp2f12_slow.py @@ -26,29 +26,18 @@ import warnings from functools import reduce import numpy -import 
scipy.linalg from pyscf import lib from pyscf.lib import logger from pyscf import gto -from pyscf import ao2mo -from pyscf.scf import jk from pyscf.mp import mp2 +from pyscf.mp import cabs warnings.warn('Module MP2-F12 is under testing') # The cabs space, the complimentary space to the OBS. def find_cabs(mol, auxmol, lindep=1e-8): - cabs_mol = gto.conc_mol(mol, auxmol) - nao = mol.nao_nr() - s = cabs_mol.intor_symmetric('int1e_ovlp') - - ls12 = scipy.linalg.solve(s[:nao,:nao], s[:nao,nao:], assume_a='pos') - s[nao:,nao:] -= s[nao:,:nao].dot(ls12) - w, v = scipy.linalg.eigh(s[nao:,nao:]) - c2 = v[:,w>lindep]/numpy.sqrt(w[w>lindep]) - c1 = ls12.dot(c2) - return cabs_mol, numpy.vstack((-c1,c2)) + return cabs.find_cabs(mol, auxmol, lindep) def trans(eri, mos): naoi, nmoi = mos[0].shape diff --git a/pyscf/mp/test/test_cabs.py b/pyscf/mp/test/test_cabs.py new file mode 100644 index 0000000000..db0050484d --- /dev/null +++ b/pyscf/mp/test/test_cabs.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python +# Copyright 2026 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from functools import reduce +import os +import numpy +from pyscf import gto +from pyscf import lo +from pyscf import scf +from pyscf.mp import cabs + + +def setUpModule(): + global mol, mf, mf1 + mol = gto.Mole() + mol.verbose = 7 + mol.output = '/dev/null' + mol.atom = [ + [8 , (0. , 0. , 0.)], + [1 , (0. , -0.757 , 0.587)], + [1 , (0. 
, 0.757 , 0.587)]] + + mol.basis = {'H': 'cc-pvdz', + 'O': 'cc-pvdz',} + mol.build() + mf = scf.RHF(mol) + mf.conv_tol = 1e-12 + mf.scf() + +def tearDownModule(): + global mol, mf + mol.stdout.close() + del mol, mf + + +class KnownValues(unittest.TestCase): + def test_find_cabs(self): + auxmol = mol.copy() + auxmol.basis = 'def2-tzvp' + auxmol.build(False, False) + cabs_mol, cabs_coeff = cabs.find_cabs(mol, auxmol) + nao = mol.nao_nr() + nca = cabs_coeff.shape[0] + c1 = numpy.zeros((nca,nao)) + c1[:nao,:nao] = lo.orth.lowdin(mol.intor('int1e_ovlp_sph')) + c = numpy.hstack((c1,cabs_coeff)) + s = reduce(numpy.dot, (c.T, cabs_mol.intor('int1e_ovlp_sph'), c)) + self.assertAlmostEqual(numpy.linalg.norm(s-numpy.eye(c.shape[1])), 0, 8) + + def test_rhf_cabs_singles(self): + mol = gto.Mole(atom=''' + H 0.000000000 0.000000000 0.457870600 + F 0.000000000 0.000000000 -0.457870600 + ''', basis='cc-pvdz', verbose=0) + mol.build() + mf = scf.RHF(mol).density_fit(auxbasis='cc-pvdz-jkfit').run() + e = cabs.energy_singles(mf, 'cc-pvdz-jkfit') + # MolPro: -0.033551214 + self.assertAlmostEqual(e, -0.033551214448, 9) + e = cabs.energy_singles(mf, 'cc-pvdz-jkfit', frozen=[0]) + self.assertAlmostEqual(e, -0.033551214448, 9) + e = cabs.energy_singles(mf, 'cc-pvdz-jkfit', frozen=0) + # MRCC: -0.033749160781 + self.assertAlmostEqual(e, -0.033749161358, 9) + + def test_uhf_cabs_singles(self): + mol = gto.Mole(atom=''' + H 0.000000000 0.000000000 0.457870600 + O 0.000000000 0.000000000 -0.457870600 + ''', basis='cc-pvdz', spin=1, verbose=0) + mol.build() + mf = scf.UHF(mol).density_fit(auxbasis='cc-pvdz-jkfit').run() + e = cabs.energy_singles(mf, 'cc-pvdz-jkfit') + # no MolPro, it has only ROHF-MP2 + self.assertAlmostEqual(e, -0.022958881659, 9) + e = cabs.energy_singles(mf, 'cc-pvdz-jkfit', frozen=([0], [0])) + self.assertAlmostEqual(e, -0.022958881659, 9) + e = cabs.energy_singles(mf, 'cc-pvdz-jkfit', frozen=0) + # MRCC: -0.023166900284 + self.assertAlmostEqual(e, -0.023166901553, 9) + + 
def test_rohf_cabs_singles(self): + mol = gto.Mole(atom=''' + H 0.000000000 0.000000000 0.457870600 + O 0.000000000 0.000000000 -0.457870600 + ''', basis='cc-pvdz', spin=1, verbose=0) + mol.build() + mf = scf.ROHF(mol).density_fit(auxbasis='cc-pvdz-jkfit').run() + e = cabs.energy_singles(mf, 'cc-pvdz-jkfit') + # MolPro: + # TOTAL ALPHA BETA + # Singles Contributions MO -0.002681874 -0.001339551 -0.001342323 + # Singles Contributions CABS -0.022664571 -0.013326055 -0.009338516 + # Pure DF-RHF relaxation -0.022020485 + # + # One has to sum MO + CABS contributions: + # -0.002681874 + -0.022664571 = -0.025346445 + self.assertAlmostEqual(e, -0.025343529823, 9) + e = cabs.energy_singles(mf, 'cc-pvdz-jkfit', frozen=([0], [0])) + self.assertAlmostEqual(e, -0.025343529823, 9) + e = cabs.energy_singles(mf, 'cc-pvdz-jkfit', frozen=0) + # MolPro: + # TOTAL ALPHA BETA + # Singles Contributions MO -0.002709569 -0.001352954 -0.001356615 + # Singles Contributions CABS -0.022873615 -0.013438077 -0.009435538 + # Pure DF-RHF relaxation -0.022198754 + # Sum: -0.025583184 + self.assertAlmostEqual(e, -0.025580122540, 9) + + +if __name__ == "__main__": + print("Full Tests for CABS") + unittest.main() diff --git a/pyscf/mp/test/test_dfmp2.py b/pyscf/mp/test/test_dfmp2.py index 42a693739b..66bfdbdf61 100644 --- a/pyscf/mp/test/test_dfmp2.py +++ b/pyscf/mp/test/test_dfmp2.py @@ -14,7 +14,6 @@ # limitations under the License. 
import unittest -import tempfile from functools import reduce import numpy import numpy as np @@ -125,7 +124,7 @@ def test_read_ovL_incore(self): self.assertAlmostEqual(mmp.e_corr, mmp1.e_corr, 8) def test_read_ovL_outcore(self): - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() mmp = mp.dfmp2.DFMP2(mf) eris = mmp.ao2mo(ovL_to_save=ftmp.name) diff --git a/pyscf/mp/test/test_dfump2.py b/pyscf/mp/test/test_dfump2.py index ada048fdf8..3741b2c2c8 100644 --- a/pyscf/mp/test/test_dfump2.py +++ b/pyscf/mp/test/test_dfump2.py @@ -14,7 +14,6 @@ # limitations under the License. import unittest -import tempfile from functools import reduce import numpy import numpy as np @@ -106,7 +105,7 @@ def test_read_ovL_incore(self): self.assertAlmostEqual(mmp.e_corr, mmp1.e_corr, 8) def test_read_ovL_outcore(self): - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() mmp = mp.dfump2.DFUMP2(mf) eris = mmp.ao2mo(ovL_to_save=ftmp.name) diff --git a/pyscf/mrpt/dfnevpt2.py b/pyscf/mrpt/dfnevpt2.py index 4a11d64c4a..6c1b9a632c 100644 --- a/pyscf/mrpt/dfnevpt2.py +++ b/pyscf/mrpt/dfnevpt2.py @@ -16,7 +16,6 @@ # Authors: Bhavnesh Jangid import ctypes -import tempfile import numpy as np from functools import reduce from pyscf import lib @@ -128,7 +127,7 @@ def _dfnevpt2_eris_outcore(mc, mo_coeff, with_df): # Step-3: from the transfomed (L|pq), build pacv and cvcv tmpdir = lib.param.TMPDIR - cvcvfile = tempfile.NamedTemporaryFile(dir=tmpdir) + cvcvfile = lib.NamedTemporaryFile(dir=tmpdir) # Edge cases if ncore * nvir == 0 or ncore * nvir == 0: f5 = lib.H5TmpFile(cvcvfile.name, 'w') diff --git a/pyscf/mrpt/nevpt2.py b/pyscf/mrpt/nevpt2.py index 39a41d7e66..77d9abde53 100644 --- a/pyscf/mrpt/nevpt2.py +++ b/pyscf/mrpt/nevpt2.py @@ -19,7 +19,6 @@ import ctypes -import tempfile from functools import reduce import numpy import h5py @@ -381,7 +380,7 @@ def Sijrs(mc, eris, verbose=None): ncas = mo_cas.shape[1] nocc = ncore + ncas if eris is None: - erifile = 
tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + erifile = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) feri = ao2mo.outcore.general(mc.mol, (mo_core,mo_virt,mo_core,mo_virt), erifile.name, verbose=mc.verbose) else: @@ -992,7 +991,7 @@ def trans_e1_outcore(mc, mo, max_memory=None, ioblk_size=256, tmpdir=None, if tmpdir is None: tmpdir = lib.param.TMPDIR - swapfile = tempfile.NamedTemporaryFile(dir=tmpdir) + swapfile = lib.NamedTemporaryFile(dir=tmpdir) ao2mo.outcore.half_e1(mol, (mo[:,:nocc],mo[:,ncore:]), swapfile.name, max_memory=max_memory, ioblk_size=ioblk_size, verbose=log, compact=False) @@ -1016,7 +1015,7 @@ def load_buf(r0,r1): time0 = logger.timer(mol, 'halfe1', *time0) time1 = [logger.process_clock(), logger.perf_counter()] ao_loc = numpy.array(mol.ao_loc_nr(), dtype=numpy.int32) - cvcvfile = tempfile.NamedTemporaryFile(dir=tmpdir) + cvcvfile = lib.NamedTemporaryFile(dir=tmpdir) with lib.H5TmpFile(cvcvfile.name, 'w') as f5: cvcv = f5.create_dataset('eri_mo', (ncore*nvir,ncore*nvir), 'f8') ppaa, papa, pacv = _trans(mo, ncore, ncas, load_buf, cvcv, ao_loc)[:3] diff --git a/pyscf/pbc/df/df.py b/pyscf/pbc/df/df.py index d5be40139d..96aca10ca7 100644 --- a/pyscf/pbc/df/df.py +++ b/pyscf/pbc/df/df.py @@ -31,8 +31,8 @@ import os import ctypes +import sys import warnings -import tempfile import contextlib import itertools import numpy @@ -166,7 +166,7 @@ def __init__(self, cell, kpts=None): self.linear_dep_threshold = LINEAR_DEP_THR self._j_only = False # If _cderi_to_save is specified, the 3C-integral tensor will be saved in this file. 
- self._cderi_to_save = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + self._cderi_to_save = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) # If _cderi is specified, the 3C-integral tensor will be read from this file self._cderi = None self._rsh_df = {} # Range separated Coulomb DF objects @@ -277,6 +277,10 @@ def build(self, j_only=None, with_j3c=True, kpts_band=None): if self._cderi == cderi and os.path.isfile(cderi): logger.warn(self, 'File %s (specified by ._cderi) is ' 'overwritten by GDF initialization.', cderi) + # On Windows, close the handle before os.remove to avoid + # permission error. + if sys.platform == 'win32': + self._cderi_to_save.close() os.remove(cderi) else: logger.warn(self, 'Value of ._cderi is ignored. ' diff --git a/pyscf/pbc/df/gdf_builder.py b/pyscf/pbc/df/gdf_builder.py index 3f0a20b0ce..880d58b8c0 100644 --- a/pyscf/pbc/df/gdf_builder.py +++ b/pyscf/pbc/df/gdf_builder.py @@ -26,7 +26,6 @@ import os import ctypes -import tempfile import numpy as np import scipy.linalg from pyscf import gto @@ -204,7 +203,7 @@ def outcore_auxe2(self, cderi_file, intor='int3c2e', aosym='s2', comp=None, shls_slice : Indicate the shell slices in the primitive cell ''' - swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file)) + swapfile = lib.NamedTemporaryFile(dir=os.path.dirname(cderi_file)) fswap = lib.H5TmpFile(swapfile.name) swapfile = None @@ -213,6 +212,7 @@ def outcore_auxe2(self, cderi_file, intor='int3c2e', aosym='s2', comp=None, rs_cell = self.rs_cell fused_cell = self.fused_cell naux = self.auxcell.nao + nauxc = self.fused_cell.nao kpts = self.kpts nkpts = kpts.shape[0] @@ -243,7 +243,6 @@ def outcore_auxe2(self, cderi_file, intor='int3c2e', aosym='s2', comp=None, else: merge_dd = None - reindex_k = None # TODO: shape = (comp, nao_pair, naux) shape = (nao_pair, naux) if j_only or nkpts == 1: @@ -254,14 +253,12 @@ def outcore_auxe2(self, cderi_file, intor='int3c2e', aosym='s2', comp=None, # Ensure kk_idx is a subset of all 
possible ki-kj paris assert np.all(np.isin(kk_idx, kikj_idx)) kikj_idx = kk_idx - reindex_k = kikj_idx // nkpts else: nkpts_ij = nkpts * nkpts if kk_idx is None: kikj_idx = np.arange(nkpts_ij, dtype=np.int32) else: kikj_idx = kk_idx - reindex_k = kikj_idx if merge_dd and kk_idx is None: kpt_ij_iters = list(kk_adapted_iter(cell, kpts)) @@ -280,80 +277,169 @@ def outcore_auxe2(self, cderi_file, intor='int3c2e', aosym='s2', comp=None, self._outcore_dd_block(fswap, intor, aosym, comp, j_only, dataname, kk_idx=kk_idx) - # int3c2e for (cell, cell | fused_cell) - int3c = self.gen_int3c_kernel(intor, aosym, comp, j_only, - reindex_k=reindex_k, auxcell=self.fused_cell) - mem_now = lib.current_memory()[0] log.debug2('memory = %s', mem_now) max_memory = max(2000, self.max_memory-mem_now) # split the 3-center tensor (nkpts_ij, i, j, aux) along shell i. # plus 1 to ensure the intermediates in libpbc do not overflow - buflen = min(max(int(max_memory*.9e6/16/naux/(nkpts_ij+1)), 1), nao_pair) + buflen = min(max(int(max_memory*.9e6/16/nauxc/(nkpts_ij+1)), 1), nao_pair) # lower triangle part sh_ranges = _guess_shell_ranges(cell, buflen, aosym, start=ish0, stop=ish1) max_buflen = max([x[2] for x in sh_ranges]) - if max_buflen > buflen: + + # The per-step int3c output buffer is (nkpts_ij_chunk, max_buflen, + # nauxc) doubles for both R and I, i.e. nkpts_ij_chunk*max_buflen*nauxc*16 + # bytes. When nkpts_ij is large (e.g. 6x6x6 k-mesh with j_only=False + # gives 46656 pairs), this buffer dwarfs max_memory and the libpbc + # int3c kernel OOMs. Split kikj_idx into chunks so each chunk's int3c + # call fits within max_memory, then loop over chunks. The shell-block + # granularity (sh_ranges) is held fixed across chunks so that fswap + # row writes (row0:row1) remain consistent. + bytes_per_pair = max_buflen * nauxc * 16 + kpts_chunk = max(1, int(max_memory * .9e6 / bytes_per_pair)) + kpts_chunk = min(kpts_chunk, nkpts_ij) + + # Build chunks of kpt-pair indices. 
The merge_dd branch that pairs + # (ij_idx, ji_idx) within an adapted-kpt group (see kpt_ij_iters + # below) requires both indices to live in the same chunk; for that + # path we group whole kk_adapted groups together. Otherwise we chunk + # contiguously. + need_group_chunks = (merge_dd is not None + and not (j_only or nkpts == 1) + and not gamma_point_only + and kk_idx is None) + if need_group_chunks: + chunks_meta = [] + cur_pairs = [] + cur_groups = [] + for grp in kpt_ij_iters: + _, ki_idx_g, kj_idx_g, self_conj_g = grp + grp_pairs = list(ki_idx_g * nkpts + kj_idx_g) + if not self_conj_g: + grp_pairs += list(kj_idx_g * nkpts + ki_idx_g) + if cur_pairs and len(cur_pairs) + len(grp_pairs) > kpts_chunk: + chunks_meta.append((np.asarray(cur_pairs, dtype=np.int32), + cur_groups)) + cur_pairs = [] + cur_groups = [] + cur_pairs.extend(grp_pairs) + cur_groups.append(grp) + if cur_pairs: + chunks_meta.append((np.asarray(cur_pairs, dtype=np.int32), + cur_groups)) + else: + chunks_meta = [] + for start in range(0, nkpts_ij, kpts_chunk): + end = min(start + kpts_chunk, nkpts_ij) + chunks_meta.append((np.asarray(kikj_idx[start:end], + dtype=np.int32), None)) + nchunks = len(chunks_meta) + + if max_buflen > buflen and nchunks == 1: + # Only meaningful when chunking did not bring usage under budget. log.warn('memory usage of outcore_auxe2 may be %.2f times over max_memory', (max_buflen/buflen - 1)) + if nchunks > 1: + log.debug('outcore_auxe2: splitting %d kpt pairs into %d chunks ' + '(<= %d pairs/chunk, max_buflen=%d, nauxc=%d)', + nkpts_ij, nchunks, kpts_chunk, max_buflen, nauxc) cpu0 = logger.process_clock(), logger.perf_counter() nsteps = len(sh_ranges) - row1 = 0 - for istep, (sh_start, sh_end, nrow) in enumerate(sh_ranges): - if aosym == 's2': - shls_slice = (sh_start, sh_end, jsh0, sh_end, ksh0, ksh1) + + for ichunk, (kikj_idx_chunk, chunk_groups) in enumerate(chunks_meta): + # Build a fresh int3c kernel restricted to this chunk's kpt-pairs. 
+ # The chunk's reindex_k follows the same convention as the original + # reindex_k assignment above. + if j_only or nkpts == 1: + reindex_k_chunk = kikj_idx_chunk // nkpts else: - shls_slice = (sh_start, sh_end, jsh0, jsh1, ksh0, ksh1) - outR, outI = int3c(shls_slice) - log.debug2(' step [%d/%d], shell range [%d:%d], len(buf) = %d', - istep+1, nsteps, sh_start, sh_end, nrow) - cpu0 = log.timer_debug1(f'outcore_auxe2 [{istep+1}/{nsteps}]', *cpu0) - - outR = list(outR) - if outI is not None: - outI = list(outI) - for k, idx in enumerate(kikj_idx): - outR[k] = self.fuse(outR[k], axis=1) - if f'{dataname}I/{idx}' in fswap and outI[k] is not None: - outI[k] = self.fuse(outI[k], axis=1) - - shls_slice = (sh_start, sh_end, 0, cell.nbas) - row0, row1 = row1, row1 + nrow - if merge_dd is not None: - if gamma_point_only: - merge_dd(outR[0], fswap[f'{dataname}R-dd/0'], shls_slice) - elif j_only or nkpts == 1: - for k, idx in enumerate(kikj_idx): - merge_dd(outR[k], fswap[f'{dataname}R-dd/{idx}'], shls_slice) - merge_dd(outI[k], fswap[f'{dataname}I-dd/{idx}'], shls_slice) - elif kk_idx is None: - for _, ki_idx, kj_idx, self_conj in kpt_ij_iters: - kpt_ij_idx = ki_idx * nkpts + kj_idx - if self_conj: - for ij_idx in kpt_ij_idx: - merge_dd(outR[ij_idx], fswap[f'{dataname}R-dd/{ij_idx}'], shls_slice) - merge_dd(outI[ij_idx], fswap[f'{dataname}I-dd/{ij_idx}'], shls_slice) - else: - kpt_ji_idx = kj_idx * nkpts + ki_idx - for ij_idx, ji_idx in zip(kpt_ij_idx, kpt_ji_idx): - j3cR_dd = np.asarray(fswap[f'{dataname}R-dd/{ij_idx}']) - merge_dd(outR[ij_idx], j3cR_dd, shls_slice) - merge_dd(outR[ji_idx], j3cR_dd.transpose(1,0,2), shls_slice) - j3cI_dd = np.asarray(fswap[f'{dataname}I-dd/{ij_idx}']) - merge_dd(outI[ij_idx], j3cI_dd, shls_slice) - merge_dd(outI[ji_idx],-j3cI_dd.transpose(1,0,2), shls_slice) + reindex_k_chunk = kikj_idx_chunk + chunk_int3c = self.gen_int3c_kernel( + intor, aosym, comp, j_only, + reindex_k=reindex_k_chunk, auxcell=self.fused_cell) + # Local map from global 
kpt-pair index -> position in outR/outI + pair_pos = {int(idx): k for k, idx in enumerate(kikj_idx_chunk)} + + row1 = 0 + for istep, (sh_start, sh_end, nrow) in enumerate(sh_ranges): + if aosym == 's2': + int3c_shls_slice = (sh_start, sh_end, jsh0, sh_end, ksh0, ksh1) else: - for k, idx in enumerate(kikj_idx): - merge_dd(outR[k], fswap[f'{dataname}R-dd/{idx}'], shls_slice) - merge_dd(outI[k], fswap[f'{dataname}I-dd/{idx}'], shls_slice) - - for k, idx in enumerate(kikj_idx): - fswap[f'{dataname}R/{idx}'][row0:row1] = outR[k] - if f'{dataname}I/{idx}' in fswap: - fswap[f'{dataname}I/{idx}'][row0:row1] = outI[k] - outR = outI = None + int3c_shls_slice = (sh_start, sh_end, jsh0, jsh1, ksh0, ksh1) + outR, outI = chunk_int3c(int3c_shls_slice) + log.debug2(' chunk [%d/%d] step [%d/%d], shell range [%d:%d], ' + 'len(buf) = %d', + ichunk+1, nchunks, istep+1, nsteps, sh_start, sh_end, nrow) + cpu0 = log.timer_debug1( + f'outcore_auxe2 chunk[{ichunk+1}/{nchunks}] ' + f'step[{istep+1}/{nsteps}]', *cpu0) + + outR = list(outR) + if outI is not None: + outI = list(outI) + for k, idx in enumerate(kikj_idx_chunk): + outR[k] = self.fuse(outR[k], axis=1) + if f'{dataname}I/{idx}' in fswap and outI[k] is not None: + outI[k] = self.fuse(outI[k], axis=1) + + merge_shls_slice = (sh_start, sh_end, 0, cell.nbas) + row0, row1 = row1, row1 + nrow + if merge_dd is not None: + if gamma_point_only: + merge_dd(outR[0], fswap[f'{dataname}R-dd/0'], + merge_shls_slice) + elif j_only or nkpts == 1: + for k, idx in enumerate(kikj_idx_chunk): + merge_dd(outR[k], fswap[f'{dataname}R-dd/{idx}'], + merge_shls_slice) + merge_dd(outI[k], fswap[f'{dataname}I-dd/{idx}'], + merge_shls_slice) + elif kk_idx is None: + for _, ki_idx, kj_idx, self_conj in chunk_groups: + kpt_ij_idx = ki_idx * nkpts + kj_idx + if self_conj: + for ij_idx in kpt_ij_idx: + ij_local = pair_pos[int(ij_idx)] + merge_dd(outR[ij_local], + fswap[f'{dataname}R-dd/{ij_idx}'], + merge_shls_slice) + merge_dd(outI[ij_local], + 
fswap[f'{dataname}I-dd/{ij_idx}'], + merge_shls_slice) + else: + kpt_ji_idx = kj_idx * nkpts + ki_idx + for ij_idx, ji_idx in zip(kpt_ij_idx, kpt_ji_idx): + ij_local = pair_pos[int(ij_idx)] + ji_local = pair_pos[int(ji_idx)] + j3cR_dd = np.asarray( + fswap[f'{dataname}R-dd/{ij_idx}']) + merge_dd(outR[ij_local], j3cR_dd, + merge_shls_slice) + merge_dd(outR[ji_local], + j3cR_dd.transpose(1,0,2), + merge_shls_slice) + j3cI_dd = np.asarray( + fswap[f'{dataname}I-dd/{ij_idx}']) + merge_dd(outI[ij_local], j3cI_dd, + merge_shls_slice) + merge_dd(outI[ji_local], + -j3cI_dd.transpose(1,0,2), + merge_shls_slice) + else: + for k, idx in enumerate(kikj_idx_chunk): + merge_dd(outR[k], fswap[f'{dataname}R-dd/{idx}'], + merge_shls_slice) + merge_dd(outI[k], fswap[f'{dataname}I-dd/{idx}'], + merge_shls_slice) + + for k, idx in enumerate(kikj_idx_chunk): + fswap[f'{dataname}R/{idx}'][row0:row1] = outR[k] + if f'{dataname}I/{idx}' in fswap: + fswap[f'{dataname}I/{idx}'][row0:row1] = outI[k] + outR = outI = None + chunk_int3c = None return fswap def weighted_ft_ao(self, kpt): diff --git a/pyscf/pbc/df/mdf.py b/pyscf/pbc/df/mdf.py index e8f522130a..00e869660b 100644 --- a/pyscf/pbc/df/mdf.py +++ b/pyscf/pbc/df/mdf.py @@ -22,7 +22,6 @@ J. Chem. Phys. 147, 164119 (2017) ''' -import tempfile import numpy as np import h5py import scipy.linalg @@ -89,7 +88,7 @@ def __init__(self, cell, kpts=np.zeros((1,3))): self.linear_dep_threshold = df.LINEAR_DEP_THR self._j_only = False # If _cderi_to_save is specified, the 3C-integral tensor will be saved in this file. 
- self._cderi_to_save = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + self._cderi_to_save = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) # If _cderi is specified, the 3C-integral tensor will be read from this file self._cderi = None self._rsh_df = {} # Range separated Coulomb DF objects diff --git a/pyscf/pbc/df/rsdf.py b/pyscf/pbc/df/rsdf.py index a9e7b13100..4f7142cad4 100644 --- a/pyscf/pbc/df/rsdf.py +++ b/pyscf/pbc/df/rsdf.py @@ -40,9 +40,9 @@ ''' import os +import sys import h5py import scipy.linalg -import tempfile import numpy as np from pyscf import lib @@ -424,10 +424,11 @@ def outcore_auxe2(self, cderi_file, intor='int3c2e', aosym='s2', comp=None, kptij_lst=None, j_only=False, dataname='j3c-junk', shls_slice=None): # Deadlock on NFS if you open an already-opened tmpfile in H5PY - # swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file)) + # swapfile = lib.NamedTemporaryFile(dir=os.path.dirname(cderi_file)) fswap = lib.H5TmpFile(dir=os.path.dirname(cderi_file), prefix='.outcore_auxe2_swap') # avoid trash files - os.unlink(fswap.filename) + if sys.platform != 'win32': + os.unlink(fswap.filename) cell = self.cell if self.use_bvk and self.kpts_band is None: diff --git a/pyscf/pbc/df/rsdf_builder.py b/pyscf/pbc/df/rsdf_builder.py index 846db5cd83..b222140b88 100644 --- a/pyscf/pbc/df/rsdf_builder.py +++ b/pyscf/pbc/df/rsdf_builder.py @@ -28,6 +28,7 @@ ''' import os +import sys import ctypes import warnings import tempfile @@ -384,7 +385,8 @@ def outcore_auxe2(self, cderi_file, intor='int3c2e', aosym='s2', comp=None, # as cderi_file. 
fswap = lib.H5TmpFile(dir=os.path.dirname(cderi_file), prefix='.outcore_auxe2_swap') # Unlink swapfile to avoid trash files - os.unlink(fswap.filename) + if sys.platform != 'win32': + os.unlink(fswap.filename) log = logger.new_logger(self) cell = self.cell diff --git a/pyscf/pbc/df/test/test_gdf_builder.py b/pyscf/pbc/df/test/test_gdf_builder.py index 5f49ef477e..aeddf16c96 100644 --- a/pyscf/pbc/df/test/test_gdf_builder.py +++ b/pyscf/pbc/df/test/test_gdf_builder.py @@ -13,7 +13,6 @@ # limitations under the License. import unittest -import tempfile import numpy as np import scipy.linalg from pyscf import lib @@ -108,7 +107,7 @@ def test_get_2c2e_cart(self): def test_make_j3c_gamma(self): dfbuilder = gdf_builder._CCGDFBuilder(cell, auxcell).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name) v2 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(lib.fp(v2), 1.5094843470069796, 7) @@ -124,7 +123,7 @@ def test_make_j3c_gamma(self): def test_make_j3c(self): dfbuilder = gdf_builder._CCGDFBuilder(cell, auxcell, kpts).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2') v_s2 = [] for ki in range(nkpts): @@ -148,7 +147,7 @@ def test_make_j3c(self): def test_make_j3c_j_only(self): dfbuilder = gdf_builder._CCGDFBuilder(cell, auxcell, kpts).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) v_s2 = [] for ki in range(nkpts): @@ -170,7 +169,7 @@ def test_make_j3c_gamma_2d(self): dimension=2) auxcell = df.make_auxcell(cell, auxbasis) dfbuilder = gdf_builder._CCGDFBuilder(cell, auxcell).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name) v2 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(lib.fp(v2.T.dot(v2)), 0.3289627476345819, 8) @@ -183,7 
+182,7 @@ def test_make_j3c_gamma_1d(self): dimension=1) auxcell = df.make_auxcell(cell, auxbasis) dfbuilder = gdf_builder._CCGDFBuilder(cell, auxcell).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name) v2 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(lib.fp(v2), 1.7171975296579753, 6) @@ -197,7 +196,7 @@ def test_make_j3c_gamma_0d(self): auxcell = df.make_auxcell(cell, auxbasis) dfbuilder = gdf_builder._CCGDFBuilder(cell, auxcell).build() ref = cholesky_eri(cell, auxmol=auxcell) - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name) v2 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(abs(v2 - ref).max(), 0, 9) @@ -278,7 +277,7 @@ def test_vs_fft(self): j3c = lib.dot(auxG.conj()*wcoulG, aopair.reshape(ngrids,-1)) j2c = scipy.linalg.cholesky(j2c[0], lower=True) ref = scipy.linalg.solve_triangular(j2c, j3c, lower=True) - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) v1 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(abs(ref - v1).max(), 0, 7) @@ -304,7 +303,7 @@ def test_get_2c2e_cart_lr(self): def test_make_j3c_gamma_lr(self): dfbuilder = gdf_builder._CCGDFBuilder(cell_lr, auxcell_lr).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name) v2 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(lib.fp(v2), 1.0942903795950072, 7) @@ -320,7 +319,7 @@ def test_make_j3c_gamma_lr(self): def test_make_j3c_lr(self): dfbuilder = gdf_builder._CCGDFBuilder(cell_lr, auxcell_lr, kpts).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2') v_s2 = [] for ki in range(nkpts): @@ -343,7 +342,7 @@ def test_make_j3c_lr(self): def test_make_j3c_j_only_lr(self): dfbuilder = 
gdf_builder._CCGDFBuilder(cell_lr, auxcell_lr, kpts).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) v_s2 = [] for ki in range(nkpts): @@ -392,7 +391,7 @@ def test_vs_fft_lr(self): j3c = lib.dot(auxG.conj()*wcoulG, aopair.reshape(ngrids,-1)) j2c = scipy.linalg.cholesky(j2c[0], lower=True) ref = scipy.linalg.solve_triangular(j2c, j3c, lower=True) - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) v1 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(abs(ref - v1).max(), 0, 7) @@ -418,7 +417,7 @@ def test_get_2c2e_cart_sr(self): def test_make_j3c_gamma_sr(self): dfbuilder = gdf_builder._CCGDFBuilder(cell_sr, auxcell_sr).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name) v2 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(lib.fp(v2), 0.9647178630555139, 7) @@ -434,7 +433,7 @@ def test_make_j3c_gamma_sr(self): def test_make_j3c_sr(self): dfbuilder = gdf_builder._CCGDFBuilder(cell_sr, auxcell_sr, kpts).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2') v_s2 = [] for ki in range(nkpts): @@ -458,7 +457,7 @@ def test_make_j3c_sr(self): def test_make_j3c_j_only_sr(self): dfbuilder = gdf_builder._CCGDFBuilder(cell_sr, auxcell_sr, kpts).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) v_s2 = [] for ki in range(nkpts): @@ -506,7 +505,7 @@ def test_vs_fft_sr(self): j3c = lib.dot(auxG.conj()*wcoulG, aopair.reshape(ngrids,-1)) j2c = scipy.linalg.cholesky(j2c[0], lower=True) ref = scipy.linalg.solve_triangular(j2c, j3c, lower=True) - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: 
dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) v1 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(abs(ref - v1).max(), 0, 7) diff --git a/pyscf/pbc/df/test/test_mdf_builder.py b/pyscf/pbc/df/test/test_mdf_builder.py index ca519c587b..f1de15a61d 100644 --- a/pyscf/pbc/df/test/test_mdf_builder.py +++ b/pyscf/pbc/df/test/test_mdf_builder.py @@ -13,7 +13,6 @@ # limitations under the License. import unittest -import tempfile import numpy as np import scipy.linalg from pyscf import lib @@ -109,7 +108,7 @@ def test_ccmdf_get_2c2e_cart(self): def test_ccmdf_make_j3c_gamma(self): dfbuilder = mdf._CCMDFBuilder(cell, auxcell).set(mesh=mesh).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2') v2 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(lib.fp(v2), 0.01486794482668373, 7) @@ -127,7 +126,7 @@ def test_ccmdf_make_j3c_gamma(self): def test_ccmdf_make_j3c(self): dfbuilder = mdf._CCMDFBuilder(cell, auxcell, kpts).set(mesh=mesh).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2') v_s2 = [] for ki in range(nkpts): @@ -149,7 +148,7 @@ def test_ccmdf_make_j3c(self): def test_ccmdf_make_j3c_j_only(self): dfbuilder = mdf._CCMDFBuilder(cell, auxcell, kpts).set(mesh=mesh).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) v_s2 = [] for ki in range(nkpts): @@ -200,7 +199,7 @@ def test_ccmdf_vs_fft(self): j2c = dfbuilder.eigenvalue_decomposed_metric(j2c[0]) ref = lib.dot(j2c[0], j3c) ref = ref.T.dot(ref) - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) v1 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(abs(ref - v1).max(), 0, 8) @@ -224,7 +223,7 @@ def test_rsmdf_get_2c2e_cart(self): def 
test_rsmdf_make_j3c_gamma(self): dfbuilder = mdf._RSMDFBuilder(cell, auxcell).set(mesh=mesh).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2') v2 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(lib.fp(v2), 0.01486794482668373, 7) @@ -257,7 +256,7 @@ def test_rsmdf_make_j3c_gamma(self): def test_rsmdf_make_j3c(self): dfbuilder = mdf._RSMDFBuilder(cell, auxcell, kpts).set(mesh=mesh).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2') v_s2 = [] for ki in range(nkpts): @@ -279,7 +278,7 @@ def test_rsmdf_make_j3c(self): def test_rsmdf_make_j3c_j_only(self): dfbuilder = mdf._RSMDFBuilder(cell, auxcell, kpts).set(mesh=mesh).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) v_s2 = [] for ki in range(nkpts): @@ -316,7 +315,7 @@ def test_ccmdf_get_2c2e_cart_lr(self): def test_ccmdf_make_j3c_gamma_lr(self): dfbuilder = mdf._CCMDFBuilder(cell_lr, auxcell_lr).set(mesh=mesh).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2') v2 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(lib.fp(v2), 1.0439710349332878e-05, 7) @@ -334,7 +333,7 @@ def test_ccmdf_make_j3c_gamma_lr(self): def test_ccmdf_make_j3c_lr(self): dfbuilder = mdf._CCMDFBuilder(cell_lr, auxcell_lr, kpts).set(mesh=mesh).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2') v_s2 = [] for ki in range(nkpts): @@ -356,7 +355,7 @@ def test_ccmdf_make_j3c_lr(self): def test_ccmdf_make_j3c_j_only_lr(self): dfbuilder = mdf._CCMDFBuilder(cell_lr, auxcell_lr, kpts).set(mesh=mesh).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: 
dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) v_s2 = [] for ki in range(nkpts): @@ -411,7 +410,7 @@ def test_ccmdf_vs_fft_lr(self): j2c = dfbuilder.eigenvalue_decomposed_metric(j2c[0]) ref = lib.dot(j2c[0], j3c) ref = ref.T.dot(ref) - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) v1 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(abs(ref - v1).max(), 0, 9) @@ -437,7 +436,7 @@ def test_ccmdf_get_2c2e_cart_sr(self): def test_ccmdf_make_j3c_gamma_sr(self): dfbuilder = mdf._CCMDFBuilder(cell_sr, auxcell_sr).set(mesh=mesh).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2') v2 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(lib.fp(v2), 0.014857466177913803, 7) @@ -455,7 +454,7 @@ def test_ccmdf_make_j3c_gamma_sr(self): def test_ccmdf_make_j3c_sr(self): dfbuilder = mdf._CCMDFBuilder(cell_sr, auxcell_sr, kpts).set(mesh=mesh).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2') v_s2 = [] for ki in range(nkpts): @@ -477,7 +476,7 @@ def test_ccmdf_make_j3c_sr(self): def test_ccmdf_make_j3c_j_only_sr(self): dfbuilder = mdf._CCMDFBuilder(cell_sr, auxcell_sr, kpts).set(mesh=mesh).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) v_s2 = [] for ki in range(nkpts): @@ -531,7 +530,7 @@ def test_ccmdf_vs_fft_sr(self): j2c = dfbuilder.eigenvalue_decomposed_metric(j2c[0]) ref = lib.dot(j2c[0], j3c) ref = ref.T.dot(ref) - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) v1 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(abs(ref - v1).max(), 0, 8) @@ -557,7 +556,7 @@ def test_rsmdf_get_2c2e_cart_sr(self): def 
test_rsmdf_make_j3c_gamma_sr(self): dfbuilder = mdf._RSMDFBuilder(cell_sr, auxcell_sr).set(mesh=mesh).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2') v2 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(lib.fp(v2), 0.014857466177913803, 7) @@ -590,7 +589,7 @@ def test_rsmdf_make_j3c_gamma_sr(self): def test_rsmdf_make_j3c_sr(self): dfbuilder = mdf._RSMDFBuilder(cell_sr, auxcell_sr, kpts).set(mesh=mesh).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2') v_s2 = [] for ki in range(nkpts): @@ -612,7 +611,7 @@ def test_rsmdf_make_j3c_sr(self): def test_rsmdf_make_j3c_j_only_sr(self): dfbuilder = mdf._RSMDFBuilder(cell_sr, auxcell_sr, kpts).set(mesh=mesh).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) v_s2 = [] for ki in range(nkpts): diff --git a/pyscf/pbc/df/test/test_outcore.py b/pyscf/pbc/df/test/test_outcore.py index 4541db1327..d2fcf254e0 100644 --- a/pyscf/pbc/df/test/test_outcore.py +++ b/pyscf/pbc/df/test/test_outcore.py @@ -13,7 +13,6 @@ # limitations under the License. 
import unittest -import tempfile import numpy import h5py from pyscf import lib @@ -45,7 +44,7 @@ def test_aux_e1(self): numpy.random.seed(1) kptij_lst = numpy.random.random((3,2,3)) kptij_lst[0] = 0 - with tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) as tmpfile: + with lib.NamedTemporaryFile(dir=lib.param.TMPDIR) as tmpfile: outcore.aux_e1(cell, cell, tmpfile.name, aosym='s2', comp=1, kptij_lst=kptij_lst, verbose=0) refk = incore.aux_e2(cell, cell, aosym='s1', kptij_lst=kptij_lst) diff --git a/pyscf/pbc/df/test/test_rsdf_1.py b/pyscf/pbc/df/test/test_rsdf_1.py index 9f29f9b9e1..01d7f37884 100644 --- a/pyscf/pbc/df/test/test_rsdf_1.py +++ b/pyscf/pbc/df/test/test_rsdf_1.py @@ -13,7 +13,6 @@ # limitations under the License. import unittest -import tempfile import numpy as np import scipy.linalg from pyscf import lib @@ -88,7 +87,7 @@ def test_make_j3c_gamma(self): dfbuilder = rsdf._RSGDFBuilder(cell, auxcell, kpts) dfbuilder.__dict__.update(dfobj.__dict__) dfbuilder.build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name) v2 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(lib.fp(v2), 1.4877735852543206, 8) @@ -97,7 +96,7 @@ def test_make_j3c(self): dfbuilder = rsdf._RSGDFBuilder(cell, auxcell, kpts) dfbuilder.__dict__.update(dfobj.__dict__) dfbuilder.build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2') self.assertAlmostEqual(lib.fp(load(tmpf.name, kpts[[0, 0]])), 1.4877735860707935, 7) self.assertAlmostEqual(lib.fp(load(tmpf.name, kpts[[2, 4]])), 4.530919637533813+0.10852447737595214j, 7) @@ -107,7 +106,7 @@ def test_make_j3c_j_only(self): dfbuilder = rsdf._RSGDFBuilder(cell, auxcell, kpts) dfbuilder.__dict__.update(dfobj.__dict__) dfbuilder.build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) 
self.assertAlmostEqual(lib.fp(load(tmpf.name, kpts[[0, 0]])), 1.4877735860707935, 7) self.assertAlmostEqual(lib.fp(load(tmpf.name, kpts[[2, 2]])), 1.4492567814298059, 7) @@ -147,7 +146,7 @@ def test_vs_fft(self): j3c = lib.dot(auxG.conj()*coulG, aopair.reshape(ngrids,-1)) j2c = scipy.linalg.cholesky(j2c[0], lower=True) ref = scipy.linalg.solve_triangular(j2c, j3c, lower=True) - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) v1 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(abs(ref - v1).max(), 0, 9) diff --git a/pyscf/pbc/df/test/test_rsdf_builder.py b/pyscf/pbc/df/test/test_rsdf_builder.py index 0edec5ebd4..61abe2956b 100644 --- a/pyscf/pbc/df/test/test_rsdf_builder.py +++ b/pyscf/pbc/df/test/test_rsdf_builder.py @@ -13,7 +13,6 @@ # limitations under the License. import unittest -import tempfile import numpy as np import scipy.linalg from pyscf import lib @@ -116,7 +115,7 @@ def test_get_2c2e_cart(self): def test_make_j3c_gamma(self): dfbuilder = rsdf_builder._RSGDFBuilder(cell, auxcell).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name) v2 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(lib.fp(v2), 1.5094843470069796, 7) @@ -145,7 +144,7 @@ def test_make_j3c_gamma(self): def test_make_j3c(self): dfbuilder = rsdf_builder._RSGDFBuilder(cell, auxcell, kpts).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2') v_s2 = [] for ki in range(nkpts): @@ -168,7 +167,7 @@ def test_make_j3c(self): def test_make_j3c_j_only(self): dfbuilder = rsdf_builder._RSGDFBuilder(cell, auxcell, kpts).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) v_s2 = [] for ki in range(nkpts): @@ -189,13 +188,13 @@ def 
test_make_j3c_kptij_lst(self): kj_idx = np.array([15, 18, 21, 1, 2 , 4, 5]) kij_idx = np.array([ki_idx,kj_idx]).T kptij_lst = kpts[kij_idx] - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: cderi = tmpf.name dfbuilder.make_j3c(cderi, aosym='s1') with df.CDERIArray(cderi) as cderi_array: ref = np.array([cderi_array[ki, kj] for ki, kj in kij_idx]) - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: cderi = tmpf.name dfbuilder.make_j3c(cderi, aosym='s1', kptij_lst=kptij_lst) with df.CDERIArray(cderi) as cderi_array: @@ -210,7 +209,7 @@ def test_make_j3c_gamma_2d(self): dimension=2) auxcell = df.make_auxcell(cell, auxbasis) dfbuilder = rsdf_builder._RSGDFBuilder(cell, auxcell).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name) v2 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(lib.fp(v2.T.dot(v2)), 0.3289627476345819, 7) @@ -223,7 +222,7 @@ def test_make_j3c_gamma_1d(self): dimension=1) auxcell = df.make_auxcell(cell, auxbasis) dfbuilder = rsdf_builder._RSGDFBuilder(cell, auxcell).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name) v2 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(lib.fp(v2), 1.7171973261620863, 5) @@ -236,7 +235,7 @@ def test_make_j3c_gamma_0d(self): dimension=0) auxcell = df.make_auxcell(cell, auxbasis) dfbuilder = rsdf_builder._RSGDFBuilder(cell, auxcell).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name) v2 = load(tmpf.name, kpts[[0, 0]]) ref = cholesky_eri(cell, auxmol=auxcell) @@ -314,7 +313,7 @@ def test_vs_fft(self): j3c = lib.dot(auxG.conj()*wcoulG, aopair.reshape(ngrids,-1)) j2c = scipy.linalg.cholesky(j2c[0], lower=True) ref = scipy.linalg.solve_triangular(j2c, j3c, lower=True) - with tempfile.NamedTemporaryFile() as tmpf: + with 
lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) v1 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(abs(ref - v1).max(), 0, 7) @@ -344,7 +343,7 @@ def test_get_2c2e_cart_sr(self): def test_make_j3c_gamma_sr(self): dfbuilder = rsdf_builder._RSGDFBuilder(cell_sr, auxcell_sr).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name) v2 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(lib.fp(v2), 0.9647178630614499, 8) @@ -373,7 +372,7 @@ def test_make_j3c_gamma_sr(self): def test_make_j3c_sr_high_cost(self): dfbuilder = rsdf_builder._RSGDFBuilder(cell_sr, auxcell_sr, kpts).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2') v_s2 = [] for ki in range(nkpts): @@ -397,7 +396,7 @@ def test_make_j3c_sr_high_cost(self): def test_make_j3c_j_only_sr(self): dfbuilder = rsdf_builder._RSGDFBuilder(cell_sr, auxcell_sr, kpts).build() - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) v_s2 = [] for ki in range(nkpts): @@ -445,7 +444,7 @@ def test_vs_fft_sr(self): j3c = lib.dot(auxG.conj()*wcoulG, aopair.reshape(ngrids,-1)) j2c = scipy.linalg.cholesky(j2c[0], lower=True) ref = scipy.linalg.solve_triangular(j2c, j3c, lower=True) - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.make_j3c(tmpf.name, aosym='s2', j_only=True) v1 = load(tmpf.name, kpts[[0, 0]]) self.assertAlmostEqual(abs(ref - v1).max(), 0, 7) @@ -471,7 +470,7 @@ def test_off_center_kpts_rsdf_vs_fft(self): dfbuilder = rsdf_builder._RSGDFBuilder(cell, auxcell, kpts) dfbuilder.fft_dd_block = False dfbuilder.exclude_d_aux = False - with tempfile.NamedTemporaryFile() as tmpf: + with lib.NamedTemporaryFile() as tmpf: dfbuilder.build() dfbuilder.make_j3c(tmpf.name) nkpts = len(kpts) diff 
--git a/pyscf/pbc/dft/test/test_krkspu.py b/pyscf/pbc/dft/test/test_krkspu.py index 732d1a6b8c..3dda042b01 100644 --- a/pyscf/pbc/dft/test/test_krkspu.py +++ b/pyscf/pbc/dft/test/test_krkspu.py @@ -93,7 +93,7 @@ def test_get_veff(self): self.assertAlmostEqual(vxc.E_U, 0.07587726255165786, 11) self.assertAlmostEqual(lib.fp(vxc), 12.77643098220399, 8) - def test_KRKSpU_linear_response(self): + def test_KRKSpU_linear_response_high_cost(self): cell = pgto.Cell() cell.unit = 'A' cell.atom = 'C 0., 0., 0.; C 0.8917, 0.8917, 0.8917' diff --git a/pyscf/pbc/dft/test/test_rks.py b/pyscf/pbc/dft/test/test_rks.py index 8acb246522..ec641b2f1f 100644 --- a/pyscf/pbc/dft/test/test_rks.py +++ b/pyscf/pbc/dft/test/test_rks.py @@ -18,8 +18,8 @@ # import unittest -import tempfile import numpy as np +from pyscf import lib from pyscf.pbc import gto as pbcgto from pyscf.pbc import dft as pbcdft import pyscf.pbc @@ -82,8 +82,8 @@ def test_chkfile_k_point(self): cell.verbose = 0 cell.build() mf1 = pbcdft.RKS(cell) - mf1.chkfile = tempfile.NamedTemporaryFile().name mf1.max_cycle = 1 + mf1.chkfile = lib.NamedTemporaryFile().name mf1.kernel() cell = pbcgto.Cell() diff --git a/pyscf/pbc/gto/test/test_cell.py b/pyscf/pbc/gto/test/test_cell.py index cb2ce01c2b..9305ab248b 100644 --- a/pyscf/pbc/gto/test/test_cell.py +++ b/pyscf/pbc/gto/test/test_cell.py @@ -17,7 +17,6 @@ # import unittest -import tempfile import ctypes import numpy import numpy as np @@ -65,7 +64,7 @@ def test_nimgs(self): def test_Gv(self): a = cl1.get_Gv() - self.assertAlmostEqual(lib.fp(a), -99.791927068519939, 10) + self.assertAlmostEqual(lib.fp(a), -99.791927068519939, 9) def test_SI(self): a = cl1.get_SI() @@ -616,7 +615,7 @@ def test_fromstring(self): def test_fromfile(self): ref = cl.atom_coords().copy() - with tempfile.NamedTemporaryFile() as f: + with lib.NamedTemporaryFile() as f: cl.tofile(f.name, 'xyz') cell = pgto.Cell() cell.fromfile(f.name, 'xyz') diff --git a/pyscf/pbc/gw/gw_ac.py b/pyscf/pbc/gw/gw_ac.py new 
file mode 100644 index 0000000000..96590b252c --- /dev/null +++ b/pyscf/pbc/gw/gw_ac.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Tianyu Zhu +# Author: Christopher Hillenbrand +# Author: Jiachen Li +# + +""" +PBC gamma-point spin-restricted G0W0 method based on the analytic continuation scheme. +This implementation has N^4 scaling, +and is faster than GW-CD (N^4~N5) and fully analytic GW (N^6) methods. +GW-AC is recommended for valence states only, and is inaccurate for core states. + +References: + T. Zhu and G.K.-L. Chan, J. Chem. Theory. Comput. 17, 727-741 (2021) + New J. Phys. 
14 053020 (2012) +""" + +from functools import reduce +import numpy as np + +from pyscf.ao2mo._ao2mo import nr_e2 +from pyscf.lib import current_memory, logger +from pyscf.pbc import df, scf +from pyscf.pbc.df.fft_ao2mo import _format_kpts +from pyscf.pbc.df.df_ao2mo import warn_pbc2d_eri + +from pyscf.gw.gw_ac import GWAC as GWAC_mol + + +class GWAC(GWAC_mol): + def __init__(self, mf, frozen=None, auxbasis=None): + if abs(mf.kpt).max() > 1e-9: + raise NotImplementedError + warn_pbc2d_eri(mf) + + GWAC_mol.__init__(self, mf, frozen=frozen, auxbasis=auxbasis) + self.fc = False + + return + + def dump_flags(self): + log = logger.Logger(self.stdout, self.verbose) + log.info('') + log.info('******** %s ********', self.__class__) + log.info('method = %s', self.__class__.__name__) + nocc = self.nocc + nvir = self.nmo - nocc + log.info('GW nocc = %d, nvir = %d', nocc, nvir) + log.info('frozen orbitals = %s', self.frozen) + log.info('off-diagonal self-energy = %s', self.fullsigma) + log.info('GW density matrix = %s', self.rdm) + log.info('density-fitting for exchange = %s', self.vhf_df) + log.info('finite-size correction = %s', self.fc) + log.info('outcore for self-energy= %s', self.outcore) + if self.outcore is True: + log.info('outcore segment size = %d', self.segsize) + log.info('broadening parameter = %.3e', self.eta) + if self.nw2 is None: + log.info('number of grids = %d', self.nw) + else: + log.info('grid size for W is %d', self.nw) + log.info('grid size for self-energy is %d', self.nw2) + log.info('analytic continuation method = %s', self.ac) + log.info('imaginary frequency cutoff = %.1f', self.ac_iw_cutoff) + if self.ac == 'pade': + log.info('Pade points = %d', self.ac_pade_npts) + log.info('Pade step ratio = %.3f', self.ac_pade_step_ratio) + log.info('use perturbative linearized QP eqn = %s', self.qpe_linearized) + if self.qpe_linearized is True: + log.info('linearized factor range = %s', self.qpe_linearized_range) + else: + log.info('QPE max iter = %d', 
self.qpe_max_iter) + log.info('QPE tolerance = %.1e', self.qpe_tol) + log.info('') + return + + def initialize_df(self, auxbasis=None): + """Initialize density fitting. + + Parameters + ---------- + auxbasis : str, optional + name of auxiliary basis set, by default None + """ + if getattr(self._scf, 'with_df', None): + self.with_df = self._scf.with_df + else: + self.with_df = df.DF(self._scf.mol) + if auxbasis is not None: + self.with_df.auxbasis = auxbasis + else: + try: + self.with_df.auxbasis = df.make_auxbasis(self._scf.mol, mp2fit=True) + except RuntimeError: + self.with_df.auxbasis = df.make_auxbasis(self._scf.mol, mp2fit=False) + self._keys.update(['with_df']) + return + + def ao2mo(self, mo_coeff=None): + """Transform density-fitting integral from AO to MO. + + Parameters + ---------- + mo_coeff : double 2d array, optional + coefficient from AO to MO, by default None + + Returns + ------- + Lpq : double 3d array + three-center density-fitting matrix in MO + """ + if mo_coeff is None: + mo_coeff = self.mo_coeff + nmo = mo_coeff.shape[1] + nao = self.mo_coeff.shape[0] + naux = self.with_df.get_naoaux() + kpts = self._scf.with_df.kpts + max_memory = max(2000, self._scf.max_memory - current_memory()[0] - nao**2 * naux * 8 / 1e6) + + mo = np.asarray(mo_coeff, order='F') + ijslice = (0, nmo, 0, nmo) + + kptijkl = _format_kpts(kpts) + eri_3d = [] + for LpqR, _, _ in self._scf.with_df.sr_loop(kptijkl[:2], max_memory=0.3 * max_memory, compact=False): + Lpq = None + Lpq = nr_e2(LpqR.reshape(-1, nao, nao), mo, ijslice, aosym='s1', mosym='s1', out=Lpq) + eri_3d.append(Lpq) + eri_3d = np.vstack(eri_3d).reshape(-1, nmo, nmo) + + return eri_3d + + def loop_ao2mo(self, mo_coeff=None, ijslice=None): + """Transform density-fitting integral from AO to MO by block. 
+ + Parameters + ---------- + mo_coeff : double 2d array, optional + coefficient from AO to MO, by default None + ijslice : tuple, optional + tuples for (1st idx start, 1st idx end, 2nd idx start, 2nd idx end), by default None + + Returns + ------- + eri_3d : double 3d array + three-center density-fitting matrix in MO in a block + """ + if mo_coeff is None: + mo_coeff = self.mo_coeff + nmo = mo_coeff.shape[1] + nao = self.mo_coeff.shape[0] + naux = self.with_df.get_naoaux() + kpts = self._scf.with_df.kpts + max_memory = max(2000, self._scf.max_memory - current_memory()[0] - nao**2 * naux * 8 / 1e6) + + mo = np.asarray(mo_coeff, order='F') + if ijslice is None: + ijslice = (0, nmo, 0, nmo) + nislice = ijslice[1] - ijslice[0] + njslice = ijslice[3] - ijslice[2] + + kptijkl = _format_kpts(kpts) + eri_3d = [] + for LpqR, _, _ in self._scf.with_df.sr_loop(kptijkl[:2], max_memory=0.2 * max_memory, compact=False): + Lpq = None + Lpq = nr_e2(LpqR.reshape(-1, nao, nao), mo, ijslice, aosym='s1', mosym='s1', out=Lpq) + eri_3d.append(Lpq) + eri_3d = np.vstack(eri_3d).reshape(-1, nislice, njslice) + + return eri_3d + + def get_sigma_exchange(self, mo_coeff): + """Get exchange self-energy (EXX). + The exchange self-energy is calculated via PySCF functions with exxdiv=None, + then the finite-size correction for GW exchange self-energy is added if self.fc is True. 
+ The finite-size correction is defined similar to k-point GW method, + as in equation 46 in doi.org/10.1021/acs.jctc.0c00704 + + Parameters + ---------- + mo_coeff : double 2d array + orbital coefficient + + Returns + ------- + vk : double 2d array + exchange self-energy + """ + dm = self._scf.make_rdm1() + if isinstance(self._scf.with_df, df.GDF): + rhf = scf.RHF(self.mol).density_fit() + elif isinstance(self._scf.with_df, df.RSDF): + rhf = scf.RHF(self.mol).rs_density_fit() + if hasattr(self._scf, 'sigma'): + rhf = scf.addons.smearing_(rhf, sigma=self._scf.sigma, method=self._scf.smearing_method) + rhf.exxdiv = None + rhf.with_df = self.with_df + vk = rhf.get_veff(self.mol, dm) - rhf.get_j(self.mol, dm) + vk = reduce(np.matmul, (mo_coeff.T, vk, mo_coeff)) + + if self.fc: + vk_corr = -2.0 / np.pi * (6.0 * np.pi**2 / self.mol.vol) ** (1.0 / 3.0) + for i in range(self.nocc): + vk[i, i] = vk[i, i] + vk_corr + return vk diff --git a/pyscf/pbc/gw/krgw_ac.py b/pyscf/pbc/gw/krgw_ac.py index c9a67b3216..5c828e611f 100644 --- a/pyscf/pbc/gw/krgw_ac.py +++ b/pyscf/pbc/gw/krgw_ac.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2021 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,250 +14,495 @@ # limitations under the License. # # Author: Tianyu Zhu +# Author: Christopher Hillenbrand +# Author: Jiachen Li # ''' -PBC spin-restricted G0W0-AC QP eigenvalues with k-point sampling -This implementation has N^4 scaling, and is faster than GW-CD (N^4) -and analytic GW (N^6) methods. +Periodic spin-restricted G0W0 method based on the analytic continuation scheme. +This implementation has N^4 scaling, +and is faster than GW-CD (N^4~N^5) and fully analytic GW (N^6) methods. GW-AC is recommended for valence states only, and is inaccurate for core states. -Method: - See T. 
Zhu and G.K.-L. Chan, arxiv:2007.03148 (2020) for details - Compute Sigma on imaginary frequency with density fitting, - then analytically continued to real frequency. - Gaussian density fitting must be used (FFTDF and MDF are not supported). +References: + T. Zhu and G.K.-L. Chan, J. Chem. Theory. Comput. 17, 727-741 (2021) + New J. Phys. 14 053020 (2012) ''' from functools import reduce -import numpy -import numpy as np import h5py -from scipy.optimize import newton, least_squares +import numpy as np +import scipy +import time + +import scipy.linalg from pyscf import lib -from pyscf.lib import logger from pyscf.ao2mo import _ao2mo from pyscf.ao2mo.incore import _conc_mos -from pyscf.pbc import df, dft, scf -from pyscf.pbc.mp.kmp2 import get_nocc, get_nmo, get_frozen_mask -from pyscf import __config__ +from pyscf.lib import einsum, logger, temporary_env +from pyscf.pbc import df, dft +from pyscf.pbc.mp.kmp2 import get_frozen_mask -einsum = lib.einsum +from pyscf.gw.utils.ac_grid import _get_scaled_legendre_roots, PadeAC, TwoPoleAC +from pyscf.gw.utils.gw_np_helper import mkslice, array_scale -def kernel(gw, mo_energy, mo_coeff, orbs=None, - kptlist=None, nw=None, verbose=logger.NOTE): - '''GW-corrected quasiparticle orbital energies - Returns: - A list : converged, mo_energy, mo_coeff - ''' +def kernel(gw): mf = gw._scf - assert gw.frozen is None + nocc = gw.nocc + nmo = gw.nmo + nkpts = gw.nkpts - if orbs is None: - orbs = range(gw.nmo) + # set frozen orbitals + gw.set_frozen_orbs() + orbs = gw.orbs + orbs_frz = gw.orbs_frz + kptlist = gw.kptlist if kptlist is None: - kptlist = range(gw.nkpts) - nkpts = gw.nkpts - nklist = len(kptlist) + gw.kptlist = kptlist = range(gw.nkpts) + mo_energy_frz = _mo_energy_frozen(gw, gw.mo_energy) + mo_coeff_frz = _mo_frozen(gw, gw.mo_coeff) # v_xc - dm = np.array(mf.make_rdm1()) - v_mf = np.array(mf.get_veff()) - np.array(mf.get_j(dm_kpts=dm)) + with temporary_env(mf, verbose=0), temporary_env(mf.mol, verbose=0), 
temporary_env(mf.with_df, verbose=0): + dm = mf.make_rdm1() + v_mf_ao = mf.get_veff() - mf.get_j(dm_kpts=dm) + v_mf = np.zeros(shape=[nkpts, nmo, nmo], dtype=np.complex128) for k in range(nkpts): - v_mf[k] = reduce(numpy.dot, (mo_coeff[k].T.conj(), v_mf[k], mo_coeff[k])) - - nocc = gw.nocc - nmo = gw.nmo + v_mf[k] = reduce(np.matmul, (mo_coeff_frz[k].T.conj(), v_mf_ao[k], mo_coeff_frz[k])) + gw.vxc = v_mf # v_hf from DFT/HF density + vk = gw.get_sigma_exchange() + + # finite size correction for exchange self-energy if gw.fc: - exxdiv = 'ewald' + vk_corr = -2.0 / np.pi * (6.0 * np.pi**2 / gw.mol.vol / nkpts) ** (1.0 / 3.0) + for k in range(nkpts): + for i in range(nocc): + vk[k][i, i] = vk[k][i, i] + vk_corr + gw.vk = vk + + # set up Fermi level + gw.ef = ef = get_ef(kmf=mf, mo_energy=mf.mo_energy) + + # grids for integration on imaginary axis + gw.freqs, gw.wts = freqs, wts = _get_scaled_legendre_roots(gw.nw) + + # calculate self-energy on imaginary axis + if gw.outcore: + sigmaI, omega = get_sigma_outcore( + gw, freqs, wts, ef=ef, mo_energy=mo_energy_frz, orbs=orbs_frz, kptlist=kptlist, iw_cutoff=gw.ac_iw_cutoff, + fullsigma=gw.fullsigma, + ) else: - exxdiv = None - rhf = scf.KRHF(gw.mol, gw.kpts, exxdiv=exxdiv) - rhf.with_df = gw.with_df - if getattr(gw.with_df, '_cderi', None) is None: - raise RuntimeError('Found incompatible integral scheme %s.' 
- 'KGWAC can be only used with GDF integrals' % - gw.with_df.__class__) - if rhf.with_df._j_only: - logger.debug(gw, 'Rebuild CDERI for exchange integrals') - rhf.with_df.build(j_only=False) - - vk = rhf.get_veff(gw.mol,dm_kpts=dm) - rhf.get_j(gw.mol,dm_kpts=dm) - for k in range(nkpts): - vk[k] = reduce(numpy.dot, (mo_coeff[k].T.conj(), vk[k], mo_coeff[k])) - - # Grids for integration on imaginary axis - freqs,wts = _get_scaled_legendre_roots(nw) + sigmaI, omega = get_sigma( + gw, freqs, wts, ef=ef, mo_energy=mo_energy_frz, orbs=orbs_frz, kptlist=kptlist, iw_cutoff=gw.ac_iw_cutoff, + fullsigma=gw.fullsigma, + ) - # Compute self-energy on imaginary axis i*[0,iw_cutoff] - sigmaI, omega = get_sigma_diag(gw, orbs, kptlist, freqs, wts, iw_cutoff=5.) - - # Analytic continuation - coeff = [] + # analytic continuation if gw.ac == 'twopole': - for k in range(nklist): - coeff.append(AC_twopole_diag(sigmaI[k], omega, orbs, nocc)) + acobj = TwoPoleAC(list(range(nmo)), nocc) elif gw.ac == 'pade': - for k in range(nklist): - coeff_tmp, omega_fit = AC_pade_thiele_diag(sigmaI[k], omega) - coeff.append(coeff_tmp) - coeff = np.array(coeff) - - conv = True - # This code does not support metals - homo = -99. - lumo = 99. - for k in range(nkpts): - if homo < mf.mo_energy[k][nocc-1]: - homo = mf.mo_energy[k][nocc-1] - if lumo > mf.mo_energy[k][nocc]: - lumo = mf.mo_energy[k][nocc] - ef = (homo+lumo)/2. 
- - mo_energy = np.zeros_like(np.array(mf.mo_energy)) - for k in range(nklist): - kn = kptlist[k] - for p in orbs: - if gw.linearized: + acobj = PadeAC(npts=gw.ac_pade_npts, step_ratio=gw.ac_pade_step_ratio) + else: + raise ValueError('Unknown GW-AC type %s' % (str(gw.ac))) + + acobj.ac_fit(sigmaI, omega, axis=-1) + + if gw.fullsigma: + diag_acobj = acobj.diagonal(axis1=1, axis2=2) + else: + diag_acobj = acobj + + mo_energy = np.zeros_like(mf.mo_energy) + for ik, k in enumerate(kptlist): + for ip, p in enumerate(orbs_frz): + if gw.qpe_linearized: # linearized G0W0 de = 1e-6 - ep = mf.mo_energy[kn][p] - #TODO: analytic sigma derivative - if gw.ac == 'twopole': - sigmaR = two_pole(ep-ef, coeff[k,:,p-orbs[0]]).real - dsigma = two_pole(ep-ef+de, coeff[k,:,p-orbs[0]]).real - sigmaR.real - elif gw.ac == 'pade': - sigmaR = pade_thiele(ep-ef, omega_fit[p-orbs[0]], coeff[k,:,p-orbs[0]]).real - dsigma = pade_thiele(ep-ef+de, omega_fit[p-orbs[0]], coeff[k,:,p-orbs[0]]).real - sigmaR.real - zn = 1.0/(1.0-dsigma/de) - e = ep + zn*(sigmaR.real + vk[kn,p,p].real - v_mf[kn,p,p].real) - mo_energy[kn,p] = e + ep = mf.mo_energy[k][orbs[ip]] + sigmaR = diag_acobj[ik, ip].ac_eval(ep).real + dsigma = diag_acobj[ik, ip].ac_eval(ep + de).real - sigmaR.real + zn = 1.0 / (1.0 - dsigma / de) + if gw.qpe_linearized_range is not None: + zn = 1.0 if zn < gw.qpe_linearized_range[0] or zn > gw.qpe_linearized_range[1] else zn + mo_energy[k, orbs[ip]] = ep + zn * (sigmaR + vk[k, p, p] - v_mf[k, p, p]).real else: # self-consistently solve QP equation def quasiparticle(omega): - if gw.ac == 'twopole': - sigmaR = two_pole(omega-ef, coeff[k,:,p-orbs[0]]).real - elif gw.ac == 'pade': - sigmaR = pade_thiele(omega-ef, omega_fit[p-orbs[0]], coeff[k,:,p-orbs[0]]).real - return omega - mf.mo_energy[kn][p] - (sigmaR.real + vk[kn,p,p].real - v_mf[kn,p,p].real) + sigmaR = diag_acobj[ik, ip].ac_eval(omega) + return omega - mf.mo_energy[k][orbs[ip]] - (sigmaR + vk[k, p, p] - v_mf[k, p, p]).real + try: - e = 
newton(quasiparticle, mf.mo_energy[kn][p], tol=1e-6, maxiter=100) - mo_energy[kn,p] = e + mo_energy[k, orbs[ip]] = scipy.optimize.newton( + quasiparticle, mf.mo_energy[k][orbs[ip]], tol=gw.qpe_tol, maxiter=gw.qpe_max_iter + ) except RuntimeError: - conv = False - mo_coeff = mf.mo_coeff + logger.warn(gw, 'QPE for k=%d orbital=%d not converged!', k, orbs[ip]) - if gw.verbose >= logger.DEBUG: - numpy.set_printoptions(threshold=nmo) + # save GW results + gw.mo_energy = mo_energy + gw.acobj = acobj + + with np.printoptions(threshold=len(mf.mo_energy[0])): for k in range(nkpts): - logger.debug(gw, ' GW mo_energy @ k%d =\n%s', k,mo_energy[k]) - numpy.set_printoptions(threshold=1000) + logger.debug(gw, ' GW mo_energy @ k%d =\n%s', k, mo_energy[k]) + logger.warn(gw, 'GW QP energies may not be sorted from min to max') + + if gw.writefile > 0: + with h5py.File('vxc.h5', 'w') as feri: + feri['vk'] = np.asarray(vk) + feri['v_mf'] = np.asarray(v_mf) + + with h5py.File('sigma_imag.h5', 'w') as feri: + feri['sigmaI'] = np.asarray(sigmaI) + feri['omega'] = np.asarray(omega) + if gw.sigmaI is not None: + feri['sigmaI_full'] = np.asarray(gw.sigmaI) + + acobj.save('ac_coeff.h5') + + return + + +def get_rho_response(omega, mo_energy, Lia, kidx): + """Get Pi=PV. + P is density-density response function. + V is two-electron integral. 
+ See equation 24 in 10.1021/acs.jctc.0c00704 + + Parameters + ---------- + omega : double + real position of imaginary frequency + mo_energy : double 2d array + orbital energy + Lia : complex 4d ndarray + occupied-virtual block of three-center density-fitting matrix in MO + kidx : list + momentum-conserved k-point list kj=kidx[ki] + + Returns + ------- + Pi : complex ndarray + Pi in auxiliary basis at freq iw + """ + nkpts, naux, nocc, nvir = Lia.shape - return conv, mo_energy, mo_coeff + # Compute Pi for kL + Pi = np.zeros(shape=[naux, naux], dtype=np.complex128) + for i in range(nkpts): + # Find ka that conserves with ki and kL (-ki+ka+kL=G) + a = kidx[i] + eia = mo_energy[i, :nocc, None] - mo_energy[a, None, nocc:] + Lia_i = Lia[i] + eia = eia / (omega**2 + eia**2) + Pia = Lia_i * eia + # Response from both spin-up and spin-down density + # Pi += (4./nkpts) * einsum('Pia,Qia->PQ', Pia, Lov.conj()) + scipy.linalg.blas.zgemm( + alpha=4.0 / nkpts, + a=Lia_i.reshape(naux, nocc * nvir).T, + b=Pia.reshape(naux, nocc * nvir).T, + c=Pi.T, + trans_a=2, + trans_b=0, + beta=1.0, + overwrite_c=True, + ) + Pia = Lia_i = None + + return Pi -def get_rho_response(gw, omega, mo_energy, Lpq, kL, kidx): - ''' - Compute density response function in auxiliary basis at freq iw - ''' - nkpts, naux, nmo, nmo = Lpq.shape - nocc = gw.nocc - kpts = gw.kpts - kscaled = gw.mol.get_scaled_kpts(kpts) - kscaled -= kscaled[0] + +def get_rho_response_metal(omega, mo_energy, mo_occ, Lpq, kidx): + """Get Pi=PV for metallic systems. + P is density-density response function. + V is two-electron integral. 
+ See equation 24 in 10.1021/acs.jctc.0c00704 + + Parameters + ---------- + omega : double + real position of imaginary frequency + mo_energy : double ndarray + orbital energy + mo_occ : double ndarray + occupation number + Lpq : complex ndarray + three-center density-fitting matrix in MO + kidx : list + momentum-conserved k-point list kj=kidx[ki] + + Returns + ------- + Pi : complex ndarray + Pi in auxiliary basis at freq iw + """ + nkpts, naux, nmo, _ = Lpq.shape + mo_occ = [x / 2.0 for x in mo_occ] # Compute Pi for kL - Pi = np.zeros((naux,naux),dtype=np.complex128) - for i, kpti in enumerate(kpts): + Pi = np.zeros(shape=[naux, naux], dtype=np.complex128) + for i in range(nkpts): # Find ka that conserves with ki and kL (-ki+ka+kL=G) a = kidx[i] - eia = mo_energy[i,:nocc,None] - mo_energy[a,None,nocc:] - eia = eia/(omega**2+eia*eia) - Pia = einsum('Pia,ia->Pia',Lpq[i][:,:nocc,nocc:],eia) + eia = mo_energy[i, :, None] - mo_energy[a, None, :] + fia = mo_occ[i][:, None] - mo_occ[a][None, :] + Lia = np.ascontiguousarray(Lpq[i]) + eia = eia * fia / (omega**2 + eia**2) + Pia = Lia * eia # Response from both spin-up and spin-down density - Pi += 4./nkpts * einsum('Pia,Qia->PQ',Pia,Lpq[i][:,:nocc,nocc:].conj()) + # both ia and ai are included, this gives a factor of 2.0 + # Pi += (2./nkpts) * einsum('Pia,Qia->PQ', Pia, Lpq_i.conj()) + scipy.linalg.blas.zgemm( + alpha=2.0 / nkpts, + a=Lia.reshape(naux, nmo * nmo).T, + b=Pia.reshape(naux, nmo * nmo).T, + c=Pi.T, + trans_a=2, + trans_b=0, + beta=1.0, + overwrite_c=True, + ) + Pia = Lia = None + + return Pi + + +def get_rho_response_head(omega, mo_energy, qij): + """Compute head (G=0, G'=0) density response function in auxiliary basis at freq iw. 
+ equation 48 in 10.1021/acs.jctc.0c00704 + + Parameters + ---------- + omega : double + frequency point + mo_energy : double ndarray + orbital energy + qij : complex ndarray + pair density matrix defined as equation 51 in 10.1021/acs.jctc.0c00704 + + Returns + ------- + Pi_00 : complex + head response function + """ + nkpts, nocc = qij.shape[:2] + + Pi_00 = 0j + for k in range(nkpts): + eia = mo_energy[k, :nocc, None] - mo_energy[k, None, nocc:] + eia = eia / (omega**2 + eia**2) + Pi_00 += 4.0 / nkpts * einsum('ia,ia->', eia, qij[k].conj() * qij[k]) + return Pi_00 + + +def get_rho_response_wing(omega, mo_energy, Lia, qij): + """Compute wing (G=P, G'=0) density response function in auxiliary basis at freq iw. + equation 48 in 10.1021/acs.jctc.0c00704 + + Parameters + ---------- + omega : double + frequency point + mo_energy : double 2d array + orbital energy + Lia : complex 4d array + occupied-virtual block of three-center density fitting matrix in MO + qij : complex ndarray + pair density matrix defined as equation 51 in 10.1021/acs.jctc.0c00704 + + Returns + ------- + Pi : complex ndarray + wing response function + """ + nkpts, naux, nocc, nvir = Lia.shape + + Pi = np.zeros(shape=[naux], dtype=np.complex128) + for k in range(nkpts): + eia = mo_energy[k, :nocc, None] - mo_energy[k, None, nocc:] + eia = eia / (omega**2 + eia**2) + eia_q = eia * qij[k].conj() + Pi += 4.0 / nkpts * np.matmul(Lia[k].reshape(naux, nocc * nvir), eia_q.reshape(nocc * nvir)) return Pi -def get_sigma_diag(gw, orbs, kptlist, freqs, wts, iw_cutoff=None, max_memory=8000): - ''' - Compute GW correlation self-energy (diagonal elements) - in MO basis on imaginary axis - ''' - mo_energy = np.array(gw._scf.mo_energy) - mo_coeff = np.array(gw._scf.mo_coeff) + +def get_qij(gw, q, mo_energy, mo_coeff, uniform_grids=False): + """Compute pair density matrix in the long-wavelength limit through kp perturbation theory + qij = 1/Omega * |< psi_{ik} | e^{iqr} | psi_{ak-q} >|^2 + equation 51 in 
10.1021/acs.jctc.0c00704 + Ref: Phys. Rev. B 83, 245122 (2011) + + Parameters + ---------- + gw : KRGWAC + gw object, provides attributes: nocc, nmo, kpts, mol + q : double + q grid + mo_energy : double ndarray + orbital energy + mo_coeff : complex ndarray + coefficient from AO to MO + uniform_grids : bool, optional + use uniform grids, by default False + + Returns + ------- + qij : complex ndarray + pair density matrix in the long-wavelength limit + """ + nocc = gw.nocc + nmo = gw.nmo + nvir = nmo - nocc + kpts = gw.kpts + nkpts = len(kpts) + cell = gw.mol + + if uniform_grids: + with temporary_env(cell, verbose=0): + mydf = df.FFTDF(cell, kpts=kpts) + coords = cell.gen_uniform_grids(mydf.mesh) + else: + with temporary_env(cell, verbose=0): + coords, weights = dft.gen_grid.get_becke_grids(cell, level=4) + ngrid = len(coords) + + qij = np.zeros(shape=[nkpts, nocc, nvir], dtype=np.complex128) + for i, kpti in enumerate(kpts): + ao_p = dft.numint.eval_ao(cell, coords, kpt=kpti, deriv=1) + ao = ao_p[0] + ao_grad = ao_p[1:4] + if uniform_grids: + ao_ao_grad = einsum('mg,xgn->xmn', ao.T.conj(), ao_grad) * cell.vol / ngrid + else: + ao_ao_grad = einsum('g,mg,xgn->xmn', weights, ao.T.conj(), ao_grad) + q_ao_ao_grad = -1j * einsum('x,xmn->mn', q, ao_ao_grad) + q_mo_mo_grad = reduce(np.matmul, (mo_coeff[i][:, :nocc].T.conj(), q_ao_ao_grad, mo_coeff[i][:, nocc:])) + enm = 1.0 / (mo_energy[i][nocc:, None] - mo_energy[i][None, :nocc]) + dens = enm.T * q_mo_mo_grad + qij[i] = dens / np.sqrt(cell.vol) + + return qij + + +def get_sigma( + gw, freqs, wts, ef, mo_energy, orbs=None, kptlist=None, mo_coeff=None, mo_occ=None, iw_cutoff=None, fullsigma=False +): + """Get GW self-energy. 
+ See equation 27 in 10.1021/acs.jctc.0c00704 + + Parameters + ---------- + gw : KRGWAC + GW objects, + provides attributes: _scf, mol, frozen, nmo, nocc, kpts, nkpts, mo_coeff, mo_occ, fc, fc_grid, with_df + freqs : double array + position of imaginary frequency + wts : double array + weight of frequency points + ef : double + Fermi level + mo_energy : double ndarray + non-frozen orbital energy + orbs : list, optional + orbital index in non-frozen nmo to calculate self-energy, by default None + kptlist : list, optional + k-point index to calculate self-energy, by default None + mo_coeff : complex ndarray, optional + coefficient from AO to non-frozen MO, by default None + mo_occ : double ndarray, optional + non-frozen occupation number, by default None + iw_cutoff : complex, optional + imaginary grid cutoff for fitting, by default None + fullsigma : bool, optional + calculate off-diagonal elements, by default False + + Returns + ------- + sigma: complex ndarray + self-energy on the imaginary axis + omega: complex ndarray + imaginary frequency grids of self-energy + """ nocc = gw.nocc nmo = gw.nmo nkpts = gw.nkpts kpts = gw.kpts + + if orbs is None: + orbs = list(range(nmo)) + if kptlist is None: + kptlist = list(range(nkpts)) + norbs = len(orbs) nklist = len(kptlist) nw = len(freqs) - norbs = len(orbs) - mydf = gw.with_df + + if mo_coeff is None: + mo_coeff = _mo_frozen(gw, gw.mo_coeff) + if mo_occ is None: + mo_occ = _mo_occ_frozen(gw, gw.mo_occ) + nao = mo_coeff[0].shape[0] # possible kpts shift center kscaled = gw.mol.get_scaled_kpts(kpts) kscaled -= kscaled[0] - # This code does not support metals - homo = -99. - lumo = 99. - for k in range(nkpts): - if homo < mo_energy[k][nocc-1]: - homo = mo_energy[k][nocc-1] - if lumo > mo_energy[k][nocc]: - lumo = mo_energy[k][nocc] - if (lumo-homo)<1e-3: - logger.warn(gw, 'This GW-AC code is not supporting metals!') - ef = (homo+lumo)/2. 
- # Integration on numerical grids - if iw_cutoff is not None: + if iw_cutoff is not None and gw.rdm is False: nw_sigma = sum(iw < iw_cutoff for iw in freqs) + 1 else: nw_sigma = nw + 1 - # Compute occ for -iw and vir for iw separately - # to avoid branch cuts in analytic continuation - omega_occ = np.zeros((nw_sigma), dtype=np.complex128) - omega_vir = np.zeros((nw_sigma), dtype=np.complex128) - omega_occ[1:] = -1j*freqs[:(nw_sigma-1)] - omega_vir[1:] = 1j*freqs[:(nw_sigma-1)] - orbs_occ = [i for i in orbs if i < nocc] - norbs_occ = len(orbs_occ) - - emo_occ = np.zeros((nkpts,nmo,nw_sigma),dtype=np.complex128) - emo_vir = np.zeros((nkpts,nmo,nw_sigma),dtype=np.complex128) - for k in range(nkpts): - emo_occ[k] = omega_occ[None,:] + ef - mo_energy[k][:,None] - emo_vir[k] = omega_vir[None,:] + ef - mo_energy[k][:,None] - - sigma = np.zeros((nklist,norbs,nw_sigma),dtype=np.complex128) - omega = np.zeros((norbs,nw_sigma),dtype=np.complex128) - for p in range(norbs): - orbp = orbs[p] - if orbp < nocc: - omega[p] = omega_occ.copy() - else: - omega[p] = omega_vir.copy() + omega = np.zeros(shape=[nw_sigma], dtype=np.complex128) + omega[1:] = 1j * freqs[: (nw_sigma - 1)] + ef + emo = omega[None, None, :] - mo_energy[:, :, None] + if fullsigma is False: + sigma = np.zeros(shape=[nklist, norbs, nw_sigma], dtype=np.complex128) + else: + sigma = np.zeros(shape=[nklist, norbs, norbs, nw_sigma], dtype=np.complex128) if gw.fc: # Set up q mesh for q->0 finite size correction - q_pts = np.array([1e-3,0,0]).reshape(1,3) + if not gw.fc_grid: + q_pts = np.array([1e-3, 0, 0], dtype=np.double).reshape(1, 3) + else: + Nq = 3 + q_pts = np.zeros(shape=[Nq**3 - 1, 3], dtype=np.double) + for i in range(Nq): + for j in range(Nq): + for k in range(Nq): + if i == 0 and j == 0 and k == 0: + continue + else: + q_pts[i * Nq**2 + j * Nq + k - 1, 0] = k * 5e-4 + q_pts[i * Nq**2 + j * Nq + k - 1, 1] = j * 5e-4 + q_pts[i * Nq**2 + j * Nq + k - 1, 2] = i * 5e-4 + nq_pts = len(q_pts) q_abs = 
gw.mol.get_abs_kpts(q_pts) # Get qij = 1/sqrt(Omega) * < psi_{ik} | e^{iqr} | psi_{ak-q} > at q: (nkpts, nocc, nvir) - qij = get_qij(gw, q_abs[0], mo_coeff) + qij = np.zeros(shape=[nq_pts, nkpts, nocc, nmo - nocc], dtype=np.complex128) + + if not gw.fc_grid: + for k in range(nq_pts): + qij[k] = get_qij(gw, q_abs[k], mo_energy, mo_coeff) + else: + for k in range(nq_pts): + qij[k] = get_qij(gw, q_abs[k], mo_energy, mo_coeff) + cderiarr = gw.with_df.cderi_array() for kL in range(nkpts): # Lij: (ki, L, i, j) for looping every kL Lij = [] # kidx: save kj that conserves with kL and ki (-ki+kj+kL=G) # kidx_r: save ki that conserves with kL and kj (-ki+kj+kL=G) - kidx = np.zeros((nkpts),dtype=np.int64) - kidx_r = np.zeros((nkpts),dtype=np.int64) + kidx = np.zeros(shape=[nkpts], dtype=np.int64) + kidx_r = np.zeros(shape=[nkpts], dtype=np.int64) for i, kpti in enumerate(kpts): for j, kptj in enumerate(kpts): # Find (ki,kj) that satisfies momentum conservation with kL @@ -266,313 +511,852 @@ def get_sigma_diag(gw, orbs, kptlist, freqs, wts, iw_cutoff=None, max_memory=800 if is_kconserv: kidx[i] = j kidx_r[j] = i - logger.debug(gw, "Read Lpq (kL: %s / %s, ki: %s, kj: %s)"%(kL+1, nkpts, i, j)) + logger.debug(gw, 'Read Lpq (kL: %s / %s, ki: %s, kj: %s)' % (kL + 1, nkpts, i, j)) Lij_out = None # Read (L|pq) and ao2mo transform to (L|ij) - Lpq = [] - for LpqR, LpqI, sign \ - in mydf.sr_loop([kpti, kptj], max_memory=0.1*gw._scf.max_memory, compact=False): - Lpq.append(LpqR+LpqI*1.0j) # support unequal naux on different k points - Lpq = np.vstack(Lpq).reshape(-1,nmo**2) - tao = [] - ao_loc = None + Lpq = cderiarr.load(kpti, kptj) + if Lpq.shape[-1] == (nao*(nao+1))//2: + Lpq = lib.unpack_tril(Lpq).reshape(-1,nao**2) + else: + Lpq = Lpq.reshape(-1,nao**2) + Lpq = Lpq.astype(np.complex128) + moij, ijslice = _conc_mos(mo_coeff[i], mo_coeff[j])[2:] - Lij_out = _ao2mo.r_e2(Lpq, moij, ijslice, tao, ao_loc, out=Lij_out) - Lij.append(Lij_out.reshape(-1,nmo,nmo)) - Lij = np.asarray(Lij) + 
Lij_out = _ao2mo.r_e2(Lpq, moij, ijslice, tao=[], ao_loc=None, out=Lij_out) + Lij.append(Lij_out.reshape(-1, nmo, nmo)) + Lij = np.ascontiguousarray(Lij) naux = Lij.shape[1] - if kL == 0: - for w in range(nw): - # body dielectric matrix eps_body - Pi = get_rho_response(gw, freqs[w], mo_energy, Lij, kL, kidx) - eps_body_inv = np.linalg.inv(np.eye(naux)-Pi) - - if gw.fc: - # head dielectric matrix eps_00 - Pi_00 = get_rho_response_head(gw, freqs[w], mo_energy, qij) - eps_00 = 1. - 4. * np.pi/np.linalg.norm(q_abs[0])**2 * Pi_00 + if hasattr(gw._scf, 'sigma') is False: + Lia = np.ascontiguousarray(Lij[:, :, :nocc, nocc:]) - # wings dielectric matrix eps_P0 - Pi_P0 = get_rho_response_wing(gw, freqs[w], mo_energy, Lij, qij) - eps_P0 = -np.sqrt(4.*np.pi) / np.linalg.norm(q_abs[0]) * Pi_P0 + # allocate intermediates + naux_ones = np.ones(shape=[1, naux], dtype=np.complex128) + mnQ = np.zeros(shape=[nmo * norbs, naux], dtype=np.complex128) + if fullsigma is False: + Qmn = np.zeros(shape=[naux, nmo * norbs], dtype=np.complex128) + Wmn = np.zeros(shape=[nmo, norbs], dtype=np.complex128) + else: + Wmn = np.zeros(shape=[nmo, norbs, norbs], dtype=np.complex128) + Lij_kmQn = np.ascontiguousarray(Lij.transpose(0, 2, 1, 3)) - # inverse dielectric matrix - eps_inv_00 = 1./(eps_00 - np.dot(np.dot(eps_P0.conj(),eps_body_inv),eps_P0)) - eps_inv_P0 = -eps_inv_00 * np.dot(eps_body_inv, eps_P0) + for w in range(nw): + if hasattr(gw._scf, 'sigma'): + Pi = get_rho_response_metal(freqs[w], mo_energy, mo_occ, Lij, kidx) + else: + Pi = get_rho_response(freqs[w], mo_energy, Lia, kidx) + Pi_inv = np.linalg.inv(np.eye(naux) - Pi) - # head correction - Del_00 = 2./np.pi * (6.*np.pi**2/gw.mol.vol/nkpts)**(1./3.) * (eps_inv_00 - 1.) 
+ if gw.fc and kL == 0: + eps_inv_00 = 0j + eps_inv_P0 = np.zeros(shape=[naux], dtype=np.complex128) + for iq in range(nq_pts): + # head dielectric matrix eps_00, equation 47 in 10.1021/acs.jctc.0c00704 + Pi_00 = get_rho_response_head(freqs[w], mo_energy, qij[iq]) + eps_00 = 1.0 - 4.0 * np.pi / np.linalg.norm(q_abs[iq]) ** 2.0 * Pi_00 - eps_inv_PQ = eps_body_inv - g0_occ = wts[w] * emo_occ / (emo_occ**2+freqs[w]**2) - g0_vir = wts[w] * emo_vir / (emo_vir**2+freqs[w]**2) + # wings dielectric matrix eps_P0, equation 48 in 10.1021/acs.jctc.0c00704 + Pi_P0 = get_rho_response_wing(freqs[w], mo_energy, Lia, qij[iq]) + eps_P0 = -np.sqrt(4.0 * np.pi) / np.linalg.norm(q_abs[iq]) * Pi_P0 - for k in range(nklist): - kn = kptlist[k] - # Find km that conserves with kn and kL (-km+kn+kL=G) - km = kidx_r[kn] - Qmn = einsum('Pmn,PQ->Qmn',Lij[km][:,:,orbs].conj(),eps_inv_PQ-np.eye(naux)) - Wmn = 1./nkpts * einsum('Qmn,Qmn->mn',Qmn,Lij[km][:,:,orbs]) - sigma[k][:norbs_occ] += -einsum('mn,mw->nw',Wmn[:,:norbs_occ],g0_occ[km])/np.pi - sigma[k][norbs_occ:] += -einsum('mn,mw->nw',Wmn[:,norbs_occ:],g0_vir[km])/np.pi - - if gw.fc: - # apply head correction - assert (kn == km) - sigma[k][:norbs_occ] += -Del_00 * g0_occ[kn][orbs][:norbs_occ] /np.pi - sigma[k][norbs_occ:] += -Del_00 * g0_vir[kn][orbs][norbs_occ:] /np.pi - - # apply wing correction - Wn_P0 = einsum('Pnm,P->nm',Lij[kn],eps_inv_P0).diagonal() - Wn_P0 = Wn_P0.real * 2. - Del_P0 = np.sqrt(gw.mol.vol/4./np.pi**3) * (6.*np.pi**2/gw.mol.vol/nkpts)**(2./3.) 
* Wn_P0[orbs] - sigma[k][:norbs_occ] += -einsum('n,nw->nw', Del_P0[:norbs_occ], - g0_occ[kn][orbs][:norbs_occ]) /np.pi - sigma[k][norbs_occ:] += -einsum('n,nw->nw', Del_P0[norbs_occ:], - g0_vir[kn][orbs][norbs_occ:]) /np.pi - else: - for w in range(nw): - Pi = get_rho_response(gw, freqs[w], mo_energy, Lij, kL, kidx) - Pi_inv = np.linalg.inv(np.eye(naux)-Pi)-np.eye(naux) - g0_occ = wts[w] * emo_occ / (emo_occ**2+freqs[w]**2) - g0_vir = wts[w] * emo_vir / (emo_vir**2+freqs[w]**2) - for k in range(nklist): - kn = kptlist[k] + # inverse dielectric matrix + # equation 53 in 10.1021/acs.jctc.0c00704 + eps_inv_00 += 1.0 / nq_pts * 1.0 / (eps_00 - reduce(np.matmul, (eps_P0.conj(), Pi_inv, eps_P0))) + # equation 54 in 10.1021/acs.jctc.0c00704 + eps_inv_P0 += 1.0 / nq_pts * (-eps_inv_00) * np.matmul(Pi_inv, eps_P0) + + # head correction, equation 43 in 10.1021/acs.jctc.0c00704 + Del_00 = 2.0 / np.pi * (6.0 * np.pi**2 / gw.mol.vol / nkpts) ** (1.0 / 3.0) * (eps_inv_00 - 1.0) + + Pi_inv -= np.eye(naux) + g0 = wts[w] * emo / (emo**2 + freqs[w] ** 2) + for k in range(nklist): + kn = kptlist[k] + # Find km that conserves with kn and kL (-km+kn+kL=G) + km = kidx_r[kn] + + + if len(orbs) == nmo: + l_slice = np.ascontiguousarray(Lij[km].reshape(naux, -1)) + if fullsigma: + l_slice_mQn = np.ascontiguousarray(Lij_kmQn[km]) + else: + l_slice = np.ascontiguousarray(Lij[km, :, :, mkslice(orbs)].reshape(naux, -1)) + if fullsigma: + l_slice_mQn = np.ascontiguousarray(Lij_kmQn[km, :, :, mkslice(orbs)]) + + # Qmn = einsum('Pmn,PQ->Qmn', Lij[km][:, :, orbs].conj(), Pi_inv) + scipy.linalg.blas.zgemm(alpha=1.0, a=Pi_inv.T, b=l_slice.T, c=mnQ.T, overwrite_c=1, trans_b=2) + + if fullsigma is False: + # Wmn = 1./nkpts * einsum('Qmn,Qmn->mn',Qmn,Lij[km][:,:,orbs]) + Qmn[:] = mnQ.T * l_slice + np.matmul(naux_ones, Qmn, out=Wmn.reshape(1, nmo * norbs)) + array_scale(Wmn, 1.0 / nkpts / np.pi) + + # sigma[k] += -einsum('mn,mw->nw',Wmn,g0[km]) / np.pi + # 1 / np.pi is included in Wmn above + sigma[k] -= 
np.matmul(Wmn.reshape(nmo, norbs).T, g0[km]) + else: + # for orbm in range(nmo): + # Wmn[orbm] = 1./nkpts * np.dot(Qmn[:,orbm,:].transpose(),Lij[km][:,orbm,orbs]) + #for m in range(nmo): + # np.matmul(Qmn[:, m, :].T, np.ascontiguousarray(Lij[km, :, m, mkslice(orbs)]), out=Wmn[m]) + np.matmul(mnQ.reshape(nmo, norbs, naux), l_slice_mQn, out=Wmn) + array_scale(Wmn, 1.0 / nkpts / np.pi) + + #Wmn = Wmn.reshape(nmo, norbs * norbs).T + # sigma[k] += -einsum('mnl,mw->nlw',Wmn,g0[km])/np.pi + # 1 / np.pi is included in Wmn above + sigma[k] -= np.matmul(Wmn.reshape(nmo, norbs * norbs).T, g0[km]).reshape(norbs, norbs, nw_sigma) + + if gw.fc and kL == 0: # Find km that conserves with kn and kL (-km+kn+kL=G) - km = kidx_r[kn] - Qmn = einsum('Pmn,PQ->Qmn',Lij[km][:,:,orbs].conj(),Pi_inv) - Wmn = 1./nkpts * einsum('Qmn,Qmn->mn',Qmn,Lij[km][:,:,orbs]) - sigma[k][:norbs_occ] += -einsum('mn,mw->nw',Wmn[:,:norbs_occ],g0_occ[km])/np.pi - sigma[k][norbs_occ:] += -einsum('mn,mw->nw',Wmn[:,norbs_occ:],g0_vir[km])/np.pi + assert kn == km + if fullsigma is False: + # head correction + sigma[k] += -Del_00 * g0[kn][orbs] / np.pi + + # wing correction + Wn_P0 = einsum('Pnn,P->n', Lij[kn], eps_inv_P0) + Wn_P0 = Wn_P0[orbs].real * 2.0 + Del_P0 = np.sqrt(gw.mol.vol/4/np.pi**3) * (6*np.pi**2/gw.mol.vol/nkpts) ** (2/3) * Wn_P0 + sigma[k] += -einsum('n,nw->nw', Del_P0, g0[kn][orbs]) / np.pi + else: + # head correction + tmp = -Del_00 * g0[kn][orbs] / np.pi + sigma[k, np.arange(norbs), np.arange(norbs), :] += tmp + + # wing correction + Wn_P0 = einsum('Pnn,P->n', Lij[kn], eps_inv_P0) + Wn_P0 = Wn_P0[orbs].real * 2.0 + Del_P0 = np.sqrt(gw.mol.vol/4/np.pi**3) * (6*np.pi**2/gw.mol.vol/nkpts) ** (2/3) * Wn_P0 + tmp = -einsum('n,nw->nw', Del_P0, g0[kn][orbs]) / np.pi + sigma[k, np.arange(norbs), np.arange(norbs), :] += tmp + + if gw.rdm: + gw.sigmaI = sigma return sigma, omega -def get_rho_response_head(gw, omega, mo_energy, qij): - ''' - Compute head (G=0, G'=0) density response function in auxiliary 
basis at freq iw - ''' - nkpts, nocc, nvir = qij.shape + +def get_sigma_outcore( + gw, freqs, wts, ef, mo_energy, orbs=None, kptlist=None, mo_coeff=None, mo_occ=None, iw_cutoff=None, fullsigma=False +): + """Low-memory routine to get GW self-energy. + See equation 27 in 10.1021/acs.jctc.0c00704 + + Parameters + ---------- + gw : KRGWAC + GW objects, + provides attributes: _scf, mol, frozen, nmo, nocc, kpts, nkpts, mo_coeff, mo_occ, fc, fc_grid, with_df + freqs : double array + position of imaginary frequency + wts : double array + weight of frequency points + ef : double + Fermi level + mo_energy : double ndarray + non-frozen orbital energy + orbs : list, optional + orbital index in non-frozen nmo to calculate self-energy, by default None + kptlist : list, optional + k-point index to calculate self-energy, by default None + mo_coeff : complex ndarray, optional + coefficient from AO to non-frozen MO, by default None + mo_occ : double ndarray, optional + non-frozen occupation number, by default None + iw_cutoff : complex, optional + imaginary grid cutoff for fitting, by default None + fullsigma : bool, optional + calculate off-diagonal elements, by default False + + Returns + ------- + sigma: complex ndarray + self-energy on the imaginary axis + omega: complex ndarray + imaginary frequency grids of self-energy + """ + assert gw.fc is False, "finite-size correction is not implemented in get_sigma_outcore" nocc = gw.nocc + nmo = gw.nmo + nkpts = gw.nkpts kpts = gw.kpts - # Compute Pi head - Pi_00 = 0j - for i, kpti in enumerate(kpts): - eia = mo_energy[i,:nocc,None] - mo_energy[i,None,nocc:] - eia = eia/(omega**2+eia*eia) - Pi_00 += 4./nkpts * einsum('ia,ia->',eia,qij[i].conj()*qij[i]) - return Pi_00 + if orbs is None: + orbs = list(range(nmo)) + if kptlist is None: + kptlist = list(range(nkpts)) + norbs = len(orbs) + nklist = len(kptlist) + nw = len(freqs) -def get_rho_response_wing(gw, omega, mo_energy, Lpq, qij): - ''' - Compute wing (G=P, G'=0) density response 
function in auxiliary basis at freq iw - ''' - nkpts, naux, nmo, nmo = Lpq.shape - nocc = gw.nocc - kpts = gw.kpts + if mo_coeff is None: + mo_coeff = _mo_frozen(gw, gw.mo_coeff) + if mo_occ is None: + mo_occ = _mo_occ_frozen(gw, gw.mo_occ) + nao = mo_coeff[0].shape[0] - # Compute Pi wing - Pi = np.zeros(naux,dtype=np.complex128) - for i, kpti in enumerate(kpts): - eia = mo_energy[i,:nocc,None] - mo_energy[i,None,nocc:] - eia = eia/(omega**2+eia*eia) - eia_q = eia * qij[i].conj() - Pi += 4./nkpts * einsum('Pia,ia->P',Lpq[i][:,:nocc,nocc:],eia_q) - return Pi + # possible kpts shift center + kscaled = gw.mol.get_scaled_kpts(kpts) + kscaled -= kscaled[0] -def get_qij(gw, q, mo_coeff, uniform_grids=False): - ''' - Compute qij = 1/Omega * |< psi_{ik} | e^{iqr} | psi_{ak-q} >|^2 at q: (nkpts, nocc, nvir) - through kp perturbation theory - Ref: Phys. Rev. B 83, 245122 (2011) - ''' - nocc = gw.nocc - nmo = gw.nmo - nvir = nmo - nocc - kpts = gw.kpts - nkpts = len(kpts) - cell = gw.mol - mo_energy = gw._scf.mo_energy + # Integration on numerical grids + if iw_cutoff is not None and gw.rdm is False: + nw_sigma = sum(iw < iw_cutoff for iw in freqs) + 1 + else: + nw_sigma = nw + 1 - if uniform_grids: - mydf = df.FFTDF(cell, kpts=kpts) - coords = cell.gen_uniform_grids(mydf.mesh) + omega = np.zeros(shape=[nw_sigma], dtype=np.complex128) + omega[1:] = 1j * freqs[: (nw_sigma - 1)] + ef + emo = omega[None, None, :] - mo_energy[:, :, None] + + if fullsigma is False: + sigma = np.zeros(shape=[nklist, norbs, nw_sigma], dtype=np.complex128) else: - coords, weights = dft.gen_grid.get_becke_grids(cell,level=5) - ngrid = len(coords) + sigma = np.zeros(shape=[nklist, norbs, norbs, nw_sigma], dtype=np.complex128) - qij = np.zeros((nkpts,nocc,nvir),dtype=np.complex128) - for i, kpti in enumerate(kpts): - ao_p = dft.numint.eval_ao(cell, coords, kpt=kpti, deriv=1) - ao = ao_p[0] - ao_grad = ao_p[1:4] - if uniform_grids: - ao_ao_grad = einsum('mg,xgn->xmn',ao.T.conj(),ao_grad) * cell.vol / 
ngrid + cput0 = (time.process_time(), time.perf_counter()) + cderiarr = gw.with_df.cderi_array() + for kL in range(nkpts): + cput3 = (time.process_time(), time.perf_counter()) + # kidx: save kj that conserves with kL and ki (-ki+kj+kL=G) + # kidx_r: save ki that conserves with kL and kj (-ki+kj+kL=G) + kidx = np.zeros(shape=[nkpts], dtype=np.int64) + kidx_r = np.zeros(shape=[nkpts], dtype=np.int64) + + for i in range(nkpts): + for j in range(nkpts): + # Find (ki,kj) that satisfies momentum conservation with kL + kconserv = -kscaled[i] + kscaled[j] + kscaled[kL] + is_kconserv = np.linalg.norm(np.round(kconserv) - kconserv) < 1e-12 + if is_kconserv: + kidx[i] = j + kidx_r[j] = i + + # TODO: more efficient way to find naux without loading the whole array + Lpq_ao = cderiarr.load(kpts[0], kpts[kidx[0]]) + assert len(Lpq_ao.shape) == 2 + naux = Lpq_ao.shape[0] + + Pi = np.zeros(shape=[nw, naux, naux], dtype=np.complex128) + cput1 = (time.process_time(), time.perf_counter()) + for i in range(nkpts): + a = kidx[i] + logger.debug(gw, 'Pi (kL: %s / %s, ki: %s, kj: %s)' % (kL + 1, nkpts, a, kidx_r[a])) + Lpq_ao = cderiarr.load(kpts[i], kpts[a]) + if Lpq_ao.shape[-1] == (nao * (nao + 1)) // 2: + Lpq_ao = lib.unpack_tril(Lpq_ao).reshape(-1, nao**2) + else: + Lpq_ao = Lpq_ao.reshape(-1, nao**2) + Lpq_ao = Lpq_ao.astype(np.complex128) + + moij, ijslice = _conc_mos(mo_coeff[i], mo_coeff[a])[2:] + Lpq = None + Lpq = _ao2mo.r_e2(Lpq_ao, moij, ijslice, tao=[], ao_loc=None, out=Lpq) + del Lpq_ao + Lpq = np.ascontiguousarray(Lpq.reshape(-1, nmo, nmo)) + + if hasattr(gw._scf, 'sigma'): + eia = mo_energy[i, :, None] - mo_energy[a, None, :] + fia = (mo_occ[i][:, None] - mo_occ[a][None, :]) / 2.0 + Lia = Lpq + for w in range(nw): + freqs_w = freqs[w] + eia_w = eia * fia / (freqs_w**2 + eia**2) + Pia = Lia * eia_w + # Response from both spin-up and spin-down density + # both ia and ai are included, this gives a factor of 2.0 + # Pi += (2./nkpts) * einsum('Pia,Qia->PQ', Pia, Lpq_i.conj()) + 
scipy.linalg.blas.zgemm( + alpha=2.0 / nkpts, + a=Lia.reshape(naux, nmo * nmo).T, + b=Pia.reshape(naux, nmo * nmo).T, + c=Pi[w].T, + trans_a=2, + trans_b=0, + beta=1.0, + overwrite_c=True, + ) + del eia_w, Pia + del eia, fia + else: + eia = mo_energy[i, :nocc, None] - mo_energy[a, None, nocc:] + Lia = np.ascontiguousarray(Lpq[:, :nocc, nocc:]) + nvir = Lia.shape[-1] + for w in range(nw): + freqs_w = freqs[w] + eia_w = eia / (freqs_w**2 + eia**2) + Pia = Lia * eia_w + # Response from both spin-up and spin-down density + # Pi += (4./nkpts) * einsum('Pia,Qia->PQ', Pia, Lov.conj()) + scipy.linalg.blas.zgemm( + alpha=4.0 / nkpts, + a=Lia.reshape(naux, nocc * nvir).T, + b=Pia.reshape(naux, nocc * nvir).T, + c=Pi[w].T, + trans_a=2, + trans_b=0, + beta=1.0, + overwrite_c=True, + ) + del eia_w, Pia + del eia + del Lpq, Lia + + logger.timer(gw, 'Calculate Pi for kL: %s / %s' % (kL + 1, nkpts), *cput1) + + for w in range(nw): + Pi[w] = np.linalg.inv(np.eye(naux) - Pi[w]) + Pi[w] -= np.eye(naux) + Pi_inv = Pi + + # allocate intermediates + naux_ones = np.ones(shape=[1, naux], dtype=np.complex128) + mnQ = np.zeros(shape=[nmo * norbs, naux], dtype=np.complex128) + if fullsigma is False: + Qmn = np.zeros(shape=[naux, nmo * norbs], dtype=np.complex128) + Wmn = np.zeros(shape=[nmo, norbs], dtype=np.complex128) else: - ao_ao_grad = einsum('g,mg,xgn->xmn',weights,ao.T.conj(),ao_grad) - q_ao_ao_grad = -1j * einsum('x,xmn->mn',q,ao_ao_grad) - q_mo_mo_grad = np.dot(np.dot(mo_coeff[i][:,:nocc].T.conj(), q_ao_ao_grad), mo_coeff[i][:,nocc:]) - enm = 1./(mo_energy[i][nocc:,None] - mo_energy[i][None,:nocc]) - dens = enm.T * q_mo_mo_grad - qij[i] = dens / np.sqrt(cell.vol) + Wmn = np.zeros(shape=[nmo, norbs, norbs], dtype=np.complex128) + #Lij_kmQn = np.ascontiguousarray(Lij.transpose(0, 2, 1, 3)) + + for kn in range(nklist): + # Find km that conserves with kn and kL (-km+kn+kL=G) + km = kidx_r[kn] + + cput2 = (time.process_time(), time.perf_counter()) + logger.debug(gw, 'sigma (kL: %s / %s, 
ki: %s, kj: %s)' % (kL + 1, nkpts, km, kn)) + Lpq_ao = cderiarr.load(kpts[km], kpts[kn]) + if Lpq_ao.shape[-1] == (nao * (nao + 1)) // 2: + Lpq_ao = lib.unpack_tril(Lpq_ao).reshape(-1,nao**2) + else: + Lpq_ao = Lpq_ao.reshape(-1,nao**2) + Lpq_ao = Lpq_ao.astype(np.complex128) + + Lpq = None + moij, ijslice = _conc_mos(mo_coeff[km], mo_coeff[kn])[2:] + Lpq = _ao2mo.r_e2(Lpq_ao, moij, ijslice, tao=[], ao_loc=None, out=Lpq) + Lpq = np.ascontiguousarray(Lpq.reshape(-1, nmo, nmo)) + + if len(orbs) == nmo: + l_slice = np.ascontiguousarray(Lpq.reshape(naux, -1)) + if fullsigma: + l_slice_mQn = np.ascontiguousarray(Lpq.transpose(1, 0, 2)) + else: + l_slice = np.ascontiguousarray(Lpq[:, :, mkslice(orbs)].reshape(naux, -1)) + if fullsigma: + l_slice_mQn = np.ascontiguousarray(Lpq[:, :, mkslice(orbs)].transpose(1, 0, 2)) - return qij + for w in range(nw): + g0 = wts[w] * emo[km] / (emo[km]**2 + freqs[w] ** 2) + + # Qmn = einsum('Pmn,PQ->Qmn', Lij[km][:, :, orbs].conj(), Pi_inv) + scipy.linalg.blas.zgemm(alpha=1.0, a=Pi_inv[w].T, b=l_slice.T, c=mnQ.T, overwrite_c=1, trans_b=2) + + if fullsigma is False: + # Wmn = 1./nkpts * einsum('Qmn,Qmn->mn',Qmn,Lij[km][:,:,orbs]) + Qmn[:] = mnQ.T * l_slice + np.matmul(naux_ones, Qmn, out=Wmn.reshape(1, nmo * norbs)) + array_scale(Wmn, 1.0 / nkpts / np.pi) + + # sigma[kn] += -einsum('mn,mw->nw',Wmn,g0[km]) / np.pi + # 1 / np.pi is included in Wmn above + sigma[kn] -= np.matmul(Wmn.reshape(nmo, norbs).T, g0) + else: + # for orbm in range(nmo): + # Wmn[orbm] = 1./nkpts * np.dot(Qmn[:,orbm,:].transpose(),Lij[km][:,orbm,orbs]) + #for m in range(nmo): + # np.matmul(Qmn[:, m, :].T, np.ascontiguousarray(Lij[km, :, m, mkslice(orbs)]), out=Wmn[m]) + np.matmul(mnQ.reshape(nmo, norbs, naux), l_slice_mQn, out=Wmn) + array_scale(Wmn, 1.0 / nkpts / np.pi) + + #Wmn = Wmn.reshape(nmo, norbs * norbs).T + # sigma[kn] += -einsum('mnl,mw->nlw',Wmn,g0[km])/np.pi + # 1 / np.pi is included in Wmn above + sigma[kn] -= np.matmul(Wmn.reshape(nmo, norbs * norbs).T, 
g0).reshape(norbs, norbs, nw_sigma) + + del Lpq, l_slice + if fullsigma: + del l_slice_mQn + logger.timer(gw, 'GW correlation self-energy for kL: %s / %s kn: %d' % (kL + 1, nkpts, kn), *cput2) + + del Pi, Pi_inv, mnQ, Wmn + if fullsigma is False: + del Qmn + logger.timer(gw, 'GW correlation self-energy for kL: %s / %s' % (kL + 1, nkpts), *cput3) + + if gw.rdm: + gw.sigmaI = sigma + + logger.timer(gw, 'GW correlation self-energy', *cput0) + + return sigma, omega + + +def get_sigma_exchange(gw, mo_coeff_full=None, mo_occ_full=None): + """Get exchange self-energy (EXX). -def _get_scaled_legendre_roots(nw): + Parameters + ---------- + gw : KRGWAC + gw object + mo_coeff : complex ndarray, optional + orbital coefficient, by default None + mo_occ : double ndarray, optional + occupation number, by default None + + Returns + ------- + vk : complex ndarray + exchange self-energy """ - Scale nw Legendre roots, which lie in the - interval [-1, 1], so that they lie in [0, inf) - Ref: www.cond-mat.de/events/correl19/manuscripts/ren.pdf + nmo = gw.nmo + nkpts = gw.nkpts + kpts = gw.kpts + + if mo_coeff_full is None: + mo_coeff_full = gw.mo_coeff + if mo_occ_full is None: + mo_occ_full = gw.mo_occ + nao = mo_coeff_full[0].shape[0] + nmo_full = nao + nocc_full = int(np.sum(gw._scf.mo_occ[0])) // 2 + + # possible kpts shift center + kscaled = gw.mol.get_scaled_kpts(kpts) + kscaled -= kscaled[0] + + vk = np.zeros(shape=[nkpts, nmo_full, nmo_full], dtype=np.complex128) + cderiarr = gw.with_df.cderi_array() + for kL in range(nkpts): + # kidx: save kj that conserves with kL and ki (-ki+kj+kL=G) + # kidx_r: save ki that conserves with kL and kj (-ki+kj+kL=G) + kidx = np.zeros(shape=[nkpts], dtype=np.int64) + kidx_r = np.zeros(shape=[nkpts], dtype=np.int64) + for i in range(nkpts): + for j in range(nkpts): + # Find (ki,kj) that satisfies momentum conservation with kL + kconserv = -kscaled[i] + kscaled[j] + kscaled[kL] + is_kconserv = np.linalg.norm(np.round(kconserv) - kconserv) < 1e-12 + 
if is_kconserv: + kidx[i] = j + kidx_r[j] = i + + for kn in range(nkpts): + # kn is i + # Find km that conserves with kn and kL (-km+kn+kL=G) + km = kidx_r[kn] # km is j - Returns: - freqs : 1D ndarray - wts : 1D ndarray + # logger.debug(gw, 'Read Lpq (kL: %s / %s, ki: %s, kj: %s @ Rank %d)' % (kL + 1, nkpts, i, j, rank)) + + # Read (L|pq) and ao2mo transform to (L|ij) + # support unequal naux on different k points + Lpq_ao = cderiarr.load(kpts[km], kpts[kn]) + if Lpq_ao.shape[-1] == (nao * (nao + 1)) // 2: + Lpq_ao = lib.unpack_tril(Lpq_ao).reshape(-1, nao**2) + else: + Lpq_ao = Lpq_ao.reshape(-1, nao**2) + Lpq_ao = Lpq_ao.astype(np.complex128) + + Lij = None + if hasattr(gw._scf, 'sigma'): + moij, ijslice = _conc_mos(mo_coeff_full[km], mo_coeff_full[kn])[2:] + Lij = _ao2mo.r_e2(Lpq_ao, moij, ijslice, tao=[], ao_loc=None, out=Lij) + Lij = Lij.reshape(-1, nmo_full, nmo_full) + else: + moij, ijslice = _conc_mos(mo_coeff_full[km][:, :nocc_full], mo_coeff_full[kn])[2:] + Lij = _ao2mo.r_e2(Lpq_ao, moij, ijslice, tao=[], ao_loc=None, out=Lij) + Lij = Lij.reshape(-1, nocc_full, nmo_full) + + if hasattr(gw._scf, 'sigma'): + # vk[k] -= 1.0 / nkpts * einsum('i,Lip,Liq->pq', mo_occ[km], Lij[km].conj(), Lij[km]) * 0.5 + Lij_occ = Lij * mo_occ_full[km][None, :, None] + scipy.linalg.blas.zgemm( + alpha=-0.5 / nkpts, + a=Lij_occ.reshape(-1, nmo_full).T, + b=Lij.reshape(-1, nmo_full).T, + c=vk[kn].T, + trans_a=0, + trans_b=2, + beta=1.0, + overwrite_c=True, + ) + else: + # vk[k] -= 1.0 / nkpts * einsum('Lip,Liq->pq', Lij[km].conj(), Lij[km]) + scipy.linalg.blas.zgemm( + alpha=-1.0 / nkpts, + a=Lij.reshape(-1, nmo_full).T, + b=Lij.reshape(-1, nmo_full).T, + c=vk[kn].T, + trans_a=0, + trans_b=2, + beta=1.0, + overwrite_c=True, + ) + + if nmo != nmo_full: + frozen_mask = get_frozen_mask(gw) + identity = np.eye(nmo_full, dtype=np.complex128) + vk_frz = np.zeros(shape=[nkpts, nmo, nmo], dtype=np.complex128) + for k in range(nkpts): + vk_frz[k] = identity[frozen_mask[k], :] @ vk[k] @ 
identity[:, frozen_mask[k]] + vk = vk_frz + + return vk + + +def get_ef(kmf, mo_energy): + """Get Fermi level. + For gapped systems, Fermi level is computed as the average between HOMO and LUMO. + For metallic systems, Fermi level is optmized according to mo_energy. + + Parameters + ---------- + kmf : pyscf.pbc.scf.rhf.RHF/pyscf.pbc.dft.rks.RKS + mean-field object, provides attributes: kpts, sigma, smearing_method + mo_energy : double array + orbital energy + + Returns + ------- + ef : double + Fermi level """ - freqs, wts = np.polynomial.legendre.leggauss(nw) - x0 = 0.5 - freqs_new = x0*(1.+freqs)/(1.-freqs) - wts = wts*2.*x0/(1.-freqs)**2 - return freqs_new, wts + if hasattr(kmf, "sigma"): + from pyscf.scf import addons as mol_addons -def _get_clenshaw_curtis_roots(nw): + if kmf.smearing_method.lower() == "fermi": + f_occ = mol_addons._fermi_smearing_occ + else: + f_occ = mol_addons._gaussian_smearing_occ + mo_energy_stack = np.hstack(np.asarray(mo_energy)) + nelectron = kmf.mol.tot_electrons(len(kmf.kpts)) + ef = mol_addons._smearing_optimize(f_occ, mo_energy_stack, (nelectron + 1) // 2, kmf.sigma)[0] + else: + nocc = int(kmf.cell.nelectron // 2) + homo = -99.0 + lumo = 99.0 + for k in range(len(kmf.kpts)): + if homo < mo_energy[k][nocc - 1]: + homo = mo_energy[k][nocc - 1] + if lumo > mo_energy[k][nocc]: + lumo = mo_energy[k][nocc] + ef = (homo + lumo) / 2.0 + return ef + + +def get_g0_k(omega, mo_energy, eta): + """Get non-interacting Green's function. + + Parameters + ---------- + omega : double or complex ndarray + frequency grids + mo_energy : double ndarray + orbital energy + eta : double + broadening parameter + + Returns + ------- + gf0 : complex ndarray + non-interacting Green's function """ - Clenshaw-Curtis quadrature on [0,inf) - Ref: J. Chem. Phys. 
132, 234114 (2010) - Returns: - freqs : 1D ndarray - wts : 1D ndarray + nkpts = len(mo_energy) + nmo = len(mo_energy[0]) + nw = len(omega) + gf0 = np.zeros(shape=[nkpts, nmo, nmo, nw], dtype=np.complex128) + for k in range(nkpts): + for iw in range(nw): + gf0[k, :, :, iw] = np.diag(1.0 / (omega[iw] + 1j * eta - mo_energy[k])) + return gf0 + + +def make_gf(gw, omega, eta): + """Get dynamical Green's function and self-energy. + + Parameters + ---------- + gw : KRGWAC + GW object, provides attributes: orbs, kptlist, ef, ac_coeff, omega_fit, vk, vxc, _scf.mo_energy + omega : double or complex array + frequency grids + eta : double + broadening parameter + + Returns + ------- + gf : complex ndarray + GW Green's function + gf0 : complex ndarray + mean-field Green's function + sigma : complex ndarray + GW correlation self-energy """ - freqs = np.zeros(nw) - wts = np.zeros(nw) - a = 0.2 - for w in range(nw): - t = (w+1.0)/nw * np.pi/2. - freqs[w] = a / np.tan(t) - if w != nw-1: - wts[w] = a*np.pi/2./nw/(np.sin(t)**2) - else: - wts[w] = a*np.pi/4./nw/(np.sin(t)**2) - return freqs[::-1], wts[::-1] + assert gw.frozen is None or gw.frozen == 0 + + if eta is None: + eta = gw.eta + + nomega = len(omega) + sigma = np.zeros(shape=[gw.nkpts, gw.nmo, gw.nmo, nomega], dtype=np.complex128) + if gw.fullsigma: + for ik, k in enumerate(gw.kptlist): + for ip, p in enumerate(gw.orbs_frz): + for iq, q in enumerate(gw.orbs_frz): + sigma[k, p, q] = gw.acobj[ik, ip, iq].ac_eval(omega + 1j * eta) + gw.vk[k, p, q] - gw.vxc[k, p, q] + else: + for ik, k in enumerate(gw.kptlist): + for ip, p in enumerate(gw.orbs_frz): + sigma[k, p, p] = gw.acobj[ik, ip].ac_eval(omega + 1j * eta) + gw.vk[k, p, p] - gw.vxc[k, p, p] + + gf0 = get_g0_k(omega, gw._scf.mo_energy, eta) + gf = np.zeros_like(gf0) + for k in range(gw.nkpts): + for iw in range(nomega): + gf[k, :, :, iw] = np.linalg.inv(np.linalg.inv(gf0[k, :, :, iw]) - sigma[k, :, :, iw]) + + return gf, gf0, sigma + + +def make_rdm1_linear(gw, ao_repr=False): 
+ """Get GW density matrix from Green's function G(it=0). + G is from linear Dyson equation, which conserves particle number + G = G0 + G0 Sigma G0 + See equation 16 in 10.1021/acs.jctc.0c01264 + + Parameters + ---------- + gw : KRGWAC + GW object, provides attributes: sigmaI, mol, _scf, freqs, wts, frozen, orbs, fc + ao_repr : bool, optional + return density matrix in AO, by default False + + Returns + ------- + rdm1 : double ndarray + density matrix + """ + assert gw.sigmaI is not None + assert gw.rdm is True and gw.fullsigma is True + assert gw.frozen is None or gw.frozen == 0 + sigmaI = gw.sigmaI[:, :, :, 1:] + freqs = 1j * gw.freqs + wts = gw.wts + nmo = gw.nmo + nkpts = gw.nkpts + if len(gw.orbs) != nmo: + sigma = np.zeros(shape=[nkpts, nmo, nmo, len(freqs)], dtype=sigmaI.dtype) + for k in range(nkpts): + for ia, a in enumerate(gw.orbs): + for ib, b in enumerate(gw.orbs): + sigma[k, a, b, :] = sigmaI[k, ia, ib, :] + else: + sigma = sigmaI -def two_pole_fit(coeff, omega, sigma): - cf = coeff[:5] + 1j*coeff[5:] - f = cf[0] + cf[1]/(omega+cf[3]) + cf[2]/(omega+cf[4]) - sigma - f[0] = f[0]/0.01 - return np.array([f.real,f.imag]).reshape(-1) + for iw in range(len(freqs)): + sigma[:, :, :, iw] += gw.vk - gw.vxc + gf0 = get_g0_k(freqs, np.array(gw._scf.mo_energy) - gw.ef, eta=0) + gf = np.array(gf0, copy=True) + for k in range(nkpts): + for iw in range(len(freqs)): + gf[k, :, :, iw] = reduce(np.matmul, (gf0[k, :, :, iw], sigma[k, :, :, iw], gf0[k, :, :, iw])) -def two_pole(freqs, coeff): - cf = coeff[:5] + 1j*coeff[5:] - return cf[0] + cf[1]/(freqs+cf[3]) + cf[2]/(freqs+cf[4]) + # GW density matrix + rdm1 = np.zeros(shape=[nkpts, nmo, nmo], dtype=np.double) + for k in range(nkpts): + rdm1[k] = 2.0 / np.pi * einsum('ijw,w->ij', gf[k], wts).real + np.eye(nmo) + logger.info(gw, 'GW particle number @ k%d = %s', k, np.trace(rdm1[k])) -def AC_twopole_diag(sigma, omega, orbs, nocc): - """ - Analytic continuation to real axis using a two-pole model - Returns: - coeff: 2D 
array (ncoeff, norbs) + # Symmetrize density matrix + for k in range(nkpts): + rdm1[k] = 0.5 * (rdm1[k] + rdm1[k].T) + + if ao_repr is True: + ovlp = gw._scf.get_ovlp() + for k in range(nkpts): + CS = np.matmul(ovlp, gw._scf.mo_coeff[k]) + rdm1[k] = reduce(np.matmul, (CS, rdm1[k], CS.conj().T)) + + return rdm1 + + +def _mo_energy_frozen(gw, mo_energy): + """Get non-frozen orbital energy. + + Parameters + ---------- + gw : KRGWAC + GW object, provides attributes: frozen, nmo, nkpt + mo_energy : double ndarray + full orbital energy + + Returns + ------- + mo_energy_frozen : double ndarray + non-frozen orbital energy """ - norbs, nw = sigma.shape - coeff = np.zeros((10,norbs)) - for p in range(norbs): - if orbs[p] < nocc: - x0 = np.array([0, 1, 1, 1, -1, 0, 0, 0, -1.0, -0.5]) - else: - x0 = np.array([0, 1, 1, 1, -1, 0, 0, 0, 1.0, 0.5]) - #TODO: analytic gradient - xopt = least_squares(two_pole_fit, x0, jac='3-point', method='trf', xtol=1e-10, - gtol = 1e-10, max_nfev=1000, verbose=0, args=(omega[p], sigma[p])) - if xopt.success is False: - print('WARN: 2P-Fit Orb %d not converged, cost function %e'%(p,xopt.cost)) - coeff[:,p] = xopt.x.copy() - return coeff - -def thiele(fn,zn): - nfit = len(zn) - g = np.zeros((nfit,nfit),dtype=np.complex128) - g[:,0] = fn.copy() - for i in range(1,nfit): - g[i:,i] = (g[i-1,i-1]-g[i:,i-1])/((zn[i:]-zn[i-1])*g[i:,i-1]) - a = g.diagonal() - return a - -def pade_thiele(freqs,zn,coeff): - nfit = len(coeff) - X = coeff[-1]*(freqs-zn[-2]) - for i in range(nfit-1): - idx = nfit-i-1 - X = coeff[idx]*(freqs-zn[idx-1])/(1.+X) - X = coeff[0]/(1.+X) - return X - -def AC_pade_thiele_diag(sigma, omega): + frozen_mask = get_frozen_mask(gw) + nmo = gw.nmo + nkpts = gw.nkpts + mo_energy_frozen = np.zeros(shape=[nkpts, nmo], dtype=np.double) + for k in range(nkpts): + mo_energy_frozen[k] = mo_energy[k][frozen_mask[k]] + return mo_energy_frozen + + +def _mo_frozen(gw, mo): + """Get non-frozen orbital coefficient. 
+ + Parameters + ---------- + gw : KRGWAC + GW object, provides attributes: frozen, nmo, nkpt + mo : complex ndarray + full orbital coefficient + + Returns + ------- + mo_frozen : complex ndarray + non-frozen orbital coefficient """ - Analytic continuation to real axis using a Pade approximation - from Thiele's reciprocal difference method - Reference: J. Low Temp. Phys. 29, 179 (1977) - Returns: - coeff: 2D array (ncoeff, norbs) - omega: 2D array (norbs, npade) + frozen_mask = get_frozen_mask(gw) + nmo = gw.nmo + nkpts = gw.nkpts + nao = mo[0].shape[0] + mo_frozen = np.zeros(shape=[nkpts, nao, nmo], dtype=np.complex128) + for k in range(nkpts): + mo_frozen[k] = mo[k][:, frozen_mask[k]] + return mo_frozen + + +def _mo_occ_frozen(gw, mo_occ): + """Get non-frozen occupation number. + + Parameters + ---------- + gw : KRGWAC + GW object, provides attributes: frozen, nmo, nkpt + mo_occ : double ndarray + full occupation number + + Returns + ------- + mo_occ_frozen : double ndarray + non-frozen occupation number """ - idx = range(1,40,6) - sigma1 = sigma[:,idx].copy() - sigma2 = sigma[:,(idx[-1]+4)::4].copy() - sigma = np.hstack((sigma1,sigma2)) - omega1 = omega[:,idx].copy() - omega2 = omega[:,(idx[-1]+4)::4].copy() - omega = np.hstack((omega1,omega2)) - norbs, nw = sigma.shape - npade = nw // 2 - coeff = np.zeros((npade*2,norbs),dtype=np.complex128) - for p in range(norbs): - coeff[:,p] = thiele(sigma[p,:npade*2], omega[p,:npade*2]) - - return coeff, omega[:,:npade*2] + frozen_mask = get_frozen_mask(gw) + nmo = gw.nmo + nkpts = gw.nkpts + mo_occ_frozen = np.zeros(shape=[nkpts, nmo], dtype=np.double) + for k in range(nkpts): + mo_occ_frozen[k] = mo_occ[k][frozen_mask[k]] + return mo_occ_frozen -class KRGWAC(lib.StreamObject): - linearized = getattr(__config__, 'gw_gw_GW_linearized', False) - # Analytic continuation: pade or twopole - ac = getattr(__config__, 'gw_gw_GW_ac', 'pade') - # Whether applying finite size corrections - fc = getattr(__config__, 'gw_gw_GW_fc', 
True) +def set_frozen_orbs(gw): + """Set .frozen attribute from frozen mask. + + Parameters + ---------- + gw : KRGWAC + unrestricted GW object + """ + if gw.frozen is not None: + if gw.orbs is not None: + if isinstance(gw.frozen, (int, np.int64)): + # frozen core + gw.orbs_frz = [x - gw.frozen for x in gw.orbs] + else: + # frozen list + assert isinstance(gw.frozen[0], (int, np.int64)) + gw.orbs_frz = [] + for orbi in gw.orbs: + count = len([p for p in gw.frozen if p <= orbi]) + gw.orbs_frz.append(orbi - count) + if any(np.array(gw.orbs_frz) < 0): + raise RuntimeError('GW orbs must be larger than frozen core!') + else: + gw.orbs_frz = range(gw.nmo) + gw.orbs = range(len(gw._scf.mo_energy[0])) + if isinstance(gw.frozen, (int, np.int64)): + gw.orbs = list(set(gw.orbs) - set(range(gw.frozen))) + else: + assert isinstance(gw.frozen[0], (int, np.int64)) + gw.orbs = list(set(gw.orbs) - set(gw.frozen)) + else: + if gw.orbs is None: + gw.orbs = range(len(gw._scf.mo_energy[0])) + gw.orbs_frz = gw.orbs + return - _keys = { - 'linearized', 'ac', 'fc', 'frozen', 'mol', 'with_df', - 'kpts', 'nkpts', 'mo_energy', 'mo_coeff', 'mo_occ', 'sigma', - } +class KRGWAC(lib.StreamObject): def __init__(self, mf, frozen=None): - self.mol = mf.mol - self._scf = mf - self.verbose = self.mol.verbose - self.stdout = self.mol.stdout - self.max_memory = mf.max_memory - - #TODO: implement frozen orbs - if frozen is not None and frozen > 0: - raise NotImplementedError - self.frozen = frozen + self.mol = mf.mol # mol object + self._scf = mf # mean-field object + self.verbose = self.mol.verbose # verbose level + self.stdout = self.mol.stdout # standard output + self.max_memory = mf.max_memory # max memory in MB + + # options + self.frozen = frozen # frozen orbital option + self.orbs = None # list of orbital index in full nmo + self.orbs_frz = None # list of orbital index in non-frozen nmo + self.kptlist = None # list of k-points to evaluate + self.fullsigma = False # calculate off-diagonal 
self-energy + self.rdm = False # calculate GW density matrix + self.vhf_df = False # use density-fitting for exchange self-energy + self.fc = True # finite-size correction to self-energy + self.fc_grid = False # grids for finite-size correction to self-energy + self.outcore = False # low-memory routine to calculate self-energy + self.eta = 5.0e-3 # broadening parameter + self.nw = 100 # number of grids for integration + self.ac = 'pade' # analytical continuation method + self.ac_iw_cutoff = 5.0 # imaginary frequency cutting for fitting self-energy + self.ac_pade_npts = 18 # number of selected points for Pade approximation + self.ac_pade_step_ratio = 2.0 / 3.0 # final/initial step size for Pade approximation + self.qpe_max_iter = 100 # max iteration in iteratively solving quasiparticle equation + self.qpe_tol = 1.0e-6 # tolerance in Newton method for iteratively quasiparticle equation + self.qpe_linearized = False # use linearized quasiparticle equation + self.qpe_linearized_range = [0.5, 1.5] # Z-shot factor range, if not in this range, z=1 + self.writefile = 0 # write file level # DF-KGW must use GDF integrals if getattr(mf, 'with_df', None): self.with_df = mf.with_df else: raise NotImplementedError - -################################################## -# don't modify the following attributes, they are not input options - self._nocc = None - self._nmo = None - self.kpts = mf.kpts - self.nkpts = len(self.kpts) - # self.mo_energy: GW quasiparticle energy, not scf mo_energy - self.mo_energy = None - self.mo_coeff = mf.mo_coeff - self.mo_occ = mf.mo_occ - self.sigma = None - - def dump_flags(self): + self._keys.update(['with_df']) + + ################################################## + # don't modify the following attributes, they are not input options + self._nocc = None # number of NON-FROZEN occupied orbitals + self._nmo = None # number of NON-FROZEN orbitals + self.kpts = mf.kpts # k-point list + self.nkpts = len(self.kpts) # number of k-points + self.mo_energy = 
None # orbital energy + self.mo_coeff = None # orbital coefficient + self.mo_occ = None # occupiation number + + # results + self.vk = None # exchange matrix in MO + self.vxc = None # mean-field exchange-correlation matrix in MO + self.freqs = None # frequency grids + self.wts = None # weights of frequency grids + self.ef = None # Fermi level + self.acobj = None # analytical continuation object + self.ac_coeff = None # Pade fitting coefficient, old interface, to be deprecated + self.omega_fit = None # AC fitting frequency, old interface, to be deprecated + self.sigmaI = None # self-energy in the imaginary axis + + return + + def dump_flags(self, verbose=None): log = logger.Logger(self.stdout, self.verbose) log.info('') log.info('******** %s ********', self.__class__) @@ -582,59 +1366,98 @@ def dump_flags(self): nkpts = self.nkpts log.info('GW nocc = %d, nvir = %d, nkpts = %d', nocc, nvir, nkpts) if self.frozen is not None: - log.info('frozen orbitals %s', str(self.frozen)) - logger.info(self, 'use perturbative linearized QP eqn = %s', self.linearized) - logger.info(self, 'analytic continuation method = %s', self.ac) - logger.info(self, 'GW finite size corrections = %s', self.fc) - return self + log.info('frozen orbitals = %s', str(self.frozen)) + if self.kptlist is not None: + log.info('k-point list = %s', str(self.kptlist)) + if self.orbs is not None: + log.info('orbital list = %s', str(self.orbs)) + log.info('off-diagonal self-energy = %s', self.fullsigma) + log.info('GW density matrix = %s', self.rdm) + log.info('density-fitting for exchange = %s', self.vhf_df) + log.info('outcore for self-energy= %s', self.outcore) + log.info('finite size corrections = %s', self.fc) + if self.fc_grid is not None: + log.info('grids for finite size corrections = %s', self.fc_grid) + log.info('broadening parameter = %.3e', self.eta) + log.info('number of grids = %d', self.nw) + log.info('analytic continuation method = %s', self.ac) + log.info('imaginary frequency cutoff = %s', 
str(self.ac_iw_cutoff)) + if self.ac == 'pade': + log.info('Pade points = %d', self.ac_pade_npts) + log.info('Pade step ratio = %.3f', self.ac_pade_step_ratio) + log.info('use perturbative linearized QP eqn = %s', self.qpe_linearized) + if self.qpe_linearized is True: + log.info('linearized factor range = %s', self.qpe_linearized_range) + else: + log.info('QPE max iter = %d', self.qpe_max_iter) + log.info('QPE tolerance = %.1e', self.qpe_tol) + log.info('') + return @property def nocc(self): - return self.get_nocc() + frozen_mask = get_frozen_mask(self) + nkpts = len(self._scf.mo_energy) + nelec = 0.0 + for k in range(nkpts): + nelec += np.sum(self._scf.mo_occ[k][frozen_mask[k]]) + nelec = int(nelec / nkpts) + return nelec // 2 + @nocc.setter def nocc(self, n): self._nocc = n @property def nmo(self): - return self.get_nmo() + frozen_mask = get_frozen_mask(self) + return len(self._scf.mo_energy[0][frozen_mask[0]]) + @nmo.setter def nmo(self, n): self._nmo = n - get_nocc = get_nocc - get_nmo = get_nmo - get_frozen_mask = get_frozen_mask + def kernel(self, orbs=None, kptlist=None): + """Run a G0W0 calculation. 
- def kernel(self, mo_energy=None, mo_coeff=None, orbs=None, kptlist=None, nw=100): + Parameters + ---------- + orbs : list, optional + orbital list to calculate self-energy, by default None + kptlist : list, optional + k-point list to calculate self-energy, by default None """ - Input: - kptlist: self-energy k-points - orbs: self-energy orbs - nw: grid number - Output: - mo_energy: GW quasiparticle energy - """ - if mo_coeff is None: - mo_coeff = np.array(self._scf.mo_coeff) - if mo_energy is None: - mo_energy = np.array(self._scf.mo_energy) + if self.mo_energy is None: + self.mo_energy = np.array(self._scf.mo_energy, copy=True) + if self.mo_coeff is None: + self.mo_coeff = np.array(self._scf.mo_coeff, copy=True) + if self.mo_occ is None: + self.mo_occ = np.array(self._scf.mo_occ, copy=True) + + self.orbs = orbs + self.kptlist = kptlist + + if hasattr(self._scf, "sigma"): + self.nw = max(400, self.nw) + self.ac_pade_npts = 18 + self.ac_pade_step_ratio = 5.0 / 6.0 + self.fc = False nmo = self.nmo naux = self.with_df.get_naoaux() nkpts = self.nkpts - mem_incore = (2*nkpts*nmo**2*naux) * 16/1e6 + mem_incore = (2 * nkpts * nmo**2 * naux) * 16 / 1e6 mem_now = lib.current_memory()[0] - if (mem_incore + mem_now > 0.99*self.max_memory): + if mem_incore + mem_now > 0.99 * self.max_memory: logger.warn(self, 'Memory may not be enough!') - raise NotImplementedError - cput0 = (logger.process_clock(), logger.perf_counter()) + cput0 = (time.process_time(), time.perf_counter()) self.dump_flags() - self.converged, self.mo_energy, self.mo_coeff = \ - kernel(self, mo_energy, mo_coeff, orbs=orbs, - kptlist=kptlist, nw=nw, verbose=self.verbose) - - logger.warn(self, 'GW QP energies may not be sorted from min to max') - logger.timer(self, 'GW', *cput0) - return self.mo_energy + kernel(self) + logger.timer(self, 'KRGWAC', *cput0) + return + + set_frozen_orbs = set_frozen_orbs + make_rdm1 = make_rdm1_linear + make_gf = make_gf + get_sigma_exchange = get_sigma_exchange diff --git 
a/pyscf/pbc/gw/krpa.py b/pyscf/pbc/gw/krpa.py new file mode 100644 index 0000000000..6add9be745 --- /dev/null +++ b/pyscf/pbc/gw/krpa.py @@ -0,0 +1,1045 @@ +#!/usr/bin/env python +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Author: Tianyu Zhu +# Author: Christopher Hillenbrand +# Author: Chaoqun Zhang +# Author: Jincheng Yu +# Author: Jiachen Li +# + +""" +Periodic spin-restricted random phase approximation (direct RPA) with N^4 scaling. + +References: + T. Zhu and G.K.-L. Chan, J. Chem. Theory. Comput. 17, 727-741 (2021) + New J. Phys. 
14, 053020 (2012) +""" + +import numpy as np +import scipy.linalg.blas as blas +import time + +from pyscf import lib +from pyscf.lib import logger, temporary_env +from pyscf.ao2mo._ao2mo import r_e2 +from pyscf.ao2mo.incore import _conc_mos +from pyscf.pbc import scf, tools +from pyscf.pbc.mp.kmp2 import get_nocc, get_nmo, get_frozen_mask + +from pyscf.gw.utils.ac_grid import _get_scaled_legendre_roots +from pyscf.pbc.gw.krgw_ac import get_rho_response, get_rho_response_head, get_rho_response_wing, get_qij, \ + _mo_occ_frozen, _mo_energy_frozen, _mo_frozen + + +einsum = lib.einsum + + +def kernel(rpa, mo_energy, mo_coeff, nw=None, with_e_hf=None): + """RPA correlation and total energy + + Parameters + ---------- + rpa : KRPA + rpa object + mo_energy : double array + molecular orbital energies + mo_coeff : double ndarray + molecular orbital coefficients + Lpq : double array, optional + density fitting 3-center integral in MO basis, by default None + nw : int, optional + number of frequency point on imaginary axis, by default None + with_e_hf : float, optional + extra input HF energy, by default None + + Returns + ------- + e_tot : float + RPA total energy + e_hf : float + HF energy (exact exchange for given mo_coeff) + e_corr : float + RPA correlation energy + """ + # Compute HF exchange energy (EXX) + if with_e_hf is None: + rhf = scf.KRHF(rpa.mol, rpa.kpts, exxdiv=rpa._scf.exxdiv) + rhf.verbose = 0 + if hasattr(rpa._scf, 'sigma'): + rhf = scf.addons.smearing_(rhf, sigma=rpa._scf.sigma, method=rpa._scf.smearing_method) + rhf.with_df = rpa._scf.with_df + with temporary_env(rpa.with_df, verbose=0), temporary_env(rhf.mol, verbose=0): + dm = rpa._scf.make_rdm1() + e_1e = 1.0 / len(rpa.kpts) * lib.einsum('kij,kji', dm, rhf.get_hcore()).real + e_j = 0.5 / len(rpa.kpts) * lib.einsum('kij,kji', dm, rhf.get_j(rhf.cell, dm)).real + e_x = get_rpa_exx(rpa, acfd=rpa.acfd_exx, correction_only=False) + e_nuc = rpa._scf.energy_nuc() + e_hf = e_1e + e_j + e_x + e_nuc + else: + e_hf 
= with_e_hf + logger.debug(rpa, f' Setting EXX energy explicitly to {e_hf}') + + is_metal = hasattr(rpa._scf, 'sigma') + + # Turn off FC for metals + if is_metal and rpa.fc: + logger.warn(rpa, 'FC not available for metals - setting rpa.fc to False') + rpa.fc = False + + # Grids for integration on imaginary axis + freqs, wts = rpa.get_grids(nw=nw, mo_energy=mo_energy) + + # Compute RPA correlation energy + if rpa.outcore: + if is_metal: + e_corr = get_rpa_ecorr_outcore_metal(rpa, freqs, wts) + else: + e_corr = get_rpa_ecorr_outcore(rpa, freqs, wts) + else: + e_corr = get_rpa_ecorr(rpa, freqs, wts) + + # Compute total energy + e_tot = e_hf + e_corr + + logger.debug(rpa, f' RPA total energy = {e_tot}') + logger.debug(rpa, f' EXX energy = {e_hf}, RPA corr energy = {e_corr}') + + return e_tot, e_hf, e_corr + + +def get_idx_metal(mo_occ, threshold=1.0e-6): + """Get index of occupied/virtual/fractional orbitals of metals. + + Parameters + ---------- + mo_occ : double 1d array + occupation number + threshold : double, optional + threshold to determine fractionally occupied orbitals, by default 1.0e-6 + + Returns + ------- + idx_occ : list + list of occupied orbital indexes + idx_frac : list + list of fractionally occupied orbital indexes + idx_vir : list + list of virtual orbital indexes + """ + idx_occ = np.where(mo_occ > 2.0 - threshold)[0] + idx_vir = np.where(mo_occ < threshold)[0] + idx_frac = list(range(idx_occ[-1] + 1, idx_vir[0])) + + return idx_occ, idx_frac, idx_vir + + +def get_rho_response_metal(omega, mo_energy, mo_occ, Lpq, kidx): + """Get Pi=PV for metallic systems. + P is density-density response function. + V is two-electron integral. + See equation 24 in doi.org/10.1021/acs.jctc.0c00704. + + NOTE: this function is different from the one in krgw_ac.py. + They should be merged in the future. The metal version here + is more efficient both in memory and computational time. 
+ + Parameters + ---------- + omega : double + real position of imaginary frequency + mo_energy : double ndarray + orbital energy + mo_occ : double ndarray + occupation number + Lpq : list of complex ndarray + three-center density-fitting matrix in MO. + Lpq[ki] contains the naux x (nocc_i + nfrac_i) x (nfrac_i + nvir_i) sub-block. + kidx : list + momentum-conserved k-point list kj=kidx[ki] + + Returns + ------- + Pi : complex ndarray + Pi in auxiliary basis at freq iw + """ + nkpts = len(Lpq) + naux = Lpq[0].shape[0] + + # Compute Pi for kL + Pi = np.zeros(shape=[naux, naux], dtype=np.complex128) + for i in range(nkpts): + # Find ka that conserves with ki and kL (-ki+ka+kL=G) + a = kidx[i] + idx_occ_i, _, idx_vir_i = get_idx_metal(mo_occ[i]) + idx_occ_a, idx_frac_a, idx_vir_a = get_idx_metal(mo_occ[a]) + + # merge index + idx_i = slice(idx_occ_i[0], idx_vir_i[0]) + idx_a = slice(idx_occ_a[-1] + 1, idx_vir_a[-1] + 1) + nocc_i = len(idx_occ_i) + nfrac_a = len(idx_frac_a) + + eia = mo_energy[i, idx_i, None] - mo_energy[a, None, idx_a] + fia = (mo_occ[i][idx_i, None] - mo_occ[a][None, idx_a]) / 2.0 + + # factor of 0.5 is for double counting + fia[nocc_i:, :nfrac_a] *= 0.5 + # Response from both spin-up and spin-down density + rho_accum_inner(Pi, eia, omega, Lpq[i], alpha=4.0 / nkpts, fia=fia) + + return Pi + + +def rho_accum_inner(Pi, eia, omega, Lov, alpha=0.0, fia=None): + """Get contribution to response function from current occupied-virtual block. 
+ + Parameters + ---------- + Pi : complex 2d array + density-density response function, will be overwritten + eia : double 2d array + occupied-virtual orbital energy difference + omega : double + real position of imaginary frequency + Lov : complex 3d array + occupied-virtual block of three-center density-fitting matrix in MO + alpha : float, optional + prefactor, by default 0.0 + fia : double 2d array, optional + occupied-virtual occupation number difference, by default None + """ + naux, nocc, nvir = Lov.shape + + if fia is None: + eia = eia / (omega**2 + eia**2) + else: + eia = eia * fia / (omega**2 + eia**2) + Pia = (Lov * eia).reshape(naux, nocc * nvir) + + # The following call to blas.zgemm may be replaced with + # Pi += alpha * np.einsum('Pia, Qia -> PQ', Pia, Lov.conj(), optimize=True) + # with a moderate performance hit. + + # zgemm is complex matrix multiplication. A wrapper is included in SciPy. + # C <- alpha * op(A) @ op(B) + beta * C + blas.zgemm( + alpha=alpha, + a=Lov.reshape(naux, nocc * nvir).T, + b=Pia.T, + trans_a=2, # take conjugate transpose of A (this gives Lov.conj()) + trans_b=0, # B is Pia.T + beta=1.0, + c=Pi.T, # Pi.T += alpha * Lov.conj() @ Pia.T + overwrite_c=True, + ) + + return + + +def rho_wing_accum_inner(Pi_P0, eia, omega, Lov, qov, alpha=0.0): + """Accumulate the finite-size-correction wing response for one OV slice. 
+ + Parameters + ---------- + Pi_P0 : complex 1d array + finite-size correction to density-density response function, will be overwritten + eia : double 2d array + occupied-virtual orbital energy difference + omega : double + frequency + Lov : complex 3d array + occupied-virtual block of three-center density-fitting matrix in MO + qov : complex 2d array + virtual-occupied correction + alpha : float, optional + prefactor, by default 0.0 + """ + naux, nocc, nvir = Lov.shape + eia_q = eia * qov.conj() / (omega**2 + eia**2) + Pi_P0 += alpha * np.matmul(Lov.reshape(naux, nocc * nvir), eia_q.reshape(nocc * nvir)) + + return + + +def get_rpa_ecorr(rpa, freqs, wts): + """Compute RPA correlation energy. + + Parameters + ---------- + rpa : KRPA + rpa object + freqs : double 1d array + frequency grid + wts : double 1d array + weight of grids + + Returns + ------- + e_corr : double + correlation energy + """ + mo_coeff = np.array(_mo_frozen(rpa, rpa._scf.mo_coeff)) + mo_energy = np.array(_mo_energy_frozen(rpa, rpa._scf.mo_energy)) + mo_occ = np.array(_mo_occ_frozen(rpa, rpa._scf.mo_occ)) + + nocc = rpa.nocc + nmo = rpa.nmo + nvir = nmo - nocc + nao = rpa._scf.mo_coeff[0].shape[0] + nkpts = rpa.nkpts + kpts = rpa.kpts + nw = len(freqs) + mydf = rpa.with_df + + # possible kpts shift center + kscaled = rpa.mol.get_scaled_kpts(kpts) + kscaled -= kscaled[0] + + is_metal = hasattr(rpa._scf, 'sigma') + + if rpa.fc: + qij, q_abs, nq_pts = rpa.get_q_mesh(mo_energy, mo_coeff) + + e_corr = 0j + + # Precompute k-conservation table + # Given k-point indices (kL, i), kconserv_table[kshift,i] contains + # the index j that satisfies momentum conservation, + # (k(i) - k(j) - k(kL)) \dot a = 2n\pi + # i.e. 
+ # - ki + kj + kL = G + kconserv_table = get_kconserv_ria_efficient(rpa.mol, kpts) + cderiarr = mydf.cderi_array() + + for kL in range(nkpts): + # Lij: (ki, L, i, j) for looping every kL + if is_metal: + Lij = [] + else: + Lij = None + # kidx: save kj that conserves with kL and ki (-ki+kj+kL=G) + # kidx_r: save ki that conserves with kL and kj (-ki+kj+kL=G) + # kidx = np.zeros((nkpts),dtype=np.int64) + # kidx_r = np.zeros((nkpts),dtype=np.int64) + for i, kpti in enumerate(kpts): + j = kconserv_table[kL, i] + kptj = kpts[j] + kconserv = -kscaled[i] + kscaled[j] + kscaled[kL] + assert np.linalg.norm(np.round(kconserv) - kconserv) < 1e-12 # kidx[i] = j + # kidx_r[j] = i + logger.debug(rpa, f'Read Lpq (kL: {kL+1} / {nkpts}, ki: {i}, kj: {j})') + # Read (L|pq) and ao2mo transform to (L|ij) + # support unequal naux on different k points + Lpq = cderiarr.load(kpti, kptj) + if Lpq.shape[-1] == (nao * (nao + 1)) // 2: + Lpq = lib.unpack_tril(Lpq).reshape(-1, nao**2) + else: + Lpq = Lpq.reshape(-1, nao**2) + Lpq = Lpq.astype(np.complex128) + tao = [] + ao_loc = None + moij, ijslice = _conc_mos(mo_coeff[i], mo_coeff[j])[2:] + + naux = Lpq.shape[0] + if not is_metal: + if Lij is None: + Lij = np.zeros((nkpts, naux, nocc, nvir), dtype=np.complex128) + ijslice = (0, nocc, nmo + nocc, 2 * nmo) + r_e2(Lpq, moij, ijslice, tao, ao_loc, out=Lij[i]) + else: + # Only (nocc+nfrac, nfrac+nvir) block of Lpq is needed + # This is consistent with the new get_rho_response_metal implementation + idx_occ_i, idx_frac_i, idx_vir_i = get_idx_metal(mo_occ[i]) + idx_occ_j, idx_frac_j, idx_vir_j = get_idx_metal(mo_occ[j]) + + nocc_i = len(idx_occ_i) + nfrac_i = len(idx_frac_i) + nocc_j = len(idx_occ_j) + nfrac_j = len(idx_frac_j) + nvir_j = len(idx_vir_j) + ijslice = (0, nocc_i + nfrac_i, nmo + nocc_j, 2 * nmo) + + Lij.append(r_e2(Lpq, moij, ijslice, tao, ao_loc).reshape(naux, nocc_i + nfrac_i, nfrac_j + nvir_j)) + + for w in range(nw): + if is_metal: + Pi = get_rho_response_metal(freqs[w], 
mo_energy, mo_occ, Lij, kconserv_table[kL]) + else: + Pi = get_rho_response(freqs[w], mo_energy, Lij, kconserv_table[kL]) + if kL == 0 and rpa.fc: + for iq in range(nq_pts): + # head Pi_00 + Pi_00 = get_rho_response_head(freqs[w], mo_energy, qij[iq]) + Pi_00 = 4.0 * np.pi / np.linalg.norm(q_abs[iq]) ** 2 * Pi_00 + # wings Pi_P0 + Pi_P0 = get_rho_response_wing(freqs[w], mo_energy, Lij, qij[iq]) + Pi_P0 = np.sqrt(4.0 * np.pi) / np.linalg.norm(q_abs[iq]) * Pi_P0 + + # assemble Pi + Pi_fc = np.zeros((naux + 1, naux + 1), dtype=Pi.dtype) + Pi_fc[0, 0] = Pi_00 + Pi_fc[0, 1:] = Pi_P0.conj() + Pi_fc[1:, 0] = Pi_P0 + Pi_fc[1:, 1:] = Pi + + e_corr += get_rpa_ecorr_w(Pi_fc, wts[w]) + else: + e_corr += get_rpa_ecorr_w(Pi, wts[w]) + + e_corr = e_corr.real + e_corr *= 1.0 / (2.0 * np.pi) / nkpts + return e_corr + + +def get_rpa_ecorr_outcore(rpa, freqs, wts): + """Low-memory routine to compute RPA correlation energy. + + Parameters + ---------- + rpa : KRPA + rpa object + freqs : double 1d array + frequency grid + wts : double 1d array + weight of grids + + Returns + ------- + e_corr : double + correlation energy + """ + mo_coeff = np.array(_mo_frozen(rpa, rpa._scf.mo_coeff)) + mo_energy = np.array(_mo_energy_frozen(rpa, rpa._scf.mo_energy)) + + nocc = rpa.nocc + nmo = rpa.nmo + nao = rpa._scf.mo_coeff[0].shape[0] + nkpts = rpa.nkpts + kpts = rpa.kpts + nw = len(freqs) + mydf = rpa.with_df + + # possible kpts shift center + kscaled = rpa.mol.get_scaled_kpts(kpts) + kscaled -= kscaled[0] + + if rpa.fc: + qij, q_abs, nq_pts = rpa.get_q_mesh(mo_energy, mo_coeff) + + e_corr = 0j + + # Precompute k-conservation table + # Given k-point indices (kL, i), kconserv_table[kshift,i] contains + # the index j that satisfies momentum conservation, + # (k(i) - k(j) - k(kL)) \dot a = 2n\pi + # i.e. 
+ # - ki + kj + kL = G + kconserv_table = get_kconserv_ria_efficient(rpa.mol, kpts) + cderiarr = mydf.cderi_array() + + for kL in range(nkpts): + Pi = None + Pi_P0 = None + nseg = nocc // rpa.segsize + 1 + for iseg in range(nseg): + orb_start = iseg * rpa.segsize + orb_end = min((iseg + 1) * rpa.segsize, nocc) + if orb_end == orb_start: + continue + norb_this_iter = orb_end - orb_start + + # Lij: (ki, L, i, j) for looping every kL + # kidx: save kj that conserves with kL and ki (-ki+kj+kL=G) + # kidx_r: save ki that conserves with kL and kj (-ki+kj+kL=G) + # kidx = np.zeros((nkpts),dtype=np.int64) + # kidx_r = np.zeros((nkpts),dtype=np.int64) + for i, kpti in enumerate(kpts): + j = kconserv_table[kL, i] + kptj = kpts[j] + # Find (ki,kj) that satisfies momentum conservation with kL + kconserv = -kscaled[i] + kscaled[j] + kscaled[kL] + assert np.linalg.norm(np.round(kconserv) - kconserv) < 1e-12 + logger.debug(rpa, f'Read Lpq (kL: {kL+1} / {nkpts}, ki: {i}, kj: {j})') + # Read (L|pq) and ao2mo transform to (L|ij) + # support uneqaul naux on different k points + Lpq = cderiarr.load(kpti, kptj) + if Lpq.shape[-1] == (nao * (nao + 1)) // 2: + Lpq = lib.unpack_tril(Lpq).reshape(-1, nao**2) + else: + Lpq = Lpq.reshape(-1, nao**2) + Lpq = Lpq.astype(np.complex128) + naux = Lpq.shape[0] + + tao = [] + ao_loc = None + moij, ijslice = _conc_mos(mo_coeff[i], mo_coeff[j])[2:] + + ijslice = (orb_start, orb_end, nmo + nocc, 2 * nmo) + Lij_slice = r_e2(Lpq, moij, ijslice, tao, ao_loc) + Lij_slice = Lij_slice.reshape(naux, norb_this_iter, nmo - nocc) + if Pi is None: + Pi = np.zeros((nw, naux, naux), dtype=np.complex128) + if kL == 0 and rpa.fc: + Pi_P0 = np.zeros((nq_pts, nw, naux), dtype=np.complex128) + + # Find ka that conserves with ki and kL (-ki+ka+kL=G) + a_inner = kconserv_table[kL, i] + eia = mo_energy[i][orb_start:orb_end, None] - mo_energy[a_inner][None, nocc:] + for w in range(nw): + rho_accum_inner(Pi[w], eia, freqs[w], Lij_slice, alpha=4.0 / nkpts) + if kL == 0 and 
rpa.fc: + for iq in range(nq_pts): + rho_wing_accum_inner( + Pi_P0[iq, w], + eia, + freqs[w], + Lij_slice, + qij[iq, i, orb_start:orb_end], + alpha=4.0 / nkpts, + ) + + for w in range(nw): + if kL == 0 and rpa.fc: + for iq in range(nq_pts): + Pi_00 = get_rho_response_head(freqs[w], mo_energy, qij[iq]) + Pi_00 = 4.0 * np.pi / np.linalg.norm(q_abs[iq]) ** 2 * Pi_00 + Pi_P0_iq = np.sqrt(4.0 * np.pi) / np.linalg.norm(q_abs[iq]) * Pi_P0[iq, w] + + Pi_fc = np.zeros((naux + 1, naux + 1), dtype=Pi.dtype) + Pi_fc[0, 0] = Pi_00 + Pi_fc[0, 1:] = Pi_P0_iq.conj() + Pi_fc[1:, 0] = Pi_P0_iq + Pi_fc[1:, 1:] = Pi[w] + + e_corr += get_rpa_ecorr_w(Pi_fc, wts[w]) + else: + e_corr += get_rpa_ecorr_w(Pi[w], wts[w]) + + e_corr = e_corr.real + e_corr *= 1.0 / (2.0 * np.pi) / nkpts + return e_corr + + +def get_rpa_ecorr_outcore_metal(rpa, freqs, wts): + """Low-memory routine to compute RPA correlation energy for metals. + + Parameters + ---------- + rpa : KRPA + rpa object + freqs : double 1d array + frequency grid + wts : double 1d array + weight of grids + + Returns + ------- + e_corr : double + correlation energy + """ + mo_coeff = np.array(_mo_frozen(rpa, rpa._scf.mo_coeff)) + mo_energy = np.array(_mo_energy_frozen(rpa, rpa._scf.mo_energy)) + mo_occ = np.array(_mo_occ_frozen(rpa, rpa._scf.mo_occ)) + + nmo = rpa.nmo + nao = rpa._scf.mo_coeff[0].shape[0] + nkpts = rpa.nkpts + kpts = rpa.kpts + nw = len(freqs) + mydf = rpa.with_df + + # possible kpts shift center + kscaled = rpa.mol.get_scaled_kpts(kpts) + kscaled -= kscaled[0] + + e_corr = 0j + + # Precompute k-conservation table + # Given k-point indices (kL, i), kconserv_table[kshift,i] contains + # the index j that satisfies momentum conservation, + # (k(i) - k(j) - k(kL)) \dot a = 2n\pi + # i.e. 
+ # - ki + kj + kL = G + kconserv_table = get_kconserv_ria_efficient(rpa.mol, kpts) + cderiarr = mydf.cderi_array() + + for kL in range(nkpts): + Pi = None + # Lij: (ki, L, i, j) for looping every kL + # kidx: save kj that conserves with kL and ki (-ki+kj+kL=G) + # kidx_r: save ki that conserves with kL and kj (-ki+kj+kL=G) + # kidx = np.zeros((nkpts),dtype=np.int64) + # kidx_r = np.zeros((nkpts),dtype=np.int64) + for i, kpti in enumerate(kpts): + j = kconserv_table[kL, i] + kptj = kpts[j] + # Find (ki,kj) that satisfies momentum conservation with kL + kconserv = -kscaled[i] + kscaled[j] + kscaled[kL] + assert np.linalg.norm(np.round(kconserv) - kconserv) < 1e-12 + logger.debug(rpa, f'Read Lpq (kL: {kL+1} / {nkpts}, ki: {i}, kj: {j})') + # Read (L|pq) and ao2mo transform to (L|ij) + # support uneqaul naux on different k points + Lpq = cderiarr.load(kpti, kptj) + if Lpq.shape[-1] == (nao * (nao + 1)) // 2: + Lpq = lib.unpack_tril(Lpq).reshape(-1, nao**2) + else: + Lpq = Lpq.reshape(-1, nao**2) + Lpq = Lpq.astype(np.complex128) + naux = Lpq.shape[0] + + idx_occ_i, idx_frac_i, idx_vir_i = get_idx_metal(mo_occ[i]) + idx_occ_j, idx_frac_j, idx_vir_j = get_idx_metal(mo_occ[j]) + + nocc_i = len(idx_occ_i) + nfrac_i = len(idx_frac_i) + nocc_j = len(idx_occ_j) + nfrac_j = len(idx_frac_j) + nseg = (nocc_i + nfrac_i) // rpa.segsize + 1 + for iseg in range(nseg): + orb_start = iseg * rpa.segsize + orb_end = min((iseg + 1) * rpa.segsize, nocc_i + nfrac_i) + if orb_end == orb_start: + break + norb_this_iter = orb_end - orb_start + + tao = [] + ao_loc = None + moij, ijslice = _conc_mos(mo_coeff[i], mo_coeff[j])[2:] + + ijslice = (orb_start, orb_end, nmo + nocc_j, 2 * nmo) + Lij_slice = r_e2(Lpq, moij, ijslice, tao, ao_loc) + Lij_slice = Lij_slice.reshape(naux, norb_this_iter, nmo - nocc_j) + if Pi is None: + Pi = np.zeros((nw, naux, naux), dtype=np.complex128) + + # Find ka that conserves with ki and kL (-ki+ka+kL=G) + eia = mo_energy[i][orb_start:orb_end, None] - 
mo_energy[j][None, nocc_j:] + fia = (mo_occ[i][orb_start:orb_end, None] - mo_occ[j][None, nocc_j:]) / 2.0 + # The overall fia[nocc_i:, :nfrac_j] *= 0.5 for double counting + if orb_start >= nocc_i: + fia[:, :nfrac_j] *= 0.5 + elif orb_end > nocc_i: + offset = nocc_i - orb_start + fia[offset:, :nfrac_j] *= 0.5 + for w in range(nw): + rho_accum_inner(Pi[w], eia, freqs[w], Lij_slice, alpha=4.0 / nkpts, fia=fia) + + for w in range(nw): + e_corr += get_rpa_ecorr_w(Pi[w], wts[w]) + + e_corr = e_corr.real + e_corr *= 1.0 / (2.0 * np.pi) / nkpts + return e_corr + + +def get_rpa_ecorr_w(Pi_w, wts_w): + """Get contribution to RPA correlation energy from a single frequency. + + Parameters + ---------- + Pi_w : complex 2d array + density-density response function at a single frequency + wts_w : double + weights of the frequency + + Returns + ------- + e_corr : double + correlation energy + """ + # First, compute ec_w = Tr(Pi_w) + log|det(I-Pi_w)| + ec_w = np.trace(Pi_w) + # The following two lines are equivalent to + # Pi_w = np.eye(naux) - Pi_w + blas.zdscal(-1.0, Pi_w.ravel(), overwrite_x=1) + np.fill_diagonal(Pi_w, np.diagonal(Pi_w) + 1.0) + + ec_w += np.linalg.slogdet(Pi_w)[1] + #e_corr = 1.0 / (2.0 * np.pi) / nkpts * ec_w * wts_w + e_corr = ec_w * wts_w + + return e_corr + + +def get_rpa_exx(rpa, acfd=False, correction_only=False): + """Calculate RPA exchange energy. + For gapped systems, Hartree-Fock and adiabatic connection fluctuation dissipation exchange energies are the same. + For metallic systems, they are different. 
+ The ACFD exchange energy is given by equation 12 in doi.org/10.1103/PhysRevB.81.115126 + + Parameters + ---------- + rpa : KRPA + rpa object + acfd : bool, optional + calculate ACFD exchange energy, by default False + correction_only : bool, optional + only calculate the correction term, by default False + + Returns + ------- + ex : double + exchange energy + """ + mo_energy = np.array(_mo_energy_frozen(rpa, rpa._scf.mo_energy)) + mo_coeff = np.array(_mo_frozen(rpa, rpa._scf.mo_coeff)) + mo_occ = np.array(_mo_occ_frozen(rpa, rpa._scf.mo_occ)) + + nocc = rpa.nocc + nao = rpa._scf.mo_coeff[0].shape[0] + nkpts = rpa.nkpts + kpts = rpa.kpts + mydf = rpa.with_df + + # possible kpts shift center + kscaled = rpa.mol.get_scaled_kpts(kpts) + kscaled -= kscaled[0] + + ex = 0j + cderiarr = mydf.cderi_array() + for kL in range(nkpts): + # kidx: save kj that conserves with kL and ki (-ki+kj+kL=G) + # kidx_r: save ki that conserves with kL and kj (-ki+kj+kL=G) + kidx = np.zeros(shape=[nkpts], dtype=np.int64) + kidx_r = np.zeros(shape=[nkpts], dtype=np.int64) + for i in range(nkpts): + for j in range(nkpts): + # Find (ki,kj) that satisfies momentum conservation with kL + kconserv = -kscaled[i] + kscaled[j] + kscaled[kL] + is_kconserv = np.linalg.norm(np.round(kconserv) - kconserv) < 1e-12 + if is_kconserv: + kidx[i] = j + kidx_r[j] = i + + for kn in range(nkpts): + # Find km that conserves with kn and kL (-km+kn+kL=G) + km = kidx_r[kn] + + # logger.debug(gw, 'Read Lpq (kL: %s / %s, ki: %s, kj: %s @ Rank %d)' % (kL + 1, nkpts, i, j, rank)) + # Read (L|pq) and ao2mo transform to (L|ij) + # support unequal naux on different k points + Lpq_ao = cderiarr.load(kpts[km], kpts[kn]) + if Lpq_ao.shape[-1] == (nao * (nao + 1)) // 2: + Lpq_ao = lib.unpack_tril(Lpq_ao).reshape(-1, nao**2) + else: + Lpq_ao = Lpq_ao.reshape(-1, nao**2) + Lpq_ao = Lpq_ao.astype(np.complex128) + + Lij = None + if hasattr(rpa._scf, 'sigma'): + idx_occ_i, idx_frac_i, _ = get_idx_metal(mo_occ[km]) + idx_occ_j, 
idx_frac_j, _ = get_idx_metal(mo_occ[kn]) + nocc_i = len(idx_occ_i) + len(idx_frac_i) + nocc_j = len(idx_occ_j) + len(idx_frac_j) + moij, ijslice = _conc_mos(mo_coeff[km][:, :nocc_i], mo_coeff[kn][:, :nocc_j])[2:] + Lij = r_e2(Lpq_ao, moij, ijslice, tao=[], ao_loc=None, out=Lij) + Lij = Lij.reshape(-1, nocc_i, nocc_j) + + if acfd is True: + if correction_only is True: + mo_occ_ij = np.minimum(mo_occ[km][:nocc_i, None], mo_occ[kn][None, :nocc_j]) / 2.0 + mo_occ_ij -= mo_occ[km][:nocc_i, None] * mo_occ[kn][None, :nocc_j] / 4.0 + else: + # numerical integration for equation 12 in doi.org/10.1103/PhysRevB.81.115126 + # NOTE: this integration is not stable!!! + # w, wts = _get_scaled_legendre_roots(200) + #eij = mo_energy[km][:nocc_i, None] - mo_energy[kn][None, :nocc_j] + ##integrad = eij[:, :, None] / lib.direct_sum("ij+w->ijw", eij**2, w**2) * wts[None, None] + #integrand = eij[:, :, None] / (eij[:, :, None]**2 + w**2) * wts[None, None] + #integrand = np.sum(integrand, axis=2) * 2.0 / np.pi + + # The following line is equivalent to the frequency integration in equation 12 in + # doi.org/10.1103/PhysRevB.81.115126 + # TODO: add a detailed note + eij = mo_energy[km][:nocc_i, None] - mo_energy[kn][None, :nocc_j] + integrand = np.zeros((nocc_i, nocc_j), dtype=np.complex128) + integrand[eij > 1e-6] = 1 + integrand[eij < -1e-6] = -1 + mo_occ_ij = 1.0 - integrand + # spin-restricted mo_occ should be divided by 2 + mo_occ_ij = mo_occ_ij * mo_occ[km][:nocc_i, None] / 2.0 + else: + mo_occ_ij = mo_occ[km][:nocc_i, None] * mo_occ[kn][None, :nocc_j] / 4.0 + Lij_occ = Lij * mo_occ_ij[None] + # ex -= np.einsum('Lij,Lij->', Lij_occ.reshape(-1, nocc, nocc), Lij.reshape(-1, nocc, nocc).conj()) + ex -= blas.zdotc(Lij_occ.ravel(), Lij.ravel()) + else: + moij, ijslice = _conc_mos(mo_coeff[km][:, :nocc], mo_coeff[kn][:, :nocc])[2:] + Lij = r_e2(Lpq_ao, moij, ijslice, tao=[], ao_loc=None, out=Lij) + # ex -= np.einsum('Lij,Lij->', Lij.reshape(-1, nocc, nocc), Lij.reshape(-1, nocc, 
nocc).conj()) + ex -= blas.zdotc(Lij.ravel(), Lij.ravel()) + + ex = ex.real + ex /= nkpts**2 + + if rpa._scf.exxdiv == 'ewald' and rpa._scf.cell.dimension != 0: + madelung = tools.pbc.madelung(rpa._scf.cell, kpts) + exxdiv_shift = madelung * np.sum(mo_occ**2) / (4.0 * nkpts) + ex -= exxdiv_shift + if acfd is True: + for k in range(nkpts): + idx_occ, idx_frac, _ = get_idx_metal(mo_occ[k]) + f_i = mo_occ[k][:(len(idx_occ) + len(idx_frac))] / 2.0 + ex -= madelung * np.sum(f_i - f_i * f_i) / nkpts + + return ex + + +def get_kconserv_ria_efficient(cell, kpts, tol=1e-12): + r"""Get the momentum conservation array for single excitation amplitudes + for a set of k-points with appropriate k-shift. + + + Given k-point indices (kshift, m) the array kconserv[kshift,m] returns + the index n that satisfies momentum conservation, + + (k(m) - k(n) - k(kshift)) \dot a = 2n\pi + """ + nkpts = kpts.shape[0] + a = cell.lattice_vectors() / (2 * np.pi) + + kconserv = np.zeros((nkpts, nkpts), dtype=int) + kvKM = -kpts[:, None, :] + kpts[:, :] + for N, kvN in enumerate(kpts): + kvKMN = np.einsum('wx,kmx->wkm', a, kvKM - kvN, optimize=True) + # check whether (1/(2pi) k_{KLN} dot a) is an integer + kvKMN_int = np.rint(kvKMN) + mask = np.einsum('wkm->km', abs(kvKMN - kvKMN_int), optimize=True) < tol + kconserv[mask] = N + return kconserv + + +class KRPA(lib.StreamObject): + def __init__(self, mf, frozen=None): + self.mol = mf.mol # mol object + self._scf = mf # mean-field object + self.verbose = self.mol.verbose # verbose level + self.stdout = self.mol.stdout # standard output + self.max_memory = mf.max_memory # max memory in MB + + # options + self.frozen = frozen # frozen orbital options + self.grids_alg = 'legendre' # algorithm to generate grids + self.outcore = False # low-memory routine + self.segsize = 50 # number of orbitals in one segment for outcore + self.fc = False # finite-size correction + self.fc_grid = False # grids for finite-size correction + self.acfd_exx = False # 
calculate ACFD exchange energy + + # don't modify the following attributes, they are not input options + self._nocc = None # number of occupied orbitals + self._nmo = None # number of orbitals (exclude frozen orbitals) + self.kpts = mf.kpts # k-points + self.nkpts = len(self.kpts) # number of k-points + self.mo_energy = np.array(mf.mo_energy, copy=True) # orbital energy + self.mo_coeff = np.array(mf.mo_coeff, copy=True) # orbital coefficient + self.mo_occ = np.array(mf.mo_occ, copy=True) # occupation number + self.e_corr = None # correlation energy + self.e_hf = None # Hartree-Fock energy + self.e_tot = None # total energy + + # KRPA must use GDF integrals + if getattr(mf, 'with_df', None): + self.with_df = mf.with_df + else: + raise NotImplementedError + self._keys.update(['with_df']) + + return + + def dump_flags(self, verbose=None): + log = logger.Logger(self.stdout, self.verbose) + log.info('') + log.info('******** %s ********', self.__class__) + log.info('method = %s', self.__class__.__name__) + nocc = self.nocc + nvir = self.nmo - nocc + nkpts = self.nkpts + log.info(f'RPA nocc = {nocc}, nvir = {nvir}, nkpts = {nkpts}') + if self.frozen is not None: + log.info(f'frozen orbitals = {str(self.frozen)}') + log.info('grid type = %s', self.grids_alg) + log.info('outcore mode = %s', self.outcore) + if self.outcore is True: + log.info('outcore segment size = %d', self.segsize) + log.info('RPA finite size corrections = %s', self.fc) + log.info('ACFD exchange energy = %s', self.acfd_exx) + log.info('') + return + + @property + def nocc(self): + frozen_mask = get_frozen_mask(self) + nkpts = len(self._scf.mo_energy) + nelec = 0.0 + for k in range(nkpts): + nelec += np.sum(self._scf.mo_occ[k][frozen_mask[k]]) + nelec = int(nelec / nkpts) + return nelec // 2 + + @nocc.setter + def nocc(self, n): + self._nocc = n + + @property + def nmo(self): + frozen_mask = get_frozen_mask(self) + return len(self._scf.mo_energy[0][frozen_mask[0]]) + + @nmo.setter + def nmo(self, n): + 
self._nmo = n + + get_nocc = get_nocc + get_nmo = get_nmo + get_frozen_mask = get_frozen_mask + + def kernel(self, mo_energy=None, mo_coeff=None, nw=None, with_e_hf=None): + """RPA correlation and total energy + + Calculated total energy, HF energy and RPA correlation energy + are stored in self.e_tot, self.e_hf, self.e_corr + + Parameters + ---------- + mo_energy : double array + molecular orbital energies + mo_coeff : double ndarray + molecular orbital coefficients + nw : int, optional + number of frequency point on imaginary axis, by default None + with_e_hf : float, optional + If given, overrides the HF energy computation. + + Returns + ------- + e_tot : float + RPA total energy + e_hf : float + HF energy (exact exchange for given mo_coeff) + e_corr : float + RPA correlation energy + """ + if mo_coeff is None: + mo_coeff = _mo_frozen(self, self._scf.mo_coeff) + if mo_energy is None: + mo_energy = _mo_energy_frozen(self, self._scf.mo_energy) + + cput0 = (time.process_time(), time.perf_counter()) + self.dump_flags() + self.e_tot, self.e_hf, self.e_corr = kernel(self, mo_energy, mo_coeff, nw=nw, with_e_hf=with_e_hf) + logger.timer(self, 'RPA', *cput0) + return self.e_tot, self.e_hf, self.e_corr + + def get_grids(self, alg=None, nw=None, mo_energy=None): + """Generate grids for integration. 
+ + Parameters + ---------- + alg : str, optional + algorithm for generating grids, by default None + nw : int, optional + number of grids, by default None + mo_energy : double 2d array, optional + orbital energy, used for minimax grids, by default None + + Returns + ------- + freqs : double 1d array + frequency grid + wts : double 1d array + weight of grids + """ + if alg is None: + alg = self.grids_alg + if mo_energy is None: + mo_energy = _mo_energy_frozen(self, self._scf.mo_energy) + if alg == 'legendre': + nw = 40 if nw is None else nw + freqs, wts = _get_scaled_legendre_roots(nw) + else: + raise NotImplementedError('Grids algorithm not implemented!') + + return freqs, wts + + def get_q_mesh(self, mo_energy, mo_coeff): + """Get q-mesh for finite size correction. + Equation 39-42 in doi.org/10.1021/acs.jctc.0c00704 + + Parameters + ---------- + mo_energy : double 2d array + orbital energy + mo_coeff : double 3d array + coefficient from AO to MO + + Returns + ------- + qij : complex 4d array + pair density matrix on the q-mesh, shape (nq_pts, nkpts, nocc, nvir) + q_abs : double 1d array + absolute positions of q-mesh grids + nq_pts : int + number of q-mesh grids + """ + nocc = self.nocc + nmo = self.nmo + nkpts = self.nkpts + # Set up q mesh for q->0 finite size correction + if not self.fc_grid: + q_pts = np.array([1e-3, 0, 0], dtype=np.double).reshape(1, 3) + else: + Nq = 3 + q_pts = np.zeros(shape=[Nq**3 - 1, 3], dtype=np.double) + for i in range(Nq): + for j in range(Nq): + for k in range(Nq): + if i == 0 and j == 0 and k == 0: + continue + else: + q_pts[i * Nq**2 + j * Nq + k - 1, 0] = k * 5e-4 + q_pts[i * Nq**2 + j * Nq + k - 1, 1] = j * 5e-4 + q_pts[i * Nq**2 + j * Nq + k - 1, 2] = i * 5e-4 + nq_pts = len(q_pts) + q_abs = self.mol.get_abs_kpts(q_pts) + + # Get qij = 1/sqrt(Omega) * < psi_{ik} | e^{iqr} | psi_{ak-q} > at q: (nkpts, nocc, nvir) + qij = np.zeros(shape=[nq_pts, nkpts, nocc, nmo - nocc], dtype=np.complex128) + + if not self.fc_grid: + for k in range(nq_pts): + qij[k] = get_qij(self, q_abs[k], 
mo_energy, mo_coeff) + else: + for k in range(nq_pts): + qij[k] = get_qij(self, q_abs[k], mo_energy, mo_coeff) + + return qij, q_abs, nq_pts + + def get_acfd_exx(self, correction_only=False): + """Calculate ACFD exchange energy. + + Parameters + ---------- + correction_only : bool + only return the correction term + + Returns + ------- + ex_acfd : double + ACFD exchange energy + """ + ex_acfd = get_rpa_exx(self, acfd=True, correction_only=correction_only) + return ex_acfd diff --git a/pyscf/pbc/gw/kugw_ac.py b/pyscf/pbc/gw/kugw_ac.py index f26d5204b6..c63e246b62 100644 --- a/pyscf/pbc/gw/kugw_ac.py +++ b/pyscf/pbc/gw/kugw_ac.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2021 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,275 +14,540 @@ # limitations under the License. # # Author: Tianyu Zhu +# Author: Christopher Hillenbrand +# Author: Jiachen Li # ''' -PBC spin-unrestricted G0W0-AC QP eigenvalues with k-point sampling +Periodic spin-unrestricted G0W0 method based on the analytic continuation scheme. +This implementation has N^4 scaling, +and is faster than GW-CD (N^4~N^5) and fully analytic GW (N^6) methods. GW-AC is recommended for valence states only, and is inaccurate for core states. -Method: - See T. Zhu and G.K.-L. Chan, arxiv:2007.03148 (2020) for details - Compute Sigma on imaginary frequency with density fitting, - then analytically continued to real frequency. - Gaussian density fitting must be used (FFTDF and MDF are not supported). +References: + T. Zhu and G.K.-L. Chan, J. Chem. Theory. Comput. 17, 727-741 (2021) + New J. Phys. 
14 053020 (2012) ''' from functools import reduce -import numpy -import numpy as np import h5py -from scipy.optimize import newton, least_squares +import time +import numpy as np +import scipy from pyscf import lib -from pyscf.lib import logger from pyscf.ao2mo import _ao2mo from pyscf.ao2mo.incore import _conc_mos +from pyscf.lib import einsum, logger, temporary_env from pyscf.pbc import df, dft, scf -from pyscf.pbc.cc.kccsd_uhf import get_nocc, get_nmo, get_frozen_mask -from pyscf import __config__ - -einsum = lib.einsum - -def kernel(gw, mo_energy, mo_coeff, orbs=None, - kptlist=None, nw=None, verbose=logger.NOTE): - ''' - GW-corrected quasiparticle orbital energies - Returns: - A list : converged, mo_energy, mo_coeff - ''' - mf = gw._scf - assert gw.frozen is None +from pyscf.pbc.mp.kump2 import get_frozen_mask - nmoa, nmob = gw.nmo - nocca, noccb = gw.nocc +from pyscf.pbc.gw.krgw_ac import KRGWAC +from pyscf.gw.utils.ac_grid import _get_scaled_legendre_roots, PadeAC, TwoPoleAC +from pyscf.gw.utils.gw_np_helper import mkslice, array_scale - if orbs is None: - orbs = range(nmoa) - if kptlist is None: - kptlist = range(gw.nkpts) +def kernel(gw): + mf = gw._scf + nmo = gw.nmo[0] nkpts = gw.nkpts - nklist = len(kptlist) + + # set frozen orbitals + gw.set_frozen_orbs() + orbs = gw.orbs + orbs_frz = gw.orbs_frz + kptlist = gw.kptlist + if kptlist is None: + gw.kptlist = kptlist = range(gw.nkpts) + mo_energy_frz = _mo_energy_frozen(gw, gw.mo_energy) + mo_coeff_frz = _mo_frozen(gw, gw.mo_coeff) # v_xc - dm = np.array(mf.make_rdm1()) - v_mf = np.array(mf.get_veff()) - vj = np.array(mf.get_j(dm_kpts=dm)) - v_mf[0] = v_mf[0] - (vj[0] + vj[1]) - v_mf[1] = v_mf[1] - (vj[0] + vj[1]) + with temporary_env(mf, verbose=0), temporary_env(mf.mol, verbose=0), temporary_env(mf.with_df, verbose=0): + dm = mf.make_rdm1() + v_mf_ao = mf.get_veff() + vj_ao = mf.get_j(dm_kpts=dm) + v_mf_ao[0] = v_mf_ao[0] - (vj_ao[0] + vj_ao[1]) + v_mf_ao[1] = v_mf_ao[1] - (vj_ao[0] + vj_ao[1]) + v_mf = 
np.zeros(shape=[2, nkpts, nmo, nmo], dtype=np.complex128) for s in range(2): for k in range(nkpts): - v_mf[s,k] = reduce(numpy.dot, (mo_coeff[s,k].T.conj(), v_mf[s,k], mo_coeff[s,k])) + v_mf[s, k] = reduce(np.matmul, (mo_coeff_frz[s, k].T.conj(), v_mf_ao[s, k], mo_coeff_frz[s, k])) + gw.vxc = v_mf # v_hf from DFT/HF density - if gw.fc: - exxdiv = 'ewald' - else: - exxdiv = None - uhf = scf.KUHF(gw.mol, gw.kpts, exxdiv=exxdiv) + if isinstance(mf.with_df, df.GDF): + uhf = scf.KUHF(gw.mol.copy(deep=True), gw.kpts, exxdiv=None).density_fit() + elif isinstance(mf.with_df, df.RSDF): + uhf = scf.KUHF(gw.mol.copy(deep=True), gw.kpts, exxdiv=None).rs_density_fit() + if hasattr(mf, 'sigma'): + uhf = scf.addons.smearing_(uhf, sigma=mf.sigma, method=mf.smearing_method) uhf.with_df = gw.with_df - uhf.with_df._cderi = gw.with_df._cderi - if uhf.with_df._j_only: - logger.debug(gw, 'Rebuild CDERI for exchange integrals') - uhf.with_df.build(j_only=False) - vk = uhf.get_veff(gw.mol,dm_kpts=dm) - vj = uhf.get_j(gw.mol,dm_kpts=dm) - vk[0] = vk[0] - (vj[0] + vj[1]) - vk[1] = vk[1] - (vj[0] + vj[1]) + uhf.verbose = uhf.mol.verbose = 0 + with temporary_env(uhf, verbose=0), temporary_env(uhf.with_df, verbose=0): + vk_ao = uhf.get_veff(dm_kpts=dm) + vj_ao = uhf.get_j(dm_kpts=dm) + vk_ao[0] = vk_ao[0] - (vj_ao[0] + vj_ao[1]) + vk_ao[1] = vk_ao[1] - (vj_ao[0] + vj_ao[1]) + vk = np.zeros(shape=[2, nkpts, nmo, nmo], dtype=np.complex128) for s in range(2): for k in range(nkpts): - vk[s,k] = reduce(numpy.dot, (mo_coeff[s,k].T.conj(), vk[s,k], mo_coeff[s,k])) + vk[s, k] = reduce(np.matmul, (mo_coeff_frz[s, k].T.conj(), vk_ao[s, k], mo_coeff_frz[s, k])) - # Grids for integration on imaginary axis - freqs,wts = _get_scaled_legendre_roots(nw) - - # Compute self-energy on imaginary axis i*[0,iw_cutoff] - sigmaI, omega = get_sigma_diag(gw, orbs, kptlist, freqs, wts, iw_cutoff=5.) 
- - # Analytic continuation - coeff_a = [] - coeff_b = [] + # finite size correction for exchange self-energy + if gw.fc: + vk_corr = -2.0 / np.pi * (6.0 * np.pi**2 / gw.mol.vol / nkpts) ** (1.0 / 3.0) + for s in range(2): + for k in range(nkpts): + # NOTE: here was a bug in commits before 2024/12 + for i in range(gw.nocc[s]): + vk[s][k][i, i] = vk[s][k][i, i] + vk_corr + gw.vk = vk + + # set up Fermi level + ef = gw.ef = get_ef(kmf=mf, mo_energy=mf.mo_energy) + + # grids for integration on imaginary axis + gw.freqs, gw.wts = freqs, wts = _get_scaled_legendre_roots(gw.nw) + + # calculate self-energy on imaginary axis + sigmaI, omega = get_sigma( + gw, freqs, wts, ef=ef, mo_energy=mo_energy_frz, orbs=orbs_frz, kptlist=kptlist, iw_cutoff=gw.ac_iw_cutoff, + fullsigma=gw.fullsigma) + + # analytic continuation if gw.ac == 'twopole': - for k in range(nklist): - coeff_a.append(AC_twopole_diag(sigmaI[0,k], omega[0], orbs, nocca)) - coeff_b.append(AC_twopole_diag(sigmaI[1,k], omega[1], orbs, noccb)) + acobj = TwoPoleAC(list(range(nmo)), gw.nocc) elif gw.ac == 'pade': - for k in range(nklist): - coeff_a_tmp, omega_fit_a = AC_pade_thiele_diag(sigmaI[0,k], omega[0]) - coeff_b_tmp, omega_fit_b = AC_pade_thiele_diag(sigmaI[1,k], omega[1]) - coeff_a.append(coeff_a_tmp) - coeff_b.append(coeff_b_tmp) - omega_fit = np.asarray((omega_fit_a, omega_fit_b)) - coeff = np.asarray((coeff_a, coeff_b)) - - conv = True - # This code does not support metals - homo = -99. - lumo = 99. - mo_energy = np.asarray(mf.mo_energy) - for k in range(nkpts): - if homo < max(mo_energy[0,k][nocca-1],mo_energy[1,k][noccb-1]): - homo = max(mo_energy[0,k][nocca-1],mo_energy[1,k][noccb-1]) - if lumo > min(mo_energy[0,k][nocca],mo_energy[1,k][noccb]): - lumo = min(mo_energy[0,k][nocca],mo_energy[1,k][noccb]) - ef = (homo+lumo)/2. 
+ acobj = PadeAC(npts=gw.ac_pade_npts, step_ratio=gw.ac_pade_step_ratio) + else: + raise ValueError('Unknown GW-AC type %s' % (str(gw.ac))) + + acobj.ac_fit(sigmaI, omega, axis=-1) - mo_energy = np.zeros_like(np.array(mf.mo_energy)) + if gw.fullsigma: + diag_acobj = acobj.diagonal(axis1=2, axis2=3) + else: + diag_acobj = acobj + + mo_energy = np.zeros_like(mf.mo_energy) for s in range(2): - for k in range(nklist): - kn = kptlist[k] - for p in orbs: - if gw.linearized: + for ik, k in enumerate(kptlist): + for ip, p in enumerate(orbs_frz): + if gw.qpe_linearized: # linearized G0W0 de = 1e-6 - ep = mf.mo_energy[s][kn][p] - #TODO: analytic sigma derivative - if gw.ac == 'twopole': - sigmaR = two_pole(ep-ef, coeff[s,k,:,p-orbs[0]]).real - dsigma = two_pole(ep-ef+de, coeff[s,k,:,p-orbs[0]]).real - sigmaR.real - elif gw.ac == 'pade': - sigmaR = pade_thiele(ep-ef, omega_fit[s,p-orbs[0]], coeff[s,k,:,p-orbs[0]]).real - dsigma = pade_thiele(ep-ef+de, omega_fit[s,p-orbs[0]], - coeff[s,k,:,p-orbs[0]]).real - sigmaR.real - zn = 1.0/(1.0-dsigma/de) - e = ep + zn*(sigmaR.real + vk[s,kn,p,p].real - v_mf[s,kn,p,p].real) - mo_energy[s,kn,p] = e + ep = mf.mo_energy[s][k][orbs[ip]] + sigmaR = diag_acobj[s, ik, ip].ac_eval(ep).real + dsigma = diag_acobj[s, ik, ip].ac_eval(ep + de).real - sigmaR.real + zn = 1.0 / (1.0 - dsigma / de) + if gw.qpe_linearized_range is not None: + zn = 1.0 if zn < gw.qpe_linearized_range[0] or zn > gw.qpe_linearized_range[1] else zn + mo_energy[s, k, orbs[ip]] = ep + zn * (sigmaR + vk[s, k, p, p] - v_mf[s, k, p, p]).real else: # self-consistently solve QP equation def quasiparticle(omega): - if gw.ac == 'twopole': - sigmaR = two_pole(omega-ef, coeff[s,k,:,p-orbs[0]]).real - elif gw.ac == 'pade': - sigmaR = pade_thiele(omega-ef, omega_fit[s,p-orbs[0]], coeff[s,k,:,p-orbs[0]]).real - return omega - mf.mo_energy[s][kn][p] - (sigmaR.real + vk[s,kn,p,p].real - v_mf[s,kn,p,p].real) + sigmaR = diag_acobj[s, ik, ip].ac_eval(omega) + return omega - 
mf.mo_energy[s][k][orbs[ip]] - (sigmaR + vk[s, k, p, p] - v_mf[s, k, p, p]).real + try: - e = newton(quasiparticle, mf.mo_energy[s][kn][p], tol=1e-6, maxiter=100) - mo_energy[s,kn,p] = e + mo_energy[s, k, orbs[ip]] = scipy.optimize.newton( + quasiparticle, mf.mo_energy[s][k][orbs[ip]], tol=gw.qpe_tol, maxiter=gw.qpe_max_iter + ) except RuntimeError: - conv = False - mo_coeff = mf.mo_coeff + logger.warn(gw, 'QPE for spin=%d k=%d orbital=%d not converged!', s, k, orbs[ip]) + + # save GW results + gw.mo_energy = mo_energy + gw.acobj = acobj if gw.verbose >= logger.DEBUG: - numpy.set_printoptions(threshold=nmoa) - for k in range(nkpts): - logger.debug(gw, ' GW mo_energy spin-up @ k%d =\n%s', k,mo_energy[0,k]) - for k in range(nkpts): - logger.debug(gw, ' GW mo_energy spin-down @ k%d =\n%s', k,mo_energy[1,k]) - numpy.set_printoptions(threshold=1000) + with np.printoptions(threshold=len(mf.mo_energy[0][0])): + for k in range(nkpts): + logger.debug(gw, ' GW mo_energy spin-up @ k%d =\n%s', k, mo_energy[0, k]) + for k in range(nkpts): + logger.debug(gw, ' GW mo_energy spin-down @ k%d =\n%s', k, mo_energy[1, k]) + + if gw.writefile > 0: + with h5py.File('vxc.h5', 'w') as feri: + feri['vk'] = np.asarray(vk) + feri['v_mf'] = np.asarray(v_mf) + + with h5py.File('sigma_imag.h5', 'w') as feri: + feri['sigmaI'] = np.asarray(sigmaI) + feri['omega'] = np.asarray(omega) + if gw.sigmaI is not None: + feri['sigmaI_full'] = np.asarray(gw.sigmaI) + + acobj.save('ac_coeff.h5') + + return + + +def get_rho_response(omega, nocc, mo_energy, Lia, kidx): + """Get Pi=PV. + P is density-density response function. + V is two-electron integral. 
+ See equation 24 in 10.1021/acs.jctc.0c00704 + + Parameters + ---------- + omega : double + real position of imaginary frequency + nocc : list of int + number of occupied orbitals for two spins + mo_energy : double ndarray + orbital energy + Lia : list of complex 4d array + occupied-virtual block three-center density-fitting matrix in MO + kidx : list + momentum-conserved k-point list kj=kidx[ki] + + Returns + ------- + Pi : complex ndarray + Pi in auxiliary basis at freq iw + """ + nkpts, naux = Lia[0].shape[:2] + nocc = [Lia[0].shape[2], Lia[1].shape[2]] + nvir = [Lia[0].shape[3], Lia[1].shape[3]] - return conv, mo_energy, mo_coeff + # Compute Pi for kL + Pi = np.zeros(shape=[naux, naux], dtype=np.complex128) + for i in range(nkpts): + # Find ka that conserves with ki and kL (-ki+ka+kL=G) + a = kidx[i] -def get_rho_response(gw, omega, mo_energy, Lpq, kL, kidx): - ''' - Compute density response function in auxiliary basis at freq iw - ''' - spin, nkpts, naux, nmo, nmo = Lpq.shape - nocca, noccb = gw.nocc - kpts = gw.kpts - kscaled = gw.mol.get_scaled_kpts(kpts) - kscaled -= kscaled[0] + for s in range(2): + eia = mo_energy[s, i, :nocc[s], None] - mo_energy[s, a, None, nocc[s]:] + Lia_i_s = Lia[s][i] + eia = eia / (omega**2 + eia**2) + Pia = Lia_i_s * eia + + # Pi += einsum('Pia,Qia->PQ', Pia, Lia.conj()) + scipy.linalg.blas.zgemm( + alpha=2.0 / nkpts, + a=Lia_i_s.reshape(naux, nocc[s] * nvir[s]).T, + b=Pia.reshape(naux, nocc[s] * nvir[s]).T, + c=Pi.T, + trans_a=2, + trans_b=0, + beta=1.0, + overwrite_c=True, + ) + Pia = Lia_i_s = None + return Pi + + +def get_rho_response_metal(omega, mo_energy, mo_occ, Lpq, kidx): + """Get Pi=PV for metallic systems. + P is density-density response function. + V is two-electron integral. 
+ See equation 24 in 10.1021/acs.jctc.0c00704 + + Parameters + ---------- + omega : double + real position of imaginary frequency + mo_energy : double ndarray + orbital energy + mo_occ : double ndarray + occupation number + Lpq : complex ndarray + three-center density-fitting matrix in MO + kidx : list + momentum-conserved k-point list kj=kidx[ki] + + Returns + ------- + Pi : complex ndarray + Pi in auxiliary basis at freq iw + """ + nkpts, nspin, naux, nmo, nmo = Lpq.shape # Compute Pi for kL - Pi = np.zeros((naux,naux),dtype=np.complex128) - for i, kpti in enumerate(kpts): + Pi = np.zeros(shape=[naux, naux], dtype=np.complex128) + for i in range(nkpts): # Find ka that conserves with ki and kL (-ki+ka+kL=G) a = kidx[i] - eia_a = mo_energy[0,i,:nocca,None] - mo_energy[0,a,None,nocca:] - eia_b = mo_energy[1,i,:noccb,None] - mo_energy[1,a,None,noccb:] - eia_a = eia_a/(omega**2+eia_a*eia_a) - eia_b = eia_b/(omega**2+eia_b*eia_b) - Pia_a = einsum('Pia,ia->Pia',Lpq[0,i][:,:nocca,nocca:],eia_a) - Pia_b = einsum('Pia,ia->Pia',Lpq[1,i][:,:noccb,noccb:],eia_b) - # Response from both spin-up and spin-down density - Pi += 2./nkpts * (einsum('Pia,Qia->PQ',Pia_a,Lpq[0,i][:,:nocca,nocca:].conj()) + - einsum('Pia,Qia->PQ',Pia_b,Lpq[1,i][:,:noccb,noccb:].conj())) + + for s in range(nspin): + eia = mo_energy[s, i, :, None] - mo_energy[s, a, None, :] + fia = mo_occ[s][i][:, None] - mo_occ[s][a][None, :] + Lia = np.ascontiguousarray(Lpq[i, s]) + eia = eia * fia / (omega**2 + eia**2) + Pia = Lia * eia + + # Pi += einsum('Pia, Qia -> PQ', Pia, Lia.conj()) / nkpts + scipy.linalg.blas.zgemm( + alpha=1.0 / nkpts, + a=Lia.reshape(naux, nmo * nmo).T, + b=Pia.reshape(naux, nmo * nmo).T, + c=Pi.T, + trans_a=2, + trans_b=0, + beta=1.0, + overwrite_c=True, + ) + Pia = Lia = None + return Pi + + +def get_rho_response_head(omega, mo_energy, qij): + """Compute head (G=0, G'=0) density response function in auxiliary basis at freq iw. 
+ equation 48 in 10.1021/acs.jctc.0c00704 + + Parameters + ---------- + omega : double + frequency point + mo_energy : double ndarray + orbital energy + qij : list of complex ndarray + pair density matrix defined as equation 51 in 10.1021/acs.jctc.0c00704 + + Returns + ------- + Pi_00 : complex + head response function + """ + nkpts = qij[0].shape[0] + nocc = [qij[0].shape[1], qij[1].shape[1]] + + Pi_00 = 0j + for k in range(nkpts): + for s in range(2): + eia = mo_energy[s, k, : nocc[s], None] - mo_energy[s, k, None, nocc[s] :] + eia = eia / (omega**2 + eia**2) + Pi_00 += 2.0 / nkpts * einsum('ia,ia->', eia, qij[s][k].conj() * qij[s][k]) + + return Pi_00 + + +def get_rho_response_wing(omega, mo_energy, Lia, qij): + """Compute wing (G=P, G'=0) density response function in auxiliary basis at freq iw. + equation 48 in 10.1021/acs.jctc.0c00704 + + Parameters + ---------- + omega : double + frequency point + mo_energy : double ndarray + orbital energy + Lia : complex ndarray + occupied-virtual block three-center density fitting matrix in MO + qij : list of complex ndarray + pair density matrix defined as equation 51 in 10.1021/acs.jctc.0c00704 + + Returns + ------- + Pi : complex ndarray + wing response function + """ + nkpts, naux = Lia[0].shape[:2] + nocc = [Lia[0].shape[2], Lia[1].shape[2]] + nvir = [Lia[0].shape[3], Lia[1].shape[3]] + + Pi = np.zeros(shape=[naux], dtype=np.complex128) + for k in range(nkpts): + for s in range(2): + eia = mo_energy[s, k, :nocc[s], None] - mo_energy[s, k, None, nocc[s]:] + eia = eia / (omega**2 + eia**2) + eia_q = eia * qij[s][k].conj() + + Pi += 2.0 / nkpts * np.matmul(Lia[s][k].reshape(naux, nocc[s] * nvir[s]), eia_q.reshape(-1)) + return Pi -def get_sigma_diag(gw, orbs, kptlist, freqs, wts, iw_cutoff=None, max_memory=8000): - ''' - Compute GW correlation self-energy (diagonal elements) in MO basis - on imaginary axis - ''' - mo_energy = np.array(gw._scf.mo_energy) - mo_coeff = np.array(gw._scf.mo_coeff) + +def get_qij(gw, q, 
mo_energy, mo_coeff, uniform_grids=False): + """Compute pair density matrix in the long-wavelength limit through kp perturbation theory + qij = 1/Omega * |< psi_{ik} | e^{iqr} | psi_{ak-q} >|^2 + equation 51 in 10.1021/acs.jctc.0c00704 + Ref: Phys. Rev. B 83, 245122 (2011) + + Parameters + ---------- + gw : KUGWAC + gw object, provides attributes: nocc, nmo, kpts, mol + q : double + q grid + mo_energy : double ndarray + orbital energy + mo_coeff : complex ndarray + coefficient from AO to MO + uniform_grids : bool, optional + use uniform grids, by default False + + Returns + ------- + list + pair density matrix of two spins in the long-wavelength limit + """ nocca, noccb = gw.nocc nmoa, nmob = gw.nmo + nvira = nmoa - nocca + nvirb = nmob - noccb + kpts = gw.kpts + nkpts = len(kpts) + cell = gw.mol + + if uniform_grids: + with temporary_env(cell, verbose=0): + mydf = df.FFTDF(cell, kpts=kpts) + coords = cell.gen_uniform_grids(mydf.mesh) + else: + with temporary_env(cell, verbose=0): + coords, weights = dft.gen_grid.get_becke_grids(cell, level=4) + ngrid = len(coords) + + qij_a = np.zeros(shape=[nkpts, nocca, nvira], dtype=np.complex128) + qij_b = np.zeros(shape=[nkpts, noccb, nvirb], dtype=np.complex128) + for i, kpti in enumerate(kpts): + ao_p = dft.numint.eval_ao(cell, coords, kpt=kpti, deriv=1) + ao = ao_p[0] + ao_grad = ao_p[1:4] + if uniform_grids: + ao_ao_grad = einsum('mg,xgn->xmn', ao.T.conj(), ao_grad) * cell.vol / ngrid + else: + ao_ao_grad = einsum('g,mg,xgn->xmn', weights, ao.T.conj(), ao_grad) + q_ao_ao_grad = -1j * einsum('x,xmn->mn', q, ao_ao_grad) + q_mo_mo_grad_a = reduce( + np.matmul, (mo_coeff[0, i][:, :nocca].T.conj(), q_ao_ao_grad, mo_coeff[0, i][:, nocca:]) + ) + q_mo_mo_grad_b = reduce( + np.matmul, (mo_coeff[1, i][:, :noccb].T.conj(), q_ao_ao_grad, mo_coeff[1, i][:, noccb:]) + ) + enm_a = 1.0 / (mo_energy[0, i][nocca:, None] - mo_energy[0, i][None, :nocca]) + enm_b = 1.0 / (mo_energy[1, i][noccb:, None] - mo_energy[1, i][None, :noccb]) + 
dens_a = enm_a.T * q_mo_mo_grad_a + dens_b = enm_b.T * q_mo_mo_grad_b + qij_a[i] = dens_a / np.sqrt(cell.vol) + qij_b[i] = dens_b / np.sqrt(cell.vol) + + return (qij_a, qij_b) + + +def get_sigma( + gw, freqs, wts, ef, mo_energy, orbs=None, kptlist=None, mo_coeff=None, mo_occ=None, iw_cutoff=None, fullsigma=False +): + """Get GW self-energy. + See equation 27 in 10.1021/acs.jctc.0c00704 + + Parameters + ---------- + gw : KUGWAC + GW objects, + provides attributes: _scf, mol, frozen, nmo, nocc, kpts, nkpts, mo_coeff, mo_occ, fc, fc_grid, with_df + freqs : double array + position of imaginary frequency + wts : double array + weight of frequency points + ef : double + Fermi level + mo_energy : double ndarray + non-frozen orbital energy + orbs : list, optional + orbital index in non-frozen nmo to calculate self-energy, by default None + kptlist : list, optional + k-point index to calculate self-energy, by default None + mo_coeff : complex ndarray, optional + coefficient from AO to non-frozen MO, by default None + mo_occ : double ndarray, optional + non-frozen occupation number, by default None + iw_cutoff : complex, optional + imaginary grid cutoff for fitting, by default None + fullsigma : bool, optional + calculate off-diagonal elements, by default False + + Returns + ------- + sigma: complex ndarray + self-energy on the imaginary axis + omega: complex ndarray + imaginary frequency grids of self-energy + """ + nocca, noccb = nocc = gw.nocc + nmoa, nmob = nmo = gw.nmo nkpts = gw.nkpts kpts = gw.kpts + + assert nmoa == nmob + if orbs is None: + orbs = list(range(nmoa)) + if kptlist is None: + kptlist = list(range(nkpts)) + norbs = len(orbs) nklist = len(kptlist) nw = len(freqs) - norbs = len(orbs) - mydf = gw.with_df + + if mo_coeff is None: + mo_coeff = _mo_frozen(gw, gw.mo_coeff) + if mo_occ is None: + mo_occ = _mo_occ_frozen(gw, gw.mo_occ) + nao = mo_coeff.shape[2] # possible kpts shift kscaled = gw.mol.get_scaled_kpts(kpts) kscaled -= kscaled[0] - # This code does 
not support metals - homo = -99. - lumo = 99. - for k in range(nkpts): - if homo < max(mo_energy[0,k][nocca-1],mo_energy[1,k][noccb-1]): - homo = max(mo_energy[0,k][nocca-1],mo_energy[1,k][noccb-1]) - if lumo > min(mo_energy[0,k][nocca],mo_energy[1,k][noccb]): - lumo = min(mo_energy[0,k][nocca],mo_energy[1,k][noccb]) - if (lumo-homo)<1e-3: - logger.warn(gw, 'Current KUGW is not supporting metals!') - ef = (homo+lumo)/2. - # Integration on numerical grids - if iw_cutoff is not None: + if iw_cutoff is not None and gw.rdm is False: nw_sigma = sum(iw < iw_cutoff for iw in freqs) + 1 else: nw_sigma = nw + 1 - # Compute occ for -iw and vir for iw separately - # to avoid branch cuts in analytic continuation - omega_occ = np.zeros((nw_sigma),dtype=np.complex128) - omega_vir = np.zeros((nw_sigma),dtype=np.complex128) - omega_occ[1:] = -1j*freqs[:(nw_sigma-1)] - omega_vir[1:] = 1j*freqs[:(nw_sigma-1)] - orbs_occ_a = [i for i in orbs if i < nocca] - orbs_occ_b = [i for i in orbs if i < noccb] - norbs_occ_a = len(orbs_occ_a) - norbs_occ_b = len(orbs_occ_b) - - emo_occ_a = np.zeros((nkpts,nmoa,nw_sigma),dtype=np.complex128) - emo_occ_b = np.zeros((nkpts,nmob,nw_sigma),dtype=np.complex128) - emo_vir_a = np.zeros((nkpts,nmoa,nw_sigma),dtype=np.complex128) - emo_vir_b = np.zeros((nkpts,nmob,nw_sigma),dtype=np.complex128) - for k in range(nkpts): - emo_occ_a[k] = omega_occ[None,:] + ef - mo_energy[0,k][:,None] - emo_occ_b[k] = omega_occ[None,:] + ef - mo_energy[1,k][:,None] - emo_vir_a[k] = omega_vir[None,:] + ef - mo_energy[0,k][:,None] - emo_vir_b[k] = omega_vir[None,:] + ef - mo_energy[1,k][:,None] - - sigma = np.zeros((2,nklist,norbs,nw_sigma),dtype=np.complex128) - omega = np.zeros((2,norbs,nw_sigma),dtype=np.complex128) - for s in range(2): - for p in range(norbs): - orbp = orbs[p] - if orbp < gw.nocc[s]: - omega[s,p] = omega_occ.copy() - else: - omega[s,p] = omega_vir.copy() + omega = np.zeros(shape=[nw_sigma], dtype=np.complex128) + omega[1:] = 1j * freqs[: (nw_sigma - 1)] 
+ ef + emo_a = omega[None, None, :] - mo_energy[0][:, :, None] + emo_b = omega[None, None, :] - mo_energy[1][:, :, None] + if fullsigma is False: + sigma = np.zeros(shape=[2, nklist, norbs, nw_sigma], dtype=np.complex128) + else: + sigma = np.zeros(shape=[2, nklist, norbs, norbs, nw_sigma], dtype=np.complex128) if gw.fc: # Set up q mesh for q->0 finite size correction - q_pts = np.array([1e-3,0,0]).reshape(1,3) + if not gw.fc_grid: + q_pts = np.array([1e-3, 0, 0], dtype=np.double).reshape(1, 3) + else: + Nq = 3 + q_pts = np.zeros(shape=[Nq**3 - 1, 3], dtype=np.double) + for i in range(Nq): + for j in range(Nq): + for k in range(Nq): + if i == 0 and j == 0 and k == 0: + continue + else: + q_pts[i * Nq**2 + j * Nq + k - 1, 0] = k * 5e-4 + q_pts[i * Nq**2 + j * Nq + k - 1, 1] = j * 5e-4 + q_pts[i * Nq**2 + j * Nq + k - 1, 2] = i * 5e-4 + nq_pts = len(q_pts) q_abs = gw.mol.get_abs_kpts(q_pts) # Get qij = 1/sqrt(Omega) * < psi_{ik} | e^{iqr} | psi_{ak-q} > at q: (nkpts, nocc, nvir) - qij = get_qij(gw, q_abs[0], mo_coeff) + qij_a = np.zeros(shape=[nq_pts, nkpts, nocca, nmoa - nocca], dtype=np.complex128) + qij_b = np.zeros(shape=[nq_pts, nkpts, noccb, nmob - noccb], dtype=np.complex128) + + if not gw.fc_grid: + for k in range(nq_pts): + qij_tmp = get_qij(gw, q_abs[k], mo_energy, mo_coeff) + qij_a[k] = qij_tmp[0] + qij_b[k] = qij_tmp[1] + else: + for k in range(nq_pts): + qij_tmp = get_qij(gw, q_abs[k], mo_energy, mo_coeff) + qij_a[k] = qij_tmp[0] + qij_b[k] = qij_tmp[1] + cderiarr = gw.with_df.cderi_array() for kL in range(nkpts): - # Lij: (2, ki, L, i, j) for looping every kL - #Lij = np.zeros((2,nkpts,naux,nmoa,nmoa),dtype=np.complex128) + # Lij: (ki, 2, L, i, j) for looping every kL Lij = [] # kidx: save kj that conserves with kL and ki (-ki+kj+kL=G) # kidx_r: save ki that conserves with kL and kj (-ki+kj+kL=G) - kidx = np.zeros((nkpts),dtype=np.int64) - kidx_r = np.zeros((nkpts),dtype=np.int64) + kidx = np.zeros(shape=[nkpts], dtype=np.int64) + kidx_r = 
np.zeros(shape=[nkpts], dtype=np.int64) for i, kpti in enumerate(kpts): for j, kptj in enumerate(kpts): # Find (ki,kj) that satisfies momentum conservation with kL @@ -291,354 +556,461 @@ def get_sigma_diag(gw, orbs, kptlist, freqs, wts, iw_cutoff=None, max_memory=800 if is_kconserv: kidx[i] = j kidx_r[j] = i - logger.debug(gw, "Read Lpq (kL: %s / %s, ki: %s, kj: %s)"%(kL+1, nkpts, i, j)) + logger.debug(gw, 'Read Lpq (kL: %s / %s, ki: %s, kj: %s)' % (kL + 1, nkpts, i, j)) + + # Read (L|pq) and ao2mo transform to (L|ij) + Lpq = cderiarr.load(kpti, kptj) + if Lpq.shape[-1] == (nao * (nao + 1)) // 2: + Lpq = lib.unpack_tril(Lpq).reshape(-1, nao**2) + else: + Lpq = Lpq.reshape(-1, nao**2) + Lpq = Lpq.astype(np.complex128) + + moija, ijslicea = _conc_mos(mo_coeff[0, i], mo_coeff[0, j])[2:] + moijb, ijsliceb = _conc_mos(mo_coeff[1, i], mo_coeff[1, j])[2:] Lij_out_a = None Lij_out_b = None - # Read (L|pq) and ao2mo transform to (L|ij) - Lpq = [] - for LpqR, LpqI, sign \ - in mydf.sr_loop([kpti, kptj], max_memory=0.1*gw._scf.max_memory, compact=False): - Lpq.append(LpqR+LpqI*1.0j) - Lpq = np.vstack(Lpq).reshape(-1,nmoa**2) - moija, ijslicea = _conc_mos(mo_coeff[0,i], mo_coeff[0,j])[2:] - moijb, ijsliceb = _conc_mos(mo_coeff[1,i], mo_coeff[1,j])[2:] - tao = [] - ao_loc = None - Lij_out_a = _ao2mo.r_e2(Lpq, moija, ijslicea, tao, ao_loc, out=Lij_out_a) - tao = [] - ao_loc = None - Lij_out_b = _ao2mo.r_e2(Lpq, moijb, ijsliceb, tao, ao_loc, out=Lij_out_b) - Lij.append(np.asarray((Lij_out_a.reshape(-1,nmoa,nmoa),Lij_out_b.reshape(-1,nmob,nmob)))) - - Lij = np.asarray(Lij) - Lij = Lij.transpose(1,0,2,3,4) - naux = Lij.shape[2] + Lij_out_a = _ao2mo.r_e2(Lpq, moija, ijslicea, tao=[], ao_loc=None, out=Lij_out_a) + Lij_out_b = _ao2mo.r_e2(Lpq, moijb, ijsliceb, tao=[], ao_loc=None, out=Lij_out_b) + Lij.append(np.asarray((Lij_out_a.reshape(-1, nmoa, nmoa), Lij_out_b.reshape(-1, nmob, nmob)))) - if kL == 0: - for w in range(nw): - # body dielectric matrix eps_body - Pi = 
get_rho_response(gw, freqs[w], mo_energy, Lij, kL, kidx) - eps_body_inv = np.linalg.inv(np.eye(naux)-Pi) + Lij = np.ascontiguousarray(Lij) + naux = Lij.shape[2] + if hasattr(gw._scf, 'sigma') is False: + Lia = [ + np.ascontiguousarray(Lij[:, 0, :, : nocc[0], nocc[0] :]), + np.ascontiguousarray(Lij[:, 1, :, : nocc[1], nocc[1] :]), + ] + + naux_ones = np.ones(shape=[1, naux], dtype=np.complex128) + for w in range(nw): + # body dielectric matrix eps_body + if hasattr(gw._scf, 'sigma'): + Pi = get_rho_response_metal(freqs[w], mo_energy, mo_occ, Lij, kidx) + else: + Pi = get_rho_response(freqs[w], nocc, mo_energy, Lia, kidx) + Pi_inv = np.linalg.inv(np.eye(naux) - Pi) - if gw.fc: - # head dielectric matrix eps_00 - Pi_00 = get_rho_response_head(gw, freqs[w], mo_energy, qij) - eps_00 = 1. - 4. * np.pi/np.linalg.norm(q_abs[0])**2 * Pi_00 + if gw.fc and kL == 0: + eps_inv_00 = 0j + eps_inv_P0 = np.zeros(shape=[naux], dtype=np.complex128) + for iq in range(nq_pts): + # head dielectric matrix eps_00, equation 47 in 10.1021/acs.jctc.0c00704 + Pi_00 = get_rho_response_head(freqs[w], mo_energy, (qij_a[iq], qij_b[iq])) + eps_00 = 1.0 - 4.0 * np.pi / np.linalg.norm(q_abs[iq]) ** 2 * Pi_00 - # wings dielectric matrix eps_P0 - Pi_P0 = get_rho_response_wing(gw, freqs[w], mo_energy, Lij, qij) - eps_P0 = -np.sqrt(4.*np.pi) / np.linalg.norm(q_abs[0]) * Pi_P0 + # wings dielectric matrix eps_P0, equation 48 in 10.1021/acs.jctc.0c00704 + Pi_P0 = get_rho_response_wing(freqs[w], mo_energy, Lia, (qij_a[iq], qij_b[iq])) + eps_P0 = -np.sqrt(4.0 * np.pi) / np.linalg.norm(q_abs[iq]) * Pi_P0 # inverse dielectric matrix - eps_inv_00 = 1./(eps_00 - np.dot(np.dot(eps_P0.conj(),eps_body_inv),eps_P0)) - eps_inv_P0 = -eps_inv_00 * np.dot(eps_body_inv, eps_P0) - - # head correction - Del_00 = 2./np.pi * (6.*np.pi**2/gw.mol.vol/nkpts)**(1./3.) * (eps_inv_00 - 1.) 
- - eps_inv_PQ = eps_body_inv - g0_occ_a = wts[w] * emo_occ_a / (emo_occ_a**2+freqs[w]**2) - g0_occ_b = wts[w] * emo_occ_b / (emo_occ_b**2+freqs[w]**2) - g0_vir_a = wts[w] * emo_vir_a / (emo_vir_a**2+freqs[w]**2) - g0_vir_b = wts[w] * emo_vir_b / (emo_vir_b**2+freqs[w]**2) - for k in range(nklist): - kn = kptlist[k] - # Find km that conserves with kn and kL (-km+kn+kL=G) - km = kidx_r[kn] - Qmn_a = einsum('Pmn,PQ->Qmn',Lij[0,km][:,:,orbs].conj(),eps_inv_PQ-np.eye(naux)) - Qmn_b = einsum('Pmn,PQ->Qmn',Lij[1,km][:,:,orbs].conj(),eps_inv_PQ-np.eye(naux)) - Wmn_a = 1./nkpts * einsum('Qmn,Qmn->mn',Qmn_a,Lij[0,km][:,:,orbs]) - Wmn_b = 1./nkpts * einsum('Qmn,Qmn->mn',Qmn_b,Lij[1,km][:,:,orbs]) - - sigma[0,k][:norbs_occ_a] += -einsum('mn,mw->nw',Wmn_a[:,:norbs_occ_a],g0_occ_a[km])/np.pi - sigma[1,k][:norbs_occ_b] += -einsum('mn,mw->nw',Wmn_b[:,:norbs_occ_b],g0_occ_b[km])/np.pi - sigma[0,k][norbs_occ_a:] += -einsum('mn,mw->nw',Wmn_a[:,norbs_occ_a:],g0_vir_a[km])/np.pi - sigma[1,k][norbs_occ_b:] += -einsum('mn,mw->nw',Wmn_b[:,norbs_occ_b:],g0_vir_b[km])/np.pi - - if gw.fc: - # apply head correction - assert (kn == km) - sigma[0,k][:norbs_occ_a] += -Del_00 * g0_occ_a[kn][orbs][:norbs_occ_a] /np.pi - sigma[0,k][norbs_occ_a:] += -Del_00 * g0_vir_a[kn][orbs][norbs_occ_a:] /np.pi - sigma[1,k][:norbs_occ_b] += -Del_00 * g0_occ_b[kn][orbs][:norbs_occ_b] /np.pi - sigma[1,k][norbs_occ_b:] += -Del_00 * g0_vir_b[kn][orbs][norbs_occ_b:] /np.pi - - # apply wing correction - Wn_P0_a = einsum('Pnm,P->nm',Lij[0,kn],eps_inv_P0).diagonal() - Wn_P0_b = einsum('Pnm,P->nm',Lij[1,kn],eps_inv_P0).diagonal() - Wn_P0_a = Wn_P0_a.real * 2. - Wn_P0_b = Wn_P0_b.real * 2. - Del_P0_a = np.sqrt(gw.mol.vol/4./np.pi**3) * (6.*np.pi**2/gw.mol.vol/nkpts)**(2./3.) * Wn_P0_a[orbs] # noqa: E501 - Del_P0_b = np.sqrt(gw.mol.vol/4./np.pi**3) * (6.*np.pi**2/gw.mol.vol/nkpts)**(2./3.) 
* Wn_P0_b[orbs] # noqa: E501 - sigma[0,k][:norbs_occ_a] += -einsum('n,nw->nw',Del_P0_a[:norbs_occ_a],g0_occ_a[kn][orbs][:norbs_occ_a]) /np.pi # noqa: E501 - sigma[0,k][norbs_occ_a:] += -einsum('n,nw->nw',Del_P0_a[norbs_occ_a:],g0_vir_a[kn][orbs][norbs_occ_a:]) /np.pi # noqa: E501 - sigma[1,k][:norbs_occ_b] += -einsum('n,nw->nw',Del_P0_b[:norbs_occ_b],g0_occ_b[kn][orbs][:norbs_occ_b]) /np.pi # noqa: E501 - sigma[1,k][norbs_occ_b:] += -einsum('n,nw->nw',Del_P0_b[norbs_occ_b:],g0_vir_b[kn][orbs][norbs_occ_b:]) /np.pi # noqa: E501 - else: - for w in range(nw): - Pi = get_rho_response(gw, freqs[w], mo_energy, Lij, kL, kidx) - Pi_inv = np.linalg.inv(np.eye(naux)-Pi)-np.eye(naux) - g0_occ_a = wts[w] * emo_occ_a / (emo_occ_a**2+freqs[w]**2) - g0_occ_b = wts[w] * emo_occ_b / (emo_occ_b**2+freqs[w]**2) - g0_vir_a = wts[w] * emo_vir_a / (emo_vir_a**2+freqs[w]**2) - g0_vir_b = wts[w] * emo_vir_b / (emo_vir_b**2+freqs[w]**2) - for k in range(nklist): - kn = kptlist[k] - # Find km that conserves with kn and kL (-km+kn+kL=G) - km = kidx_r[kn] - Qmn_a = einsum('Pmn,PQ->Qmn',Lij[0,km][:,:,orbs].conj(),Pi_inv) - Qmn_b = einsum('Pmn,PQ->Qmn',Lij[1,km][:,:,orbs].conj(),Pi_inv) - Wmn_a = 1./nkpts * einsum('Qmn,Qmn->mn',Qmn_a,Lij[0,km][:,:,orbs]) - Wmn_b = 1./nkpts * einsum('Qmn,Qmn->mn',Qmn_b,Lij[1,km][:,:,orbs]) - - sigma[0,k][:norbs_occ_a] += -einsum('mn,mw->nw',Wmn_a[:,:norbs_occ_a],g0_occ_a[km])/np.pi - sigma[1,k][:norbs_occ_b] += -einsum('mn,mw->nw',Wmn_b[:,:norbs_occ_b],g0_occ_b[km])/np.pi - sigma[0,k][norbs_occ_a:] += -einsum('mn,mw->nw',Wmn_a[:,norbs_occ_a:],g0_vir_a[km])/np.pi - sigma[1,k][norbs_occ_b:] += -einsum('mn,mw->nw',Wmn_b[:,norbs_occ_b:],g0_vir_b[km])/np.pi + # equation 53 in 10.1021/acs.jctc.0c00704 + eps_inv_00 += 1.0 / nq_pts * 1.0 / (eps_00 - reduce(np.matmul, (eps_P0.conj(), Pi_inv, eps_P0))) + # equation 54 in 10.1021/acs.jctc.0c00704 + eps_inv_P0 += 1.0 / nq_pts * (-eps_inv_00) * np.matmul(Pi_inv, eps_P0) + + # head correction + Del_00 = 2.0 / np.pi * (6.0 * 
np.pi**2 / gw.mol.vol / nkpts) ** (1.0 / 3.0) * (eps_inv_00 - 1.0) + + Pi_inv -= np.eye(naux) + g0_a = wts[w] * emo_a / (emo_a**2 + freqs[w] ** 2) + g0_b = wts[w] * emo_b / (emo_b**2 + freqs[w] ** 2) + g0 = [g0_a, g0_b] + for k, kn in enumerate(kptlist): + # Find km that conserves with kn and kL (-km+kn+kL=G) + km = kidx_r[kn] + + for s in range(2): + # Qmn_a = einsum('Pmn,PQ->Qmn', Lij[km, s][:, :, orbs].conj(), Pi_inv) + if len(orbs) == nmo[s]: + l_slice = Lij[km, s].reshape(naux, -1) + else: + l_slice = np.ascontiguousarray(Lij[km, s, :, :, mkslice(orbs)].reshape(naux, -1)) + Qmn = np.zeros(shape=[nmo[s] * norbs, naux], dtype=np.complex128) + scipy.linalg.blas.zgemm(alpha=1.0, a=Pi_inv.T, b=l_slice.T, c=Qmn.T, overwrite_c=1, trans_b=2) + Qmn = Qmn.T + + if fullsigma is False: + # Wmn = 1.0 / nkpts * einsum('Qmn,Qmn->mn', Qmn, Lij[km, s][:, :, orbs]) + Qmn = Qmn * l_slice + Wmn = np.matmul(naux_ones, Qmn) + array_scale(Wmn, 1.0 / nkpts) + + # sigma[s, k] += -einsum('mn,mw->nw', Wmn, g0[s][km]) / np.pi + sigma[s, k] -= np.matmul(Wmn.reshape(nmo[s], norbs).T, g0[s][km]) / np.pi + else: + # for orbm in range(nmo): + # Wmn[orbm] = 1./nkpts * np.dot(Qmn[:,orbm,:].transpose(),Lij[km][:,orbm,orbs]) + Qmn = Qmn.reshape(naux, nmo[s], norbs) + Wmn = np.zeros(shape=[nmo[s], norbs, norbs], dtype=np.complex128) + for m in range(nmo[s]): + np.matmul(Qmn[:, m, :].T, np.ascontiguousarray(Lij[km, s, :, m, mkslice(orbs)]), out=Wmn[m]) + array_scale(Wmn, 1.0 / nkpts) + Wmn = Wmn.reshape(nmo[s], norbs * norbs).T + + # sigma[s, k] += -einsum('mnl,mw->nlw',Wmn,g0[km])/np.pi + sigma[s, k] -= np.matmul(Wmn, g0[s][km]).reshape(norbs, norbs, nw_sigma) / np.pi + + if gw.fc and kL == 0: + assert kn == km + for s in range(2): + if fullsigma is False: + # apply head correction + sigma[s, k] += -Del_00 * g0[s][kn][orbs] / np.pi + + # apply wing correction + Wn_P0 = einsum('Pnn,P->n', Lij[kn, s], eps_inv_P0) + Wn_P0 = Wn_P0[orbs].real * 2.0 + Del_P0 = ( + np.sqrt(gw.mol.vol / 4.0 / np.pi**3) + 
* (6.0 * np.pi**2 / gw.mol.vol / nkpts) ** (2 / 3) + * Wn_P0 + ) + + sigma[s, k] += -einsum('n,nw->nw', Del_P0, g0[s][kn][orbs]) / np.pi + else: + # head correction + tmp = -Del_00 * g0[s][kn][orbs] / np.pi + for p in range(norbs): + sigma[s, k, p, p, :] += tmp[p, :] + # sigma[s, k, np.arange(norbs), np.arange(norbs), :] += tmp + + # wing correction + Wn_P0 = einsum('Pnn,P->n', Lij[kn, s], eps_inv_P0) + Wn_P0 = Wn_P0[orbs].real * 2.0 + Del_P0 = ( + np.sqrt(gw.mol.vol / 4.0 / np.pi**3) + * (6.0 * np.pi**2 / gw.mol.vol / nkpts) ** (2 / 3) + * Wn_P0 + ) + tmp = -einsum('n,nw->nw', Del_P0, g0[s][kn][orbs]) / np.pi + for p in range(norbs): + sigma[s, k, p, p, :] += tmp[p, :] + #sigma[s, k, np.arange(norbs), np.arange(norbs), :] += tmp + + if gw.rdm: + gw.sigmaI = sigma return sigma, omega -def get_rho_response_head(gw, omega, mo_energy, qij): - ''' - Compute head (G=0, G'=0) density response function in auxiliary basis at freq iw - ''' - qij_a, qij_b = qij - nocca, noccb = gw.nocc - kpts = gw.kpts - nkpts = len(kpts) - # Compute Pi head - Pi_00 = 0j - for i, kpti in enumerate(kpts): - eia_a = mo_energy[0,i,:nocca,None] - mo_energy[0,i,None,nocca:] - eia_b = mo_energy[1,i,:noccb,None] - mo_energy[1,i,None,noccb:] - eia_a = eia_a/(omega**2+eia_a*eia_a) - eia_b = eia_b/(omega**2+eia_b*eia_b) - Pi_00 += 2./nkpts * (einsum('ia,ia->',eia_a,qij_a[i].conj()*qij_a[i]) + - einsum('ia,ia->',eia_b,qij_b[i].conj()*qij_b[i])) - return Pi_00 - -def get_rho_response_wing(gw, omega, mo_energy, Lpq, qij): - ''' - Compute wing (G=P, G'=0) density response function in auxiliary basis at freq iw - ''' - qij_a, qij_b = qij - spin, nkpts, naux, nmo, nmo = Lpq.shape - nocca, noccb = gw.nocc - kpts = gw.kpts - nkpts = len(kpts) +def get_ef(kmf, mo_energy): + """Get Fermi level. + For gapped systems, Fermi level is computed as the average between HOMO and LUMO. + For metallic systems, Fermi level is optmized according to mo_energy. 
- # Compute Pi wing - Pi = np.zeros(naux,dtype=np.complex128) - for i, kpti in enumerate(kpts): - eia_a = mo_energy[0,i,:nocca,None] - mo_energy[0,i,None,nocca:] - eia_b = mo_energy[1,i,:noccb,None] - mo_energy[1,i,None,noccb:] - eia_a = eia_a/(omega**2+eia_a*eia_a) - eia_b = eia_b/(omega**2+eia_b*eia_b) - eia_q_a = eia_a * qij_a[i].conj() - eia_q_b = eia_b * qij_b[i].conj() - Pi += 2./nkpts * (einsum('Pia,ia->P',Lpq[0,i][:,:nocca,nocca:],eia_q_a) + - einsum('Pia,ia->P',Lpq[1,i][:,:noccb,noccb:],eia_q_b)) - return Pi + Parameters + ---------- + kmf : pyscf.pbc.scf.uhf.UHF/pyscf.pbc.dft.uks.UKS + mean-field object, provides attributes: kpts, sigma, smearing_method + mo_energy : double array + orbital energy -def get_qij(gw, q, mo_coeff, uniform_grids=False): - ''' - Compute qij = 1/Omega * |< psi_{ik} | e^{iqr} | psi_{ak-q} >|^2 at q: (nkpts, nocc, nvir) - through kp perturbation theory - Ref: Phys. Rev. B 83, 245122 (2011) - ''' - nocca, noccb = gw.nocc - nmoa, nmob = gw.nmo - nvira = nmoa - nocca - nvirb = nmob - noccb - kpts = gw.kpts - nkpts = len(kpts) - cell = gw.mol - mo_energy = np.asarray(gw._scf.mo_energy) + Returns + ------- + ef : double + Fermi level + """ + if hasattr(kmf, "sigma"): + from pyscf.scf import addons as mol_addons - if uniform_grids: - mydf = df.FFTDF(cell, kpts=kpts) - coords = cell.gen_uniform_grids(mydf.mesh) + if kmf.smearing_method.lower() == "fermi": + f_occ = mol_addons._fermi_smearing_occ + else: + f_occ = mol_addons._gaussian_smearing_occ + mo_energy_stack_a = np.hstack(mo_energy[0]) + mo_energy_stack_b = np.hstack(mo_energy[1]) + mo_energy_stack = np.append(mo_energy_stack_a, mo_energy_stack_b) + nelectron = kmf.mol.tot_electrons(len(kmf.kpts)) + ef = mol_addons._smearing_optimize(f_occ, mo_energy_stack, nelectron, kmf.sigma)[0] else: - coords, weights = dft.gen_grid.get_becke_grids(cell,level=4) - ngrid = len(coords) + nkpts = len(kmf.kpts) + neleca = 0.0 + nelecb = 0.0 + for k in range(nkpts): + neleca += 
np.sum(kmf.mo_occ[0][k]) + nelecb += np.sum(kmf.mo_occ[1][k]) + nocca = int(neleca / nkpts) + noccb = int(nelecb / nkpts) + + homo = -99.0 + lumo = 99.0 + for k in range(len(kmf.kpts)): + if homo < max(mo_energy[0][k][nocca - 1], mo_energy[1][k][noccb - 1]): + homo = max(mo_energy[0][k][nocca - 1], mo_energy[1][k][noccb - 1]) + if lumo > min(mo_energy[0][k][nocca], mo_energy[1][k][noccb]): + lumo = min(mo_energy[0][k][nocca], mo_energy[1][k][noccb]) + ef = (homo + lumo) / 2.0 + return ef + + +def get_g0_k(omega, mo_energy, eta): + """Get non-interacting Green's function. + + Parameters + ---------- + omega : double or complex ndarray + frequency grids + mo_energy : double ndarray + orbital energy + eta : double + broadening parameter + + Returns + ------- + gf0 : complex ndarray + non-interacting Green's function + """ + nkpts = len(mo_energy[0]) + nmo = mo_energy[0][0].shape[0] + nw = len(omega) + gf0 = np.zeros(shape=[2, nkpts, nmo, nmo, nw], dtype=np.complex128) + for s in range(2): + for k in range(nkpts): + for iw in range(nw): + gf0[s, k, :, :, iw] = np.diag(1.0 / (omega[iw] + 1j * eta - mo_energy[s][k])) + return gf0 + + +def make_gf(gw, omega, eta): + """Get dynamical Green's function and self-energy. 
+ + Parameters + ---------- + gw : KUGWAC + GW object, provides attributes: orbs, kptlist, ef, ac_coeff, omega_fit, vk, vxc, _scf.mo_energy + omega : double or complex array + frequency grids + eta : double + broadening parameter + + Returns + ------- + gf : complex ndarray + GW Green's function + gf0 : complex ndarray + mean-field Green's function + sigma : complex ndarray + GW correlation self-energy + """ + nmo = gw.nmo[0] + + nomega = len(omega) + sigma = np.zeros(shape=[2, gw.nkpts, nmo, nmo, nomega], dtype=np.complex128) + if gw.fullsigma: + for s in range(2): + for ik, k in enumerate(gw.kptlist): + for ip, p in enumerate(gw.orbs_frz): + for iq, q in enumerate(gw.orbs_frz): + sigma[s, k, p, q] = gw.acobj[s, ik, ip, iq].ac_eval(omega + 1j * eta) + sigma[s, k, p, q] += gw.vk[s, k, p, q] - gw.vxc[s, k, p, q] + else: + for s in range(2): + for k, kn in enumerate(gw.kptlist): + for ip, p in enumerate(gw.orbs_frz): + sigma[s, k, p, p] = gw.acobj[s, ik, ip].ac_eval(omega + 1j * eta) + sigma[s, kn, p, p] += gw.vk[s, kn, p, p] - gw.vxc[s, kn, p, p] + + gf0 = get_g0_k(omega, gw._scf.mo_energy, eta) + gf = np.zeros_like(gf0) + for s in range(2): + for k in range(gw.nkpts): + for iw in range(nomega): + gf[s, k, :, :, iw] = np.linalg.inv(np.linalg.inv(gf0[s, k, :, :, iw]) - sigma[s, k, :, :, iw]) + + return gf, gf0, sigma + + +def make_rdm1_linear(gw, ao_repr=False): + """Get GW density matrix from Green's function G(it=0). 
+ G is from linear Dyson equation, which conserves particle number + G = G0 + G0 Sigma G0 + See equation 16 in 10.1021/acs.jctc.0c01264 + + Parameters + ---------- + gw : KUGWAC + GW object, provides attributes: sigmaI, mol, _scf, freqs, wts, frozen, orbs, fc + ao_repr : bool, optional + return density matrix in AO, by default False + + Returns + ------- + rdm1 : double ndarray + density matrix + """ + assert gw.sigmaI is not None + assert gw.rdm is True and gw.fullsigma is True + assert gw.frozen is None or gw.frozen == 0 + sigmaI = gw.sigmaI[:, :, :, 1:] + freqs = 1j * gw.freqs + wts = gw.wts + nmo = gw.nmo + nkpts = gw.nkpts + if len(gw.orbs) != nmo: + sigma = np.zeros(shape=[nkpts, nmo, nmo, len(freqs)], dtype=sigmaI.dtype) + for s in range(2): + for k in range(nkpts): + for ia, a in enumerate(gw.orbs): + for ib, b in enumerate(gw.orbs): + sigma[s, k, a, b, :] = sigmaI[s, k, ia, ib, :] + else: + sigma = sigmaI - qij_a = np.zeros((nkpts,nocca,nvira),dtype=np.complex128) - qij_b = np.zeros((nkpts,noccb,nvirb),dtype=np.complex128) - for i, kpti in enumerate(kpts): - ao_p = dft.numint.eval_ao(cell, coords, kpt=kpti, deriv=1) - ao = ao_p[0] - ao_grad = ao_p[1:4] - if uniform_grids: - ao_ao_grad = einsum('mg,xgn->xmn',ao.T.conj(),ao_grad) * cell.vol / ngrid - else: - ao_ao_grad = einsum('g,mg,xgn->xmn',weights,ao.T.conj(),ao_grad) - q_ao_ao_grad = -1j * einsum('x,xmn->mn',q,ao_ao_grad) - q_mo_mo_grad_a = np.dot(np.dot(mo_coeff[0,i][:,:nocca].T.conj(), q_ao_ao_grad), mo_coeff[0,i][:,nocca:]) - q_mo_mo_grad_b = np.dot(np.dot(mo_coeff[1,i][:,:noccb].T.conj(), q_ao_ao_grad), mo_coeff[1,i][:,noccb:]) - enm_a = 1./(mo_energy[0,i][nocca:,None] - mo_energy[0,i][None,:nocca]) - enm_b = 1./(mo_energy[1,i][noccb:,None] - mo_energy[1,i][None,:noccb]) - dens_a = enm_a.T * q_mo_mo_grad_a - dens_b = enm_b.T * q_mo_mo_grad_b - qij_a[i] = dens_a / np.sqrt(cell.vol) - qij_b[i] = dens_b / np.sqrt(cell.vol) + for iw in range(len(freqs)): + sigma[:, :, :, iw] += gw.vk - gw.vxc + gf0 = 
get_g0_k(freqs, np.array(gw._scf.mo_energy) - gw.ef, eta=0) + gf = np.array(gf0, copy=True) + for s in range(2): + for k in range(nkpts): + for iw in range(len(freqs)): + gf[s, k, :, :, iw] = gf0[s, k, :, :, iw] @ sigma[s, k, :, :, iw] @ gf0[s, k, :, :, iw] - return (qij_a, qij_b) + # GW density matrix + rdm1 = np.zeros(shape=[2, nkpts, nmo, nmo], dtype=np.double) + for s in range(2): + for k in range(nkpts): + rdm1[s, k] = (1.0 / np.pi) * einsum('ijw, w -> ij', gf[s, k], wts).real + np.eye(nmo) * 0.5 + channel = "spin-up" if s == 0 else "spin-down" + logger.info(gw, 'GW particle number %s @ k%d = %s', channel, k, np.trace(rdm1[s, k])) -def _get_scaled_legendre_roots(nw): - """ - Scale nw Legendre roots, which lie in the - interval [-1, 1], so that they lie in [0, inf) - Ref: www.cond-mat.de/events/correl19/manuscripts/ren.pdf + # Symmetrize density matrix + for s in range(2): + for k in range(nkpts): + rdm1[s, k] = 0.5 * (rdm1[s, k] + rdm1[s, k].T) - Returns: - freqs : 1D ndarray - wts : 1D ndarray - """ - freqs, wts = np.polynomial.legendre.leggauss(nw) - x0 = 0.5 - freqs_new = x0*(1.+freqs)/(1.-freqs) - wts = wts*2.*x0/(1.-freqs)**2 - return freqs_new, wts + if ao_repr is True: + ovlp = gw._scf.get_ovlp() + for s in range(2): + for k in range(nkpts): + CS = np.matmul(ovlp, gw._scf.mo_coeff[s, k]) + rdm1[s, k] = reduce(np.matmul, (CS, rdm1[s, k], CS.conj().T)) + + return rdm1 -def _get_clenshaw_curtis_roots(nw): - """ - Clenshaw-Curtis quadrature on [0,inf) - Ref: J. Chem. Phys. 132, 234114 (2010) - Returns: - freqs : 1D ndarray - wts : 1D ndarray - """ - freqs = np.zeros(nw) - wts = np.zeros(nw) - a = 0.2 - for w in range(nw): - t = (w+1.0)/nw * np.pi/2. 
- freqs[w] = a / np.tan(t) - if w != nw-1: - wts[w] = a*np.pi/2./nw/(np.sin(t)**2) - else: - wts[w] = a*np.pi/4./nw/(np.sin(t)**2) - return freqs[::-1], wts[::-1] -def two_pole_fit(coeff, omega, sigma): - cf = coeff[:5] + 1j*coeff[5:] - f = cf[0] + cf[1]/(omega+cf[3]) + cf[2]/(omega+cf[4]) - sigma - f[0] = f[0]/0.01 - return np.array([f.real,f.imag]).reshape(-1) +def _mo_energy_frozen(gw, mo_energy): + """Get non-frozen orbital energy. + Assume nmoa = nmob. -def two_pole(freqs, coeff): - cf = coeff[:5] + 1j*coeff[5:] - return cf[0] + cf[1]/(freqs+cf[3]) + cf[2]/(freqs+cf[4]) + Parameters + ---------- + gw : KUGWAC + GW object, provides attributes: frozen, nmo, nkpt + mo_energy : double ndarray + full orbital energy -def AC_twopole_diag(sigma, omega, orbs, nocc): + Returns + ------- + mo_energy_frozen : double ndarray + non-frozen orbital energy """ - Analytic continuation to real axis using a two-pole model - Returns: - coeff: 2D array (ncoeff, norbs) + frozen_mask = get_frozen_mask(gw) + nmoa, _ = gw.nmo + nkpts = gw.nkpts + mo_energy_frozen = np.zeros(shape=[2, nkpts, nmoa], dtype=np.double) + for s in range(2): + for k in range(nkpts): + mo_energy_frozen[s, k] = mo_energy[s][k][frozen_mask[s][k]] + return mo_energy_frozen + + +def _mo_frozen(gw, mo): + """Get non-frozen orbital coefficient. + Assume nmoa = nmob. 
+ + Parameters + ---------- + gw : KUGWAC + GW object, provides attributes: frozen, nmo, nkpt + mo : complex ndarray + full orbital coefficient + + Returns + ------- + mo_frozen : complex ndarray + non-frozen orbital coefficient """ - norbs, nw = sigma.shape - coeff = np.zeros((10,norbs)) - for p in range(norbs): - if orbs[p] < nocc: - x0 = np.array([0, 1, 1, 1, -1, 0, 0, 0, -1.0, -0.5]) - else: - x0 = np.array([0, 1, 1, 1, -1, 0, 0, 0, 1.0, 0.5]) - #TODO: analytic gradient - xopt = least_squares(two_pole_fit, x0, jac='3-point', method='trf', xtol=1e-10, - gtol = 1e-10, max_nfev=2000, verbose=0, args=(omega[p], sigma[p])) - if xopt.success is False: - print('WARN: 2P-Fit Orb %d not converged, cost function %e'%(p,xopt.cost)) - coeff[:,p] = xopt.x.copy() - return coeff - -def thiele(fn,zn): - nfit = len(zn) - g = np.zeros((nfit,nfit),dtype=np.complex128) - g[:,0] = fn.copy() - for i in range(1,nfit): - g[i:,i] = (g[i-1,i-1]-g[i:,i-1])/((zn[i:]-zn[i-1])*g[i:,i-1]) - a = g.diagonal() - return a - -def pade_thiele(freqs,zn,coeff): - nfit = len(coeff) - X = coeff[-1]*(freqs-zn[-2]) - for i in range(nfit-1): - idx = nfit-i-1 - X = coeff[idx]*(freqs-zn[idx-1])/(1.+X) - X = coeff[0]/(1.+X) - return X - -def AC_pade_thiele_diag(sigma, omega): + frozen_mask = get_frozen_mask(gw) + nmoa, _ = gw.nmo + nkpts = gw.nkpts + nao = mo[0][0].shape[0] + mo_frozen = np.zeros(shape=[2, nkpts, nao, nmoa], dtype=np.complex128) + for s in range(2): + for k in range(nkpts): + mo_frozen[s, k] = mo[s][k][:, frozen_mask[s][k]] + return mo_frozen + + +def _mo_occ_frozen(gw, mo_occ): + """Get non-frozen occupation number. + Assume nmoa = nmob. 
+ + Parameters + ---------- + gw : KUGWAC + GW object, provides attributes: frozen, nmo, nkpt + mo_occ : complex ndarray + full occupation number + + Returns + ------- + mo_occ_frozen : double ndarray + non-frozen occupation number """ - Analytic continuation to real axis using a Pade approximation - from Thiele's reciprocal difference method - Reference: J. Low Temp. Phys. 29, 179 (1977) - Returns: - coeff: 2D array (ncoeff, norbs) - omega: 2D array (norbs, npade) + frozen_mask = get_frozen_mask(gw) + nmoa, _ = gw.nmo + nkpts = gw.nkpts + mo_occ_frozen = np.zeros(shape=[2, nkpts, nmoa], dtype=np.complex128) + for s in range(2): + for k in range(nkpts): + mo_occ_frozen[s, k] = mo_occ[s][k][frozen_mask[s][k]] + return mo_occ_frozen + + +def set_frozen_orbs(gw): + """Set .frozen attribute from frozen mask. + + Parameters + ---------- + gw : KUGWAC + unrestricted GW object """ - idx = range(1,40,6) - sigma1 = sigma[:,idx].copy() - sigma2 = sigma[:,(idx[-1]+4)::4].copy() - sigma = np.hstack((sigma1,sigma2)) - omega1 = omega[:,idx].copy() - omega2 = omega[:,(idx[-1]+4)::4].copy() - omega = np.hstack((omega1,omega2)) - norbs, nw = sigma.shape - npade = nw // 2 - coeff = np.zeros((npade*2,norbs),dtype=np.complex128) - for p in range(norbs): - coeff[:,p] = thiele(sigma[p,:npade*2], omega[p,:npade*2]) - - return coeff, omega[:,:npade*2] - - -class KUGWAC(lib.StreamObject): - - linearized = getattr(__config__, 'gw_gw_GW_linearized', False) - # Analytic continuation: pade or twopole - ac = getattr(__config__, 'gw_gw_GW_ac', 'pade') - # Whether applying finite size corrections - fc = getattr(__config__, 'gw_gw_GW_fc', True) - - _keys = { - 'linearized', 'ac', 'fc', 'frozen', 'mol', 'with_df', - 'kpts', 'nkpts', 'mo_energy', 'mo_coeff', 'mo_occ', 'sigma', - } - - def __init__(self, mf, frozen=None): - self.mol = mf.mol - self._scf = mf - self.verbose = self.mol.verbose - self.stdout = self.mol.stdout - self.max_memory = mf.max_memory - - #TODO: implement frozen orbs - if frozen 
is not None and frozen > 0: - raise NotImplementedError - self.frozen = frozen - - # DF-KGW must use GDF integrals - if getattr(mf, 'with_df', None): - self.with_df = mf.with_df + assert gw.nmo[0] == gw.nmo[1], "current implementation requires nmoa = nmob." + + if gw.frozen is not None: + if gw.orbs is not None: + if isinstance(gw.frozen, (int, np.int64)): + # frozen core + gw.orbs_frz = [x - gw.frozen for x in gw.orbs] + else: + # frozen list + assert isinstance(gw.frozen[0][0], (int, np.int64)) + assert gw.frozen[0] == gw.frozen[1] + gw.orbs_frz = [] + for orbi in gw.orbs: + count = len([p for p in gw.frozen[0] if p <= orbi]) + gw.orbs_frz.append(orbi - count) + if any(np.array(gw.orbs_frz) < 0): + raise RuntimeError('GW orbs must be larger than frozen core!') else: - raise NotImplementedError - -################################################## -# don't modify the following attributes, they are not input options - self._nocc = None - self._nmo = None - self.kpts = mf.kpts - self.nkpts = len(self.kpts) - # self.mo_energy: GW quasiparticle energy, not scf mo_energy - self.mo_energy = None - self.mo_coeff = mf.mo_coeff - self.mo_occ = mf.mo_occ - self.sigma = None - - def dump_flags(self): + gw.orbs_frz = range(gw.nmo[0]) + gw.orbs = range(len(gw._scf.mo_energy[0][0])) + if isinstance(gw.frozen, (int, np.int64)): + gw.orbs = list(set(gw.orbs) - set(range(gw.frozen))) + else: + assert isinstance(gw.frozen[0][0], (int, np.int64)) + assert gw.frozen[0] == gw.frozen[1] + gw.orbs = list(set(gw.orbs) - set(gw.frozen[0])) + else: + if gw.orbs is None: + gw.orbs = range(len(gw._scf.mo_energy[0][0])) + gw.orbs_frz = gw.orbs + return + + +class KUGWAC(KRGWAC): + def dump_flags(self, verbose=None): log = logger.Logger(self.stdout, self.verbose) log.info('') log.info('******** %s ********', self.__class__) @@ -647,134 +1019,111 @@ def dump_flags(self): nmoa, nmob = self.nmo nvira = nmoa - nocca nvirb = nmob - noccb - nkpts = self.nkpts - log.info('GW (nocca, noccb) = (%d, 
%d), (nvira, nvirb) = (%d, %d), nkpts = %d', - nocca, noccb, nvira, nvirb, nkpts) + log.info('GW (nocca, noccb) = (%d, %d), (nvira, nvirb) = (%d, %d)', nocca, noccb, nvira, nvirb) + log.info('nkpt = %d', self.nkpts) if self.frozen is not None: - log.info('frozen orbitals %s', str(self.frozen)) - logger.info(self, 'use perturbative linearized QP eqn = %s', self.linearized) - logger.info(self, 'analytic continuation method = %s', self.ac) - logger.info(self, 'GW finite size corrections = %s', self.fc) - return self + log.info('frozen orbitals = %s', str(self.frozen)) + if self.kptlist is not None: + log.info('k-point list = %s', str(self.kptlist)) + if self.orbs is not None: + log.info('orbital list = %s', str(self.orbs)) + log.info('off-diagonal self-energy = %s', self.fullsigma) + log.info('GW density matrix = %s', self.rdm) + log.info('density-fitting for exchange = %s', self.vhf_df) + log.info('finite size corrections = %s', self.fc) + if self.fc_grid is not None: + log.info('grids for finite size corrections = %s', self.fc_grid) + log.info('broadening parameter = %.3e', self.eta) + log.info('number of grids = %d', self.nw) + log.info('analytic continuation method = %s', self.ac) + log.info('imaginary frequency cutoff = %s', str(self.ac_iw_cutoff)) + if self.ac == 'pade': + log.info('Pade points = %d', self.ac_pade_npts) + log.info('Pade step ratio = %.3f', self.ac_pade_step_ratio) + log.info('use perturbative linearized QP eqn = %s', self.qpe_linearized) + if self.qpe_linearized is True: + log.info('linearized factor range = %s', self.qpe_linearized_range) + else: + log.info('QPE max iter = %d', self.qpe_max_iter) + log.info('QPE tolerance = %.1e', self.qpe_tol) + log.info('') + return @property def nocc(self): - return self.get_nocc() + frozen_mask = get_frozen_mask(self) + nkpts = len(self._scf.mo_energy[0]) + neleca = 0.0 + nelecb = 0.0 + for k in range(nkpts): + neleca += np.sum(self._scf.mo_occ[0][k][frozen_mask[0][k]]) + nelecb += 
np.sum(self._scf.mo_occ[1][k][frozen_mask[1][k]]) + neleca = int(neleca / nkpts) + nelecb = int(nelecb / nkpts) + return (neleca, nelecb) + @nocc.setter def nocc(self, n): self._nocc = n @property def nmo(self): - return self.get_nmo() + frozen_mask = get_frozen_mask(self) + nmoa = len(self._scf.mo_energy[0][0][frozen_mask[0][0]]) + nmob = len(self._scf.mo_energy[1][0][frozen_mask[1][0]]) + return (nmoa, nmob) + @nmo.setter def nmo(self, n): self._nmo = n - get_nocc = get_nocc - get_nmo = get_nmo - get_frozen_mask = get_frozen_mask + def kernel(self, orbs=None, kptlist=None): + """Run a G0W0 calculation. - def kernel(self, mo_energy=None, mo_coeff=None, orbs=None, kptlist=None, nw=100): - """ - Input: - kptlist: self-energy k-points - orbs: self-energy orbs - nw: grid number - Output: - mo_energy: GW quasiparticle energy + Parameters + ---------- + orbs : list, optional + orbital list to calculate self-energy, by default None + kptlist : list, optional + k-point list to calculate self-energy, by default None """ - if mo_coeff is None: - mo_coeff = np.array(self._scf.mo_coeff) - if mo_energy is None: - mo_energy = np.array(self._scf.mo_energy) + if self.mo_energy is None: + self.mo_energy = np.array(self._scf.mo_energy, copy=True) + if self.mo_coeff is None: + self.mo_coeff = np.array(self._scf.mo_coeff, copy=True) + if self.mo_occ is None: + self.mo_occ = np.array(self._scf.mo_occ, copy=True) + + if isinstance(self.frozen, list) and (not isinstance(self.frozen[0], list)): + # make sure self.frozen is a list of lists if not frozen core + self.frozen = [self.frozen, self.frozen] + else: + assert self.frozen is None or isinstance(self.frozen, (int, np.int64)) - nmoa, nmob = self.nmo + self.orbs = orbs + self.kptlist = kptlist + + if hasattr(self._scf, "sigma"): + self.nw = max(400, self.nw) + self.ac_pade_npts = 18 + self.ac_pade_step_ratio = 5.0 / 6.0 + self.fc = False + + nmoa, _ = self.nmo naux = self.with_df.get_naoaux() nkpts = self.nkpts - mem_incore = 
(3*nkpts*nmoa**2*naux) * 16/1e6 + mem_incore = (3 * nkpts * nmoa**2 * naux) * 16 / 1e6 mem_now = lib.current_memory()[0] - if (mem_incore + mem_now > 0.99*self.max_memory): + if mem_incore + mem_now > 0.99 * self.max_memory: logger.warn(self, 'Memory may not be enough!') - raise NotImplementedError - cput0 = (logger.process_clock(), logger.perf_counter()) + cput0 = (time.process_time(), time.perf_counter()) self.dump_flags() - self.converged, self.mo_energy, self.mo_coeff = \ - kernel(self, mo_energy, mo_coeff, orbs=orbs, - kptlist=kptlist, nw=nw, verbose=self.verbose) - + kernel(self) logger.warn(self, 'GW QP energies may not be sorted from min to max') logger.timer(self, 'GW', *cput0) - return self.mo_energy - -if __name__ == '__main__': - from pyscf.pbc import gto - from pyscf.pbc.lib import chkfile - import os - cell = gto.Cell() - cell.build( - unit = 'B', - a = [[ 0., 6.74027466, 6.74027466], - [ 6.74027466, 0., 6.74027466], - [ 6.74027466, 6.74027466, 0. ]], - atom = '''H 0 0 0 - H 1.68506866 1.68506866 1.68506866 - H 3.37013733 3.37013733 3.37013733''', - basis = 'gth-dzvp', - pseudo = 'gth-pade', - verbose = 5, - charge = 0, - spin = 1) - - cell.spin = cell.spin * 3 - kpts = cell.make_kpts([3,1,1],scaled_center=[0,0,0]) - gdf = df.GDF(cell, kpts) - gdf_fname = 'h3_ints_311.h5' - gdf._cderi_to_save = gdf_fname - if not os.path.isfile(gdf_fname): - gdf.build() - - chkfname = 'h_311.chk' - if os.path.isfile(chkfname): - kmf = scf.KUHF(cell, kpts, exxdiv=None) - kmf.with_df = gdf - kmf.with_df._cderi = gdf_fname - data = chkfile.load(chkfname, 'scf') - kmf.__dict__.update(data) - else: - kmf = scf.KUHF(cell, kpts, exxdiv=None) - kmf.with_df = gdf - kmf.with_df._cderi = gdf_fname - kmf.conv_tol = 1e-12 - kmf.chkfile = chkfname - kmf.kernel() - - gw = KUGWAC(kmf) - gw.linearized = False - gw.ac = 'pade' - gw.fc = False - nocca, noccb = gw.nocc - gw.kernel(kptlist=[0,1,2],orbs=range(0,nocca+3)) - print(gw.mo_energy) - assert 
((abs(gw.mo_energy[0][0][nocca-1]--0.28012813))<1e-5) - assert ((abs(gw.mo_energy[0][0][nocca]-0.13748876))<1e-5) - assert ((abs(gw.mo_energy[0][1][nocca-1]--0.29515851))<1e-5) - assert ((abs(gw.mo_energy[0][1][nocca]-0.14128011))<1e-5) - assert ((abs(gw.mo_energy[1][0][noccb-1]--0.33991721))<1e-5) - assert ((abs(gw.mo_energy[1][0][noccb]-0.10578847))<1e-5) - assert ((abs(gw.mo_energy[1][1][noccb-1]--0.33547973))<1e-5) - assert ((abs(gw.mo_energy[1][1][noccb]-0.08053408))<1e-5) - - gw.fc = True - nocca, noccb = gw.nocc - gw.kernel(kptlist=[0,1,2],orbs=range(0,nocca+3)) - print(gw.mo_energy) - assert ((abs(gw.mo_energy[0][0][nocca-1]--0.40244058))<1e-5) - assert ((abs(gw.mo_energy[0][0][nocca]-0.13618348))<1e-5) - assert ((abs(gw.mo_energy[0][1][nocca-1]--0.41743063))<1e-5) - assert ((abs(gw.mo_energy[0][1][nocca]-0.13997427))<1e-5) - assert ((abs(gw.mo_energy[1][0][noccb-1]--0.46133481))<1e-5) - assert ((abs(gw.mo_energy[1][0][noccb]-0.1044926))<1e-5) - assert ((abs(gw.mo_energy[1][1][noccb-1]--0.4568894))<1e-5) - assert ((abs(gw.mo_energy[1][1][noccb]-0.07922511))<1e-5) + return + + set_frozen_orbs = set_frozen_orbs + make_rdm1 = make_rdm1_linear + make_gf = make_gf diff --git a/pyscf/pbc/gw/kurpa.py b/pyscf/pbc/gw/kurpa.py new file mode 100644 index 0000000000..01b7a437c8 --- /dev/null +++ b/pyscf/pbc/gw/kurpa.py @@ -0,0 +1,811 @@ +#!/usr/bin/env python +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# Author: Tianyu Zhu +# Author: Christopher Hillenbrand +# Author: Chaoqun Zhang +# Author: Jincheng Yu +# Author: Jiachen Li +# + +""" +Periodic spin-unrestricted random phase approximation (direct RPA) with N^4 scaling. + +References: + T. Zhu and G.K.-L. Chan, J. Chem. Theory Comput. 17, 727-741 (2021) + New J. Phys. 14, 053020 (2012) +""" + +import time +import numpy as np +import scipy.linalg.blas as blas + +from pyscf import lib +from pyscf.lib import logger, temporary_env +from pyscf.ao2mo import _ao2mo +from pyscf.ao2mo.incore import _conc_mos +from pyscf.pbc import scf, tools +from pyscf.pbc.cc.kccsd_uhf import get_nocc, get_nmo, get_frozen_mask + +from pyscf.gw.utils.ac_grid import _get_scaled_legendre_roots +from pyscf.pbc.gw.kugw_ac import get_rho_response, get_rho_response_metal, get_rho_response_head, \ + get_rho_response_wing, get_qij +from pyscf.pbc.gw.krpa import KRPA, rho_accum_inner, rho_wing_accum_inner, get_rpa_ecorr_w, get_kconserv_ria_efficient + + +def kernel(rpa, mo_energy, mo_coeff, nw=None, with_e_hf=None): + """RPA correlation and total energy + + Parameters + ---------- + rpa : KURPA + rpa object + mo_energy : double array + molecular orbital energies + mo_coeff : double ndarray + molecular orbital coefficients + nw : int, optional + number of frequency point on imaginary axis, by default None + with_e_hf : float, optional + extra input HF energy, by default None + + Returns + ------- + e_tot : float + RPA total energy + e_hf : float + HF energy (exact exchange for given mo_coeff) + e_corr : float + RPA correlation energy + """ + assert rpa.frozen == 0 or rpa.frozen is None + + # Compute HF exchange energy (EXX) + if with_e_hf is None: + uhf = scf.KUHF(rpa.mol, rpa.kpts, exxdiv=rpa._scf.exxdiv) + uhf.verbose = 0 + if hasattr(rpa._scf, 'sigma'): + uhf = scf.addons.smearing_(uhf, sigma=rpa._scf.sigma, method=rpa._scf.smearing_method) + uhf.with_df = rpa._scf.with_df + with temporary_env(rpa.with_df, verbose=0), temporary_env(rpa.mol,
verbose=0): + dm = rpa._scf.make_rdm1() + vj = uhf.get_j(uhf.cell, dm) + vj_tot = vj[0] + vj[1] + e_1e = 1.0 / len(rpa.kpts) * lib.einsum('kij,kji', dm[0] + dm[1], uhf.get_hcore()).real + e_j = 0.5 / len(rpa.kpts) * lib.einsum('kij,kji', dm[0] + dm[1], vj_tot).real + e_x = get_rpa_exx(rpa, acfd=rpa.acfd_exx, correction_only=False) + e_nuc = rpa._scf.energy_nuc() + e_hf = e_1e + e_j + e_x + e_nuc + else: + e_hf = with_e_hf + logger.debug(rpa, f' Setting EXX energy explicitly to {e_hf}') + + is_metal = hasattr(rpa._scf, 'sigma') + + # Turn off FC for metals + if is_metal and rpa.fc: + logger.warn(rpa, 'FC not available for metals - setting rpa.fc to False') + rpa.fc = False + + # Grids for integration on imaginary axis + freqs, wts = rpa.get_grids(nw=nw, mo_energy=mo_energy) + + # Compute RPA correlation energy + if rpa.outcore: + if is_metal: + e_corr = get_rpa_ecorr_outcore_metal(rpa, freqs, wts) + else: + e_corr = get_rpa_ecorr_outcore(rpa, freqs, wts) + else: + e_corr = get_rpa_ecorr(rpa, freqs, wts) + + # Compute total energy + e_tot = e_hf + e_corr + + logger.debug(rpa, ' RPA total energy = %s', e_tot) + logger.debug(rpa, ' EXX energy = %s, RPA corr energy = %s', e_hf, e_corr) + + return e_tot, e_hf, e_corr + + +def get_idx_metal(mo_occ, threshold=1.0e-6): + """Get index of occupied/virtual/fractional orbitals of metals. + + Parameters + ---------- + mo_occ : double 1d array + occupation number + threshold : double, optional + threshold to determine fractionally occupied orbitals, by default 1.0e-6 + + Returns + ------- + idx_occ : list + list of occupied orbital indexes + idx_frac : list + list of fractionally occupied orbital indexes + idx_vir : list + list of virtual orbital indexes + """ + idx_occ = np.where(mo_occ > 1.0 - threshold)[0] + idx_vir = np.where(mo_occ < threshold)[0] + idx_frac = list(range(idx_occ[-1] + 1, idx_vir[0])) + + return idx_occ, idx_frac, idx_vir + + +def get_rpa_ecorr(rpa, freqs, wts): + """Compute RPA correlation energy. 
+ + Parameters + ---------- + rpa : KURPA + rpa object + freqs : double 1d array + frequency grid + wts : double 1d array + weight of grids + + Returns + ------- + e_corr : double + correlation energy + """ + mo_energy = np.array(rpa._scf.mo_energy) + mo_coeff = np.array(rpa._scf.mo_coeff) + nmoa, nmob = rpa.nmo + nkpts = rpa.nkpts + kpts = rpa.kpts + nw = len(freqs) + mydf = rpa.with_df + mo_occ = rpa.mo_occ + + # possible kpts shift + kscaled = rpa.mol.get_scaled_kpts(kpts) + kscaled -= kscaled[0] + + is_metal = hasattr(rpa._scf, 'sigma') + + if rpa.fc: + qij_a, qij_b, q_abs, nq_pts = rpa.get_q_mesh(mo_energy, mo_coeff) + + e_corr = 0j + + # Precompute k-conservation table + # Given k-point indices (kL, i), kconserv_table[kshift,i] contains + # the index j that satisfies momentum conservation, + # (k(i) - k(j) - k(kL)) \dot a = 2n\pi + # i.e. + # - ki + kj + kL = G + kconserv_table = get_kconserv_ria_efficient(rpa.mol, kpts) + cderiarr = mydf.cderi_array() + + for kL in range(nkpts): + # Lij: (2, ki, L, i, j) for looping every kL + # Lij = np.zeros((2,nkpts,naux,nmoa,nmoa),dtype=np.complex128) + Lij = [] + # kidx: save kj that conserves with kL and ki (-ki+kj+kL=G) + # kidx_r: save ki that conserves with kL and kj (-ki+kj+kL=G) + kidx = np.zeros((nkpts), dtype=np.int64) + kidx_r = np.zeros((nkpts), dtype=np.int64) + for i, kpti in enumerate(kpts): + j = kconserv_table[kL, i] + kptj = kpts[j] + # Find (ki,kj) that satisfies momentum conservation with kL + kconserv = -kscaled[i] + kscaled[j] + kscaled[kL] + assert np.linalg.norm(np.round(kconserv) - kconserv) < 1e-12 # kidx[i] = j + kidx[i] = j + kidx_r[j] = i + logger.debug(rpa, 'Read Lpq (kL: %s / %s, ki: %s, kj: %s)' % (kL + 1, nkpts, i, j)) + Lij_out_a = None + Lij_out_b = None + # Read (L|pq) and ao2mo transform to (L|ij) + Lpq = cderiarr.load(kpti, kptj) + if Lpq.shape[-1] == (nmoa * (nmoa + 1)) // 2: + Lpq = lib.unpack_tril(Lpq).reshape(-1, nmoa**2) + else: + Lpq = Lpq.reshape(-1, nmoa**2) + Lpq = 
Lpq.astype(np.complex128) + moija, ijslicea = _conc_mos(mo_coeff[0, i], mo_coeff[0, j])[2:] + moijb, ijsliceb = _conc_mos(mo_coeff[1, i], mo_coeff[1, j])[2:] + Lij_out_a = _ao2mo.r_e2(Lpq, moija, ijslicea, tao=[], ao_loc=None, out=Lij_out_a) + Lij_out_b = _ao2mo.r_e2(Lpq, moijb, ijsliceb, tao=[], ao_loc=None, out=Lij_out_b) + Lij.append(np.asarray((Lij_out_a.reshape(-1, nmoa, nmoa), Lij_out_b.reshape(-1, nmob, nmob)))) + + Lij = np.asarray(Lij) + naux = Lij.shape[2] + if is_metal is False: + Lia = [ + np.ascontiguousarray(Lij[:, 0, :, : rpa.nocc[0], rpa.nocc[0] :]), + np.ascontiguousarray(Lij[:, 1, :, : rpa.nocc[1], rpa.nocc[1] :]), + ] + + for w in range(nw): + # body polarizability + if is_metal: + Pi = get_rho_response_metal(freqs[w], mo_energy, mo_occ, Lij, kidx) + else: + Pi = get_rho_response(freqs[w], rpa.nocc, mo_energy, Lia, kidx) + if kL == 0 and rpa.fc: + for iq in range(nq_pts): + # head Pi_00 + Pi_00 = get_rho_response_head(freqs[w], mo_energy, (qij_a[iq], qij_b[iq])) + Pi_00 = 4.0 * np.pi / np.linalg.norm(q_abs[iq]) ** 2 * Pi_00 + # wings Pi_P0 + Pi_P0 = get_rho_response_wing(freqs[w], mo_energy, Lia, (qij_a[iq], qij_b[iq])) + Pi_P0 = np.sqrt(4.0 * np.pi) / np.linalg.norm(q_abs[iq]) * Pi_P0 + + # assemble Pi + Pi_fc = np.zeros((naux + 1, naux + 1), dtype=Pi.dtype) + Pi_fc[0, 0] = Pi_00 + Pi_fc[0, 1:] = Pi_P0.conj() + Pi_fc[1:, 0] = Pi_P0 + Pi_fc[1:, 1:] = Pi + + # First, compute ec_w = Tr(Pi) + |log(det(I-Pi))| + ec_w = np.trace(Pi_fc) + # The following two lines are equivalent to + # Pi = np.eye(naux) - Pi + blas.zdscal(-1.0, Pi_fc.ravel(), overwrite_x=1) + np.fill_diagonal(Pi_fc, np.diagonal(Pi_fc) + 1.0) + ec_w += np.linalg.slogdet((Pi_fc))[1] + e_corr += 1.0 / (2.0 * np.pi) * 1.0 / nkpts * 1.0 / nq_pts * ec_w * wts[w] + else: + # First, compute ec_w = Tr(Pi) + |log(det(I-Pi))| + ec_w = np.trace(Pi) + # The following two lines are equivalent to + # Pi = np.eye(naux) - Pi + blas.zdscal(-1.0, Pi.ravel(), overwrite_x=1) + np.fill_diagonal(Pi, 
np.diagonal(Pi) + 1.0) + ec_w += np.linalg.slogdet((Pi))[1] + e_corr += 1.0 / (2.0 * np.pi) * 1.0 / nkpts * ec_w * wts[w] + + return e_corr.real + + +def get_rpa_ecorr_outcore(rpa, freqs, wts): + """Low-memory routine to compute RPA correlation energy. + + Parameters + ---------- + rpa : KURPA + rpa object + freqs : double 1d array + frequency grid + wts : double 1d array + weight of grids + + Returns + ------- + e_corr : double + correlation energy + """ + mo_energy = np.array(rpa._scf.mo_energy) + mo_coeff = np.array(rpa._scf.mo_coeff) + nmoa = rpa.nmo[0] + nkpts = rpa.nkpts + kpts = rpa.kpts + nw = len(freqs) + mydf = rpa.with_df + + # possible kpts shift + kscaled = rpa.mol.get_scaled_kpts(kpts) + kscaled -= kscaled[0] + + if rpa.fc: + qij_a, qij_b, q_abs, nq_pts = rpa.get_q_mesh(mo_energy, mo_coeff) + + e_corr = 0j + + # Precompute k-conservation table + # Given k-point indices (kL, i), kconserv_table[kshift,i] contains + # the index j that satisfies momentum conservation, + # (k(i) - k(j) - k(kL)) \dot a = 2n\pi + # i.e. 
+ # - ki + kj + kL = G + kconserv_table = get_kconserv_ria_efficient(rpa.mol, kpts) + cderiarr = mydf.cderi_array() + + for kL in range(nkpts): + Pi = None + Pi_P0 = None + kidx = np.zeros((nkpts), dtype=np.int64) + kidx_r = np.zeros((nkpts), dtype=np.int64) + for s in range(2): + nseg = rpa.nocc[s] // rpa.segsize + 1 + for iseg in range(nseg): + orb_start = iseg * rpa.segsize + orb_end = min((iseg + 1) * rpa.segsize, rpa.nocc[s]) + if orb_end == orb_start: + continue + norb_this_iter = orb_end - orb_start + + for i, kpti in enumerate(kpts): + j = kconserv_table[kL, i] + kptj = kpts[j] + # Find (ki,kj) that satisfies momentum conservation with kL + kconserv = -kscaled[i] + kscaled[j] + kscaled[kL] + assert np.linalg.norm(np.round(kconserv) - kconserv) < 1e-12 # kidx[i] = j + kidx[i] = j + kidx_r[j] = i + logger.debug(rpa, 'Read Lpq (kL: %s / %s, ki: %s, kj: %s)' % (kL + 1, nkpts, i, j)) + # Read (L|pq) and ao2mo transform to (L|ij) + Lpq = cderiarr.load(kpti, kptj) + if Lpq.shape[-1] == (nmoa * (nmoa + 1)) // 2: + Lpq = lib.unpack_tril(Lpq).reshape(-1, nmoa**2) + else: + Lpq = Lpq.reshape(-1, nmoa**2) + Lpq = Lpq.astype(np.complex128) + naux = Lpq.shape[0] + moij, ijslice = _conc_mos(mo_coeff[s, i], mo_coeff[s, j])[2:] + ijslice = (orb_start, orb_end, rpa.nmo[s] + rpa.nocc[s], 2 * rpa.nmo[s]) + Lij_slice = _ao2mo.r_e2(Lpq, moij, ijslice, tao=[], ao_loc=None) + Lij_slice = Lij_slice.reshape(naux, norb_this_iter, rpa.nmo[s] - rpa.nocc[s]) + if Pi is None: + Pi = np.zeros((nw, naux, naux), dtype=np.complex128) + if kL == 0 and rpa.fc: + Pi_P0 = np.zeros((nq_pts, nw, naux), dtype=np.complex128) + eia = mo_energy[s, i][orb_start:orb_end, None] - mo_energy[s, j][None, rpa.nocc[s] :] + for w in range(nw): + rho_accum_inner(Pi[w], eia, freqs[w], Lij_slice, alpha=2.0 / nkpts) + if kL == 0 and rpa.fc: + for iq in range(nq_pts): + rho_wing_accum_inner( + Pi_P0[iq, w], + eia, + freqs[w], + Lij_slice, + (qij_a if s == 0 else qij_b)[iq, i, orb_start:orb_end], + alpha=2.0 / 
nkpts, + ) + + for w in range(nw): + if kL == 0 and rpa.fc: + for iq in range(nq_pts): + Pi_00 = get_rho_response_head(freqs[w], mo_energy, (qij_a[iq], qij_b[iq])) + Pi_00 = 4.0 * np.pi / np.linalg.norm(q_abs[iq]) ** 2 * Pi_00 + Pi_P0_iq = np.sqrt(4.0 * np.pi) / np.linalg.norm(q_abs[iq]) * Pi_P0[iq, w] + + Pi_fc = np.zeros((naux + 1, naux + 1), dtype=Pi.dtype) + Pi_fc[0, 0] = Pi_00 + Pi_fc[0, 1:] = Pi_P0_iq.conj() + Pi_fc[1:, 0] = Pi_P0_iq + Pi_fc[1:, 1:] = Pi[w] + + e_corr += get_rpa_ecorr_w(Pi_fc, wts[w] / nq_pts) + else: + e_corr += get_rpa_ecorr_w(Pi[w], wts[w]) + + e_corr = e_corr.real + e_corr *= 1.0 / (2.0 * np.pi) / nkpts + return e_corr + + +def get_rpa_ecorr_outcore_metal(rpa, freqs, wts): + """Low-memory routine to compute RPA correlation energy for metals. + + Parameters + ---------- + rpa : KURPA + rpa object + freqs : double 1d array + frequency grid + wts : double 1d array + weight of grids + + Returns + ------- + e_corr : double + correlation energy + """ + mo_energy = np.array(rpa._scf.mo_energy) + mo_coeff = np.array(rpa._scf.mo_coeff) + nmoa = rpa.nmo[0] + nkpts = rpa.nkpts + kpts = rpa.kpts + nw = len(freqs) + mydf = rpa.with_df + mo_occ = np.array(rpa.mo_occ) + + # possible kpts shift + kscaled = rpa.mol.get_scaled_kpts(kpts) + kscaled -= kscaled[0] + + if rpa.fc: + qij_a, qij_b, q_abs, nq_pts = rpa.get_q_mesh(mo_energy, mo_coeff) + + e_corr = 0j + + # Precompute k-conservation table + # Given k-point indices (kL, i), kconserv_table[kshift,i] contains + # the index j that satisfies momentum conservation, + # (k(i) - k(j) - k(kL)) \dot a = 2n\pi + # i.e. 
+ # - ki + kj + kL = G + kconserv_table = get_kconserv_ria_efficient(rpa.mol, kpts) + cderiarr = mydf.cderi_array() + + for kL in range(nkpts): + Pi = None + kidx = np.zeros((nkpts), dtype=np.int64) + kidx_r = np.zeros((nkpts), dtype=np.int64) + for s in range(2): + for i, kpti in enumerate(kpts): + j = kconserv_table[kL, i] + kptj = kpts[j] + # Find (ki,kj) that satisfies momentum conservation with kL + kconserv = -kscaled[i] + kscaled[j] + kscaled[kL] + assert np.linalg.norm(np.round(kconserv) - kconserv) < 1e-12 # kidx[i] = j + kidx[i] = j + kidx_r[j] = i + logger.debug(rpa, 'Read Lpq (kL: %s / %s, ki: %s, kj: %s)' % (kL + 1, nkpts, i, j)) + # Read (L|pq) and ao2mo transform to (L|ij) + Lpq = cderiarr.load(kpti, kptj) + if Lpq.shape[-1] == (nmoa * (nmoa + 1)) // 2: + Lpq = lib.unpack_tril(Lpq).reshape(-1, nmoa**2) + else: + Lpq = Lpq.reshape(-1, nmoa**2) + Lpq = Lpq.astype(np.complex128) + naux = Lpq.shape[0] + + idx_occ_i, idx_frac_i, idx_vir_i = get_idx_metal(mo_occ[s, i]) + idx_occ_j, idx_frac_j, idx_vir_j = get_idx_metal(mo_occ[s, j]) + + nocc_i = len(idx_occ_i) + nfrac_i = len(idx_frac_i) + nocc_j = len(idx_occ_j) + nfrac_j = len(idx_frac_j) + nseg = (nocc_i + nfrac_i) // rpa.segsize + 1 + for iseg in range(nseg): + orb_start = iseg * rpa.segsize + orb_end = min((iseg + 1) * rpa.segsize, nocc_i + nfrac_i) + if orb_end == orb_start: + break + norb_this_iter = orb_end - orb_start + + moij, ijslice = _conc_mos(mo_coeff[s, i], mo_coeff[s, j])[2:] + + ijslice = (orb_start, orb_end, rpa.nmo[s] + nocc_j, 2 * rpa.nmo[s]) + Lij_slice = _ao2mo.r_e2(Lpq, moij, ijslice, tao=[], ao_loc=None) + Lij_slice = Lij_slice.reshape(naux, norb_this_iter, rpa.nmo[s] - nocc_j) + if Pi is None: + Pi = np.zeros((nw, naux, naux), dtype=np.complex128) + + # Find ka that conserves with ki and kL (-ki+ka+kL=G) + eia = mo_energy[s, i][orb_start:orb_end, None] - mo_energy[s, j][None, nocc_j:] + fia = mo_occ[s, i][orb_start:orb_end, None] - mo_occ[s, j][None, nocc_j:] + # The overall 
fia[nocc_i:, :nfrac_j] *= 0.5 for double counting + if orb_start >= nocc_i: + fia[:, :nfrac_j] *= 0.5 + elif orb_end > nocc_i: + offset = nocc_i - orb_start + fia[offset:, :nfrac_j] *= 0.5 + for w in range(nw): + rho_accum_inner(Pi[w], eia, freqs[w], Lij_slice, alpha=2.0 / nkpts, fia=fia) + + for w in range(nw): + e_corr += get_rpa_ecorr_w(Pi[w], wts[w]) + + e_corr = e_corr.real + e_corr *= 1.0 / (2.0 * np.pi) / nkpts + + return e_corr + + +def get_rpa_exx(rpa, acfd=False, correction_only=False): + """Calculate RPA exchange energy. + For gapped systems, Hartree-Fock and adiabatic connection fluctuation dissipation exchange energies are the same. + For metallic systems, they are different. + The ACFD exchange energy is given by equation 12 in doi.org/10.1103/PhysRevB.81.115126 + + Parameters + ---------- + rpa : KURPA + rpa object + acfd : bool, optional + calculate ACFD exchange energy, by default False + correction_only : bool, optional + only calculate the correction term, by default False + + Returns + ------- + ex : double + exchange energy + """ + mo_energy = np.asarray(rpa._scf.mo_energy) + mo_coeff = np.asarray(rpa._scf.mo_coeff) + mo_occ = np.asarray(rpa._scf.mo_occ) + + nocc = rpa.nocc + nspin, _, nao, _ = mo_coeff.shape + nkpts = rpa.nkpts + kpts = rpa.kpts + mydf = rpa.with_df + + # possible kpts shift center + kscaled = rpa.mol.get_scaled_kpts(kpts) + kscaled -= kscaled[0] + + ex = 0j + cderiarr = mydf.cderi_array() + for kL in range(nkpts): + # kidx: save kj that conserves with kL and ki (-ki+kj+kL=G) + # kidx_r: save ki that conserves with kL and kj (-ki+kj+kL=G) + kidx = np.zeros(shape=[nkpts], dtype=np.int64) + kidx_r = np.zeros(shape=[nkpts], dtype=np.int64) + for i in range(nkpts): + for j in range(nkpts): + # Find (ki,kj) that satisfies momentum conservation with kL + kconserv = -kscaled[i] + kscaled[j] + kscaled[kL] + is_kconserv = np.linalg.norm(np.round(kconserv) - kconserv) < 1e-12 + if is_kconserv: + kidx[i] = j + kidx_r[j] = i + + for kn in 
range(nkpts): + # Find km that conserves with kn and kL (-km+kn+kL=G) + km = kidx_r[kn] + + # logger.debug(gw, 'Read Lpq (kL: %s / %s, ki: %s, kj: %s @ Rank %d)' % (kL + 1, nkpts, i, j, rank)) + # Read (L|pq) and ao2mo transform to (L|ij) + # support unequal naux on different k points + Lpq_ao = cderiarr.load(kpts[km], kpts[kn]) + if Lpq_ao.shape[-1] == (nao * (nao + 1)) // 2: + Lpq_ao = lib.unpack_tril(Lpq_ao).reshape(-1, nao**2) + else: + Lpq_ao = Lpq_ao.reshape(-1, nao**2) + Lpq_ao = Lpq_ao.astype(np.complex128) + + for s in range(nspin): + Lij = None + if hasattr(rpa._scf, 'sigma'): + idx_occ_i, idx_frac_i, _ = get_idx_metal(mo_occ[s][km]) + idx_occ_j, idx_frac_j, _ = get_idx_metal(mo_occ[s][kn]) + nocc_i = len(idx_occ_i) + len(idx_frac_i) + nocc_j = len(idx_occ_j) + len(idx_frac_j) + moij, ijslice = _conc_mos(mo_coeff[s][km][:, :nocc_i], mo_coeff[s][kn][:, :nocc_j])[2:] + Lij = _ao2mo.r_e2(Lpq_ao, moij, ijslice, tao=[], ao_loc=None, out=Lij) + Lij = Lij.reshape(-1, nocc_i, nocc_j) + + if acfd is True: + if correction_only is True: + mo_occ_ij = np.minimum(mo_occ[s][km][:nocc_i, None], mo_occ[s][kn][None, :nocc_j]) + mo_occ_ij -= mo_occ[s][km][:nocc_i, None] * mo_occ[s][kn][None, :nocc_j] + else: + # The following line is equivalent to the frequency integration in equation 12 in + # doi.org/10.1103/PhysRevB.81.115126 + # TODO: add a detailed note + eij = mo_energy[s][km][:nocc_i, None] - mo_energy[s][kn][None, :nocc_j] + integrand = np.zeros((nocc_i, nocc_j), dtype=np.complex128) + integrand[eij > 1e-6] = 1 + integrand[eij < -1e-6] = -1 + mo_occ_ij = 1.0 - integrand + mo_occ_ij = mo_occ_ij * mo_occ[s][km][:nocc_i, None] + else: + mo_occ_ij = mo_occ[s][km][:nocc_i, None] * mo_occ[s][kn][None, :nocc_j] + Lij_occ = Lij * mo_occ_ij[None] + # ex -= np.einsum('Lij,Lij->', Lij_occ.reshape(-1, nocc, nocc), Lij.reshape(-1, nocc, nocc).conj()) + ex -= blas.zdotc(Lij_occ.ravel(), Lij.ravel()) + else: + moij, ijslice = _conc_mos(mo_coeff[s][km][:, :nocc[s]], 
mo_coeff[s][kn][:, :nocc[s]])[2:] + Lij = _ao2mo.r_e2(Lpq_ao, moij, ijslice, tao=[], ao_loc=None, out=Lij) + # ex -= np.einsum('Lij,Lij->', Lij.reshape(-1, nocc, nocc), Lij.reshape(-1, nocc, nocc).conj()) + ex -= blas.zdotc(Lij.ravel(), Lij.ravel()) + + ex = ex.real + ex *= 0.5 / nkpts**2 + + if rpa._scf.exxdiv == 'ewald' and rpa._scf.cell.dimension != 0: + madelung = tools.pbc.madelung(rpa._scf.cell, kpts) + for s in range(nspin): + exxdiv_shift = 0.5 * madelung * np.sum(mo_occ[s]**2) / (nkpts) + ex -= exxdiv_shift + if acfd is True: + for k in range(nkpts): + idx_occ, idx_frac, _ = get_idx_metal(mo_occ[s][k]) + f_i = mo_occ[s][k][:(len(idx_occ) + len(idx_frac))] + ex -= 0.5 * madelung * np.sum(f_i - f_i * f_i) / nkpts + + return ex + + +class KURPA(KRPA): + def dump_flags(self, verbose=None): + log = logger.Logger(self.stdout, self.verbose) + log.info('') + log.info('******** %s ********', self.__class__) + log.info('method = %s', self.__class__.__name__) + nocca, noccb = self.nocc + nmoa, nmob = self.nmo + nvira = nmoa - nocca + nvirb = nmob - noccb + nkpts = self.nkpts + log.info( + 'RPA (nocca, noccb) = (%d, %d), (nvira, nvirb) = (%d, %d), nkpts = %d', nocca, noccb, nvira, nvirb, nkpts + ) + if self.frozen is not None and self.frozen > 0: + log.info('frozen orbitals %s', str(self.frozen)) + log.info('grid type = %s', self.grids_alg) + log.info('outcore mode = %s', self.outcore) + if self.outcore is True: + log.info('outcore segment size = %d', self.segsize) + log.info('RPA finite size corrections = %s', self.fc) + log.info('ACFD exchange energy = %s', self.acfd_exx) + log.info('') + return + + @property + def nocc(self): + mo_occ = self._scf.mo_occ + return (int(np.sum(mo_occ[0][0])), int(np.sum(mo_occ[1][0]))) + + @nocc.setter + def nocc(self, n): + self._nocc = n + + @property + def nmo(self): + return (len(self._scf.mo_energy[0][0]), len(self._scf.mo_energy[1][0])) + + @nmo.setter + def nmo(self, n): + self._nmo = n + + get_nocc = get_nocc + get_nmo = 
get_nmo + get_frozen_mask = get_frozen_mask + + def kernel(self, mo_energy=None, mo_coeff=None, nw=None): + """RPA correlation and total energy + + Calculated total energy, HF energy and RPA correlation energy + are stored in self.e_tot, self.e_hf, self.e_corr + + Parameters + ---------- + mo_energy : double array + molecular orbital energies + mo_coeff : double ndarray + molecular orbital coefficients + nw : int, optional + number of frequency point on imaginary axis, by default None + + Returns + ------- + e_tot : float + RPA total energy + e_hf : float + HF energy (exact exchange for given mo_coeff) + e_corr : float + RPA correlation energy + """ + if mo_coeff is None: + mo_coeff = np.array(self._scf.mo_coeff) + if mo_energy is None: + mo_energy = np.array(self._scf.mo_energy) + + nmoa = self.nmo[0] + naux = self.with_df.get_naoaux() + nkpts = self.nkpts + mem_incore = (3 * nkpts * nmoa**2 * naux) * 16 / 1e6 + mem_now = lib.current_memory()[0] + if mem_incore + mem_now > 0.99 * self.max_memory: + logger.warn(self, 'Memory may not be enough!') + raise NotImplementedError + + cput0 = (time.process_time(), time.perf_counter()) + self.dump_flags() + self.e_tot, self.e_hf, self.e_corr = kernel(self, mo_energy, mo_coeff, nw=nw) + logger.timer(self, 'RPA', *cput0) + return self.e_tot, self.e_hf, self.e_corr + + def get_grids(self, alg=None, nw=None, mo_energy=None): + """Generate grids for integration. 
+ + Parameters + ---------- + alg : str, optional + algorithm for generating grids, by default None + nw : int, optional + number of grids, by default None + mo_energy : double 3d array, optional + orbital energy, used for minimax grids, by default None + + Returns + ------- + freqs : double 1d array + frequency grid + wts : double 1d array + weight of grids + """ + if alg is None: + alg = self.grids_alg + if mo_energy is None: + mo_energy = np.array(self._scf.mo_energy) + + if alg == 'legendre': + nw = 40 if nw is None else nw + freqs, wts = _get_scaled_legendre_roots(nw) + else: + raise NotImplementedError('Grids algorithm not implemented!') + + return freqs, wts + + def get_q_mesh(self, mo_energy, mo_coeff): + """Get q-mesh for finite size correction. + Equations 39-42 in doi.org/10.1021/acs.jctc.0c00704 + + Parameters + ---------- + mo_energy : double 3d array + orbital energy + mo_coeff : double 4d array + coefficient from AO to MO + + Returns + ------- + qij_a, qij_b : complex 4d arrays + alpha/beta occupied-virtual matrix elements qij on the q-mesh, shape (nq_pts, nkpts, nocc, nvir) + q_abs : double 1d array + absolute positions of q-mesh grids + nq_pts : int + number of q-mesh grids + """ + # Set up q mesh for q->0 finite size correction + nmoa, nmob = self.nmo + nocca, noccb = self.nocc + nkpts = self.nkpts + if not self.fc_grid: + q_pts = np.array([1e-3, 0, 0]).reshape(1, 3) + else: + Nq = 4 + q_pts = np.zeros((Nq**3 - 1, 3)) + for i in range(Nq): + for j in range(Nq): + for k in range(Nq): + if i == 0 and j == 0 and k == 0: + continue + else: + q_pts[i * Nq**2 + j * Nq + k - 1, 0] = k * 5e-4 + q_pts[i * Nq**2 + j * Nq + k - 1, 1] = j * 5e-4 + q_pts[i * Nq**2 + j * Nq + k - 1, 2] = i * 5e-4 + nq_pts = len(q_pts) + q_abs = self.mol.get_abs_kpts(q_pts) + + # Get qij = 1/sqrt(Omega) * < psi_{ik} | e^{iqr} | psi_{ak-q} > at q: (nkpts, nocc, nvir) + qij_a = np.zeros((nq_pts, nkpts, nocca, nmoa - nocca), dtype=np.complex128) + qij_b = np.zeros((nq_pts, nkpts, noccb, nmob - noccb), dtype=np.complex128) + for k in range(nq_pts): + qij_tmp = get_qij(self,
q_abs[k], mo_energy, mo_coeff) + qij_a[k] = qij_tmp[0] + qij_b[k] = qij_tmp[1] + + return qij_a, qij_b, q_abs, nq_pts + + def get_acfd_exx(self, correction_only=False): + """Calculate ACFD exchange energy. + + Parameters + ---------- + correction_only : bool + only return the correction term + + Returns + ------- + ex_acfd : double + ACFD exchange energy + """ + ex_acfd = get_rpa_exx(self, acfd=True, correction_only=correction_only) + return ex_acfd diff --git a/pyscf/pbc/gw/test/test_gw_ac.py b/pyscf/pbc/gw/test/test_gw_ac.py new file mode 100644 index 0000000000..4d36cc1781 --- /dev/null +++ b/pyscf/pbc/gw/test/test_gw_ac.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python + +import pytest + +from pyscf.pbc import df, dft, gto, tools +from pyscf.pbc.gw.gw_ac import GWAC + + +@pytest.fixture(scope="module") +def diamond_supercell_pbe(): + ucell = gto.Cell() + ucell.build( + unit="angstrom", + a=""" + 0.000000 1.783500 1.783500 + 1.783500 0.000000 1.783500 + 1.783500 1.783500 0.000000 + """, + atom="C 1.337625 1.337625 1.337625; C 2.229375 2.229375 2.229375", + dimension=3, + verbose=0, + output="/dev/null", + pseudo="gth-pade", + basis="gth-szv", + precision=1e-12, + ) + + cell = tools.super_cell(ucell, [3, 1, 1]) + cell.verbose = 0 + cell.output = "/dev/null" + + gdf = df.RSDF(cell) + gdf.build() + + mf = dft.RKS(cell).rs_density_fit() + mf.xc = "pbe" + mf.exxdiv = None + mf.with_df = gdf + mf.conv_tol = 1e-12 + mf.kernel() + + yield mf + + +def test_gwac_pade_diamond_supercell_high_cost(diamond_supercell_pbe): + gw = GWAC(diamond_supercell_pbe) + gw.kernel() + + assert gw.mo_energy[5] == pytest.approx(0.52637379, abs=1e-4) + assert gw.mo_energy[10] == pytest.approx(0.62044176, abs=1e-4) + assert gw.mo_energy[12] == pytest.approx(0.96572544, abs=1e-4) + assert gw.mo_energy[15] == pytest.approx(1.0751724, abs=1e-4) diff --git a/pyscf/pbc/gw/test/test_krgw.py b/pyscf/pbc/gw/test/test_krgw.py index 45bbe4553e..15c88715bd 100644 --- a/pyscf/pbc/gw/test/test_krgw.py +++ 
b/pyscf/pbc/gw/test/test_krgw.py @@ -1,11 +1,7 @@ #!/usr/bin/env python import unittest -import numpy -import os -from pyscf import lib -from pyscf.pbc import gto, dft, scf, df -from pyscf.pbc.gw import krgw_ac +from pyscf.pbc import gto, dft, df from pyscf.pbc.gw import krgw_cd def setUpModule(): @@ -33,32 +29,6 @@ def tearDownModule(): del cell, kpts, gdf class KnownValues(unittest.TestCase): - def test_gwac_pade_high_cost(self): - kmf = dft.KRKS(cell, kpts).density_fit(with_df=gdf) - kmf.xc = 'pbe' - kmf.kernel() - - gw = krgw_ac.KRGWAC(kmf) - gw.linearized = False - gw.ac = 'pade' - - # without finite size corrections - gw.fc = False - nocc = gw.nocc - gw.kernel(kptlist=[0,1,2],orbs=range(0, nocc+3)) - self.assertAlmostEqual(gw.mo_energy[0][nocc-1], 0.62045797, 4) - self.assertAlmostEqual(gw.mo_energy[0][nocc] , 0.96574324, 4) - self.assertAlmostEqual(gw.mo_energy[1][nocc-1], 0.52639137, 4) - self.assertAlmostEqual(gw.mo_energy[1][nocc] , 1.07513258, 4) - - # with finite size corrections - gw.fc = True - gw.kernel(kptlist=[0,1,2], orbs=range(0, nocc+3)) - self.assertAlmostEqual(gw.mo_energy[0][nocc-1], 0.54277092, 4) - self.assertAlmostEqual(gw.mo_energy[0][nocc] , 0.80148537, 4) - self.assertAlmostEqual(gw.mo_energy[1][nocc-1], 0.45073793, 4) - self.assertAlmostEqual(gw.mo_energy[1][nocc] , 0.92910108, 4) - def test_gwcd_high_cost(self): kmf = dft.KRKS(cell, kpts).density_fit(with_df=gdf) kmf.xc = 'pbe' @@ -84,42 +54,6 @@ def test_gwcd_high_cost(self): self.assertAlmostEqual(gw.mo_energy[1][nocc-1], 0.45073751, 4) self.assertAlmostEqual(gw.mo_energy[1][nocc], 0.92910117, 4) - def test_gw(self): - cell = gto.Cell() - cell.build(a = ''' - 0.000000 1.783500 1.783500 - 1.783500 0.000000 1.783500 - 1.783500 1.783500 0.000000 - ''', - atom = 'H 1.337625 1.337625 1.337625; H 2.229375 2.229375 2.229375', - verbose = 4, - output = '/dev/null', - basis=[[0, [2., 1.]], [0, [.5, 1.]]]) - - kpts = cell.make_kpts([3,1,1],scaled_center=[0,0,0]) - kmf = dft.KRKS(cell, 
kpts).density_fit().run() - - gw = krgw_ac.KRGWAC(kmf) - gw.linearized = True - gw.ac = 'pade' - # without finite size corrections - gw.fc = False - nocc = gw.nocc - gw.kernel(kptlist=[0,1,2],orbs=range(0,nocc+3)) - self.assertAlmostEqual(gw.mo_energy[0][nocc-1], -0.257088388010083, 6) - self.assertAlmostEqual(gw.mo_energy[0][nocc] , 0.7377021147675703, 6) - self.assertAlmostEqual(gw.mo_energy[1][nocc-1], -0.121872186953884, 6) - self.assertAlmostEqual(gw.mo_energy[1][nocc] , 0.570710170186033 , 6) - - # with finite size corrections - gw.linearized = False - gw.fc = True - gw.kernel(kptlist=[0,1,2],orbs=range(0,nocc+3)) - self.assertAlmostEqual(gw.mo_energy[0][nocc-1], -0.464099926108335, 6) - self.assertAlmostEqual(gw.mo_energy[0][nocc] , 0.7105306664244474, 6) - self.assertAlmostEqual(gw.mo_energy[1][nocc-1], -0.347704595829313, 6) - self.assertAlmostEqual(gw.mo_energy[1][nocc] , 0.552136080110482 , 6) - if __name__ == '__main__': print('Full Tests for KRGW') unittest.main() diff --git a/pyscf/pbc/gw/test/test_krgw_ac.py b/pyscf/pbc/gw/test/test_krgw_ac.py new file mode 100644 index 0000000000..617e7a2441 --- /dev/null +++ b/pyscf/pbc/gw/test/test_krgw_ac.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python + +import pytest + +from pyscf.pbc import df, dft, gto +from pyscf.pbc.gw.krgw_ac import KRGWAC + + +@pytest.fixture(scope="module") +def diamond_pbe(): + cell = gto.Cell() + cell.build( + unit="angstrom", + a=""" + 0.000000 1.783500 1.783500 + 1.783500 0.000000 1.783500 + 1.783500 1.783500 0.000000 + """, + atom="C 1.337625 1.337625 1.337625; C 2.229375 2.229375 2.229375", + dimension=3, + verbose=0, + output="/dev/null", + pseudo="gth-pade", + basis="gth-szv", + precision=1e-10, + ) + + kpts = cell.make_kpts([3, 1, 1], scaled_center=[0, 0, 0]) + gdf = df.RSDF(cell, kpts) + gdf.build() + + kmf = dft.KRKS(cell, kpts).rs_density_fit() + kmf.xc = "pbe" + kmf.with_df = gdf + kmf.conv_tol = 1e-12 + kmf.kernel() + + yield kmf + + cell.stdout.close() + + +def 
test_krgwac_pade_no_fc(diamond_pbe): + gw = KRGWAC(diamond_pbe) + gw.ac = "pade" + gw.qpe_linearized = False + gw.fc = False + gw.kernel(kptlist=[0, 1, 2], orbs=range(0, 7)) + + assert gw.mo_energy[0][3] == pytest.approx(0.62044205, abs=1e-4) + assert gw.mo_energy[0][4] == pytest.approx(0.96572609, abs=1e-4) + assert gw.mo_energy[1][3] == pytest.approx(0.52637438, abs=1e-4) + assert gw.mo_energy[1][4] == pytest.approx(1.07517363, abs=1e-4) + + +def test_krgwac_pade_no_fc_outcore(diamond_pbe): + gw = KRGWAC(diamond_pbe) + gw.ac = "pade" + gw.qpe_linearized = False + gw.fc = False + gw.outcore = True + gw.kernel(kptlist=[0, 1, 2], orbs=range(0, 7)) + + assert gw.mo_energy[0][3] == pytest.approx(0.62044205, abs=1e-4) + assert gw.mo_energy[0][4] == pytest.approx(0.96572609, abs=1e-4) + assert gw.mo_energy[1][3] == pytest.approx(0.52637438, abs=1e-4) + assert gw.mo_energy[1][4] == pytest.approx(1.07517363, abs=1e-4) + + +def test_krgwac_pade_with_fc(diamond_pbe): + gw = KRGWAC(diamond_pbe) + gw.ac = "pade" + gw.qpe_linearized = False + gw.fc = True + gw.kernel(kptlist=[0, 1, 2], orbs=range(0, 7)) + + assert gw.mo_energy[0][3] == pytest.approx(0.44025061, abs=1e-4) + assert gw.mo_energy[0][4] == pytest.approx(0.80148565, abs=1e-4) + assert gw.mo_energy[1][3] == pytest.approx(0.35193483, abs=1e-4) + assert gw.mo_energy[1][4] == pytest.approx(0.92909525, abs=1e-4) + + +def test_krgwac_pade_with_fc_frozen_core(diamond_pbe): + gw = KRGWAC(diamond_pbe) + gw.ac = "pade" + gw.qpe_linearized = False + gw.fc = True + gw.frozen = 1 + gw.kernel() + + assert gw.mo_energy[0][3] == pytest.approx(0.44092615, abs=1e-4) + assert gw.mo_energy[0][4] == pytest.approx(0.79820946, abs=1e-4) diff --git a/pyscf/pbc/gw/test/test_krpa.py b/pyscf/pbc/gw/test/test_krpa.py new file mode 100644 index 0000000000..b71ef7bc2d --- /dev/null +++ b/pyscf/pbc/gw/test/test_krpa.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python + +import pytest + +from pyscf.pbc import df, gto, scf +from pyscf.pbc.gw.krpa import 
KRPA + + +@pytest.fixture(scope="module") +def diamond_krhf(): + cell = gto.Cell() + cell.build( + unit="angstrom", + a=""" + 0.000000 1.783500 1.783500 + 1.783500 0.000000 1.783500 + 1.783500 1.783500 0.000000 + """, + atom="C 1.337625 1.337625 1.337625; C 2.229375 2.229375 2.229375", + dimension=3, + verbose=0, + output="/dev/null", + pseudo="gth-pbe", + basis="gth-dzv", + precision=1e-12, + ) + + kpts = cell.make_kpts([3, 1, 1], scaled_center=[0, 0, 0]) + gdf = df.RSGDF(cell, kpts) + gdf.build() + + kmf = scf.KRHF(cell, kpts).rs_density_fit() + kmf.with_df = gdf + kmf.conv_tol = 1e-12 + kmf.kernel() + + yield kmf + + cell.stdout.close() + + +def test_krpa_no_fc(diamond_krhf): + rpa = KRPA(diamond_krhf) + rpa.fc = False + rpa.kernel() + + assert rpa.e_corr == pytest.approx(-0.1852772037535004, abs=1e-6) + assert rpa.e_tot == pytest.approx(-10.694392044197565, abs=1e-6) + + +def test_krpa_no_fc_outcore(diamond_krhf): + rpa = KRPA(diamond_krhf) + rpa.outcore = True + rpa.segsize = 2 + rpa.kernel() + + assert rpa.e_corr == pytest.approx(-0.1852772037535004, abs=1e-6) + assert rpa.e_tot == pytest.approx(-10.694392044197565, abs=1e-6) + + +def test_krpa_with_fc(diamond_krhf): + rpa = KRPA(diamond_krhf) + rpa.fc = True + rpa.kernel() + + assert rpa.e_corr == pytest.approx(-0.20723389722097715, abs=1e-6) + assert rpa.e_tot == pytest.approx(-10.716348738655793, abs=1e-6) + + +def test_krpa_with_fc_outcore(diamond_krhf): + rpa = KRPA(diamond_krhf) + rpa.fc = True + rpa.outcore = True + rpa.segsize = 2 + rpa.kernel() + + assert rpa.e_corr == pytest.approx(-0.20723389722097715, abs=1e-6) + assert rpa.e_tot == pytest.approx(-10.716348738655793, abs=1e-6) + diff --git a/pyscf/pbc/gw/test/test_kugw.py b/pyscf/pbc/gw/test/test_kugw.py deleted file mode 100644 index a4fd92d7e9..0000000000 --- a/pyscf/pbc/gw/test/test_kugw.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python - -import unittest -import numpy -import os -from pyscf import lib -from pyscf.pbc import gto, dft, 
scf, df -from pyscf.pbc.gw import kugw_ac - -def setUpModule(): - global cell, kmf, kpts - cell = gto.Cell() - cell.build( - unit = 'B', - a = [[ 0., 6.74027466, 6.74027466], - [ 6.74027466, 0., 6.74027466], - [ 6.74027466, 6.74027466, 0. ]], - atom = '''H 0 0 0 - H 1.68506866 1.68506866 1.68506866 - H 3.37013733 3.37013733 3.37013733''', - basis = 'gth-dzvp', - pseudo = 'gth-pade', - verbose = 7, - output = '/dev/null', - charge = 0, - spin = None) - cell.spin = 3 - kpts = cell.make_kpts([3,1,1], scaled_center=[0,0,0]) - kmf = scf.KUHF(cell, kpts, exxdiv=None).density_fit() - kmf.run() - -def tearDownModule(): - global cell, kmf - cell.stdout.close() - del cell, kmf - -class KnownValues(unittest.TestCase): - def test_gwac_pade(self): - gw = kugw_ac.KUGWAC(kmf) - gw.linearized = False - gw.ac = 'pade' - gw.fc = False - nocca, noccb = gw.nocc - gw.kernel(kptlist=[0,1,2], orbs=range(0, nocca+3)) - self.assertAlmostEqual(gw.mo_energy[0][0][nocca-1], -0.28012813, 5) - self.assertAlmostEqual(gw.mo_energy[0][0][nocca], 0.13748876, 5) - self.assertAlmostEqual(gw.mo_energy[0][1][nocca-1], -0.29515851, 5) - self.assertAlmostEqual(gw.mo_energy[0][1][nocca], 0.14128011, 5) - self.assertAlmostEqual(gw.mo_energy[1][0][noccb-1], -0.33991721, 5) - self.assertAlmostEqual(gw.mo_energy[1][0][noccb], 0.10578847, 5) - self.assertAlmostEqual(gw.mo_energy[1][1][noccb-1], -0.33547973, 5) - self.assertAlmostEqual(gw.mo_energy[1][1][noccb], 0.08053408, 5) - - gw.fc = True - nocca, noccb = gw.nocc - gw.kernel(kptlist=[0,1,2], orbs=range(0,nocca+3)) - self.assertAlmostEqual(gw.mo_energy[0][0][nocca-1], -0.40244058, 5) - self.assertAlmostEqual(gw.mo_energy[0][0][nocca], 0.13618348, 5) - self.assertAlmostEqual(gw.mo_energy[0][1][nocca-1], -0.41743063, 5) - self.assertAlmostEqual(gw.mo_energy[0][1][nocca], 0.13997427, 5) - self.assertAlmostEqual(gw.mo_energy[1][0][noccb-1], -0.46133481, 5) - self.assertAlmostEqual(gw.mo_energy[1][0][noccb], 0.1044926 , 5) - 
self.assertAlmostEqual(gw.mo_energy[1][1][noccb-1], -0.4568894 , 5) - self.assertAlmostEqual(gw.mo_energy[1][1][noccb], 0.07922511, 5) - -if __name__ == '__main__': - print('Full Tests for KUGW') - unittest.main() diff --git a/pyscf/pbc/gw/test/test_kugw_ac.py b/pyscf/pbc/gw/test/test_kugw_ac.py new file mode 100644 index 0000000000..86ec2f11b5 --- /dev/null +++ b/pyscf/pbc/gw/test/test_kugw_ac.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python + +import pytest + +from pyscf.pbc import df, gto, scf +from pyscf.pbc.gw.kugw_ac import KUGWAC + + +@pytest.fixture(scope="module") +def hydrogen_kuhf(): + cell = gto.Cell() + cell.build( + unit="B", + a=[[0.0, 6.74027466, 6.74027466], [6.74027466, 0.0, 6.74027466], [6.74027466, 6.74027466, 0.0]], + atom="""H 0 0 0 + H 1.68506866 1.68506866 1.68506866 + H 3.37013733 3.37013733 3.37013733""", + basis="gth-dzvp", + pseudo="gth-pade", + verbose=0, + output="/dev/null", + charge=0, + spin=3, + ) + + kpts = cell.make_kpts([3, 1, 1], scaled_center=[0, 0, 0]) + gdf = df.RSDF(cell, kpts) + gdf.build() + + kmf = scf.KUHF(cell, kpts, exxdiv="ewald") + kmf.with_df = gdf + kmf.conv_tol = 1e-12 + kmf.kernel() + + yield kmf + + +def test_kugwac_pade_no_fc(hydrogen_kuhf): + gw = KUGWAC(hydrogen_kuhf) + gw.qpe_linearized = False + gw.fc = False + gw.kernel(kptlist=[0, 1, 2], orbs=range(0, 5)) + + assert gw.mo_energy[0][0][1] == pytest.approx(-0.28661016, abs=1e-5) + assert gw.mo_energy[0][0][2] == pytest.approx(0.13952572, abs=1e-5) + assert gw.mo_energy[1][1][0] == pytest.approx(-0.34174199, abs=1e-5) + assert gw.mo_energy[1][1][1] == pytest.approx(0.08296260, abs=1e-5) + + +def test_kugwac_pade_with_fc(hydrogen_kuhf): + gw = KUGWAC(hydrogen_kuhf) + gw.qpe_linearized = False + gw.fc = True + gw.kernel(kptlist=[0, 1, 2], orbs=range(0, 5)) + + assert gw.mo_energy[0][0][1] == pytest.approx(-0.48063839, abs=1e-5) + assert gw.mo_energy[0][0][2] == pytest.approx(0.13870787, abs=1e-5) + assert gw.mo_energy[1][1][0] == pytest.approx(-0.53502818, 
abs=1e-5) + assert gw.mo_energy[1][1][1] == pytest.approx(0.08214267, abs=1e-5) + + +def test_kugwac_pade_with_fc_frozen_orbitals(hydrogen_kuhf): + gw = KUGWAC(hydrogen_kuhf) + gw.qpe_linearized = False + gw.fc = True + gw.frozen = [12, 13, 14] + gw.kernel() + + assert gw.mo_energy[0][0][1] == pytest.approx(-0.47649992, abs=1e-5) + assert gw.mo_energy[0][0][2] == pytest.approx(0.14513332, abs=1e-5) diff --git a/pyscf/pbc/gw/test/test_kurpa.py b/pyscf/pbc/gw/test/test_kurpa.py new file mode 100644 index 0000000000..c9577ca2d0 --- /dev/null +++ b/pyscf/pbc/gw/test/test_kurpa.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python + +import pytest + +from pyscf.pbc import df, gto, scf +from pyscf.pbc.gw.kurpa import KURPA + + +@pytest.fixture(scope="module") +def hydrogen_kuhf(): + cell = gto.Cell() + cell.build( + unit="B", + a=[[0.0, 6.74027466, 6.74027466], [6.74027466, 0.0, 6.74027466], [6.74027466, 6.74027466, 0.0]], + atom="""H 0 0 0 + H 1.68506866 1.68506866 1.68506866 + H 3.37013733 3.37013733 3.37013733""", + basis="gth-dzvp", + pseudo="gth-pade", + verbose=0, + output="/dev/null", + charge=0, + spin=3, + ) + + kpts = cell.make_kpts([3, 1, 1], scaled_center=[0, 0, 0]) + gdf = df.RSDF(cell, kpts) + gdf.build() + + kmf = scf.KUHF(cell, kpts, exxdiv="ewald") + kmf.with_df = gdf + kmf.conv_tol = 1e-12 + kmf.kernel() + + yield kmf + + cell.stdout.close() + + +def test_kurpa_no_fc(hydrogen_kuhf): + rpa = KURPA(hydrogen_kuhf) + rpa.fc = False + rpa.kernel() + + assert rpa.e_corr == pytest.approx(-0.04288352903004621, abs=1e-6) + assert rpa.e_tot == pytest.approx(-1.584806462873674, abs=1e-6) + + +def test_kurpa_no_fc_outcore(hydrogen_kuhf): + rpa = KURPA(hydrogen_kuhf) + rpa.fc = False + rpa.outcore = True + rpa.segsize = 3 + rpa.kernel() + + assert rpa.e_corr == pytest.approx(-0.04288352903004621, abs=1e-6) + assert rpa.e_tot == pytest.approx(-1.584806462873674, abs=1e-6) + + +def test_kurpa_with_fc(hydrogen_kuhf): + rpa = KURPA(hydrogen_kuhf) + rpa.fc = True + rpa.kernel() + + 
assert rpa.e_corr == pytest.approx(-0.04295466718074476, abs=1e-6) + + +def test_kurpa_with_fc_outcore(hydrogen_kuhf): + rpa = KURPA(hydrogen_kuhf) + rpa.fc = True + rpa.outcore = True + rpa.segsize = 3 + rpa.kernel() + + assert rpa.e_corr == pytest.approx(-0.04295466718074476, abs=1e-6) + diff --git a/pyscf/pbc/scf/khf.py b/pyscf/pbc/scf/khf.py index 3e054165cf..e49e95307b 100644 --- a/pyscf/pbc/scf/khf.py +++ b/pyscf/pbc/scf/khf.py @@ -531,9 +531,9 @@ def kmesh(self): '''The number of k-points along each axis in the first Brillouin zone''' from pyscf.pbc.tools.k2gamma import kpts_to_kmesh kpts = self.kpts - kmesh = kpts_to_kmesh(kpts) + kmesh = kpts_to_kmesh(self.cell, kpts) if len(kpts) != np.prod(kmesh): - logger.WARN(self, 'K-points specified in %s are not Monkhorst-Pack %s grids', + logger.warn(self, 'K-points specified in %s are not Monkhorst-Pack %s grids', self, kmesh) return kmesh diff --git a/pyscf/pbc/scf/test/test_hf.py b/pyscf/pbc/scf/test/test_hf.py index 6abe18fb1f..df39dfe396 100644 --- a/pyscf/pbc/scf/test/test_hf.py +++ b/pyscf/pbc/scf/test/test_hf.py @@ -17,7 +17,6 @@ # import unittest -import tempfile import numpy from pyscf import lib from pyscf.scf import atom_hf @@ -153,9 +152,9 @@ def test_init_guess_by_chkfile(self): numpy.random.seed(1) k = numpy.random.random(3) mf = pbchf.RHF(cell, k, exxdiv='vcut_sph') - mf.chkfile = tempfile.NamedTemporaryFile().name mf.max_cycle = 1 mf.diis = None + mf.chkfile = lib.NamedTemporaryFile().name e1 = mf.kernel() self.assertAlmostEqual(e1, -4.132445328608581, 7) diff --git a/pyscf/pbc/scf/test/test_khf.py b/pyscf/pbc/scf/test/test_khf.py index 101d6b12f7..b1008ded1c 100644 --- a/pyscf/pbc/scf/test/test_khf.py +++ b/pyscf/pbc/scf/test/test_khf.py @@ -18,7 +18,6 @@ # import unittest -import tempfile import numpy as np from pyscf import lib @@ -101,8 +100,8 @@ def test_init_guess_by_chkfile(self): kpts = cell.make_kpts(nk) kmf = khf.KRHF(cell, kpts, exxdiv='vcut_sph') - kmf.chkfile = 
tempfile.NamedTemporaryFile().name kmf.conv_tol = 1e-9 + kmf.chkfile = lib.NamedTemporaryFile().name ekpt = kmf.scf() dm1 = kmf.make_rdm1() dm2 = kmf.from_chk(kmf.chkfile) @@ -301,6 +300,13 @@ def test_damping(self): for k in range(len(kpts)): self.assertAlmostEqual(abs(f_damp[k] - (f[k]*(1-damp) + f_prev[k]*damp)).max(), 0, 9) + def test_kmesh_property(self): + kmf = cell.KRHF(kpts=cell.make_kpts([3,1,1])) + assert np.array_equal(kmf.kmesh, [3, 1, 1]) + + kmf = cell.KRHF(kpts=cell.make_kpts([18]*3)) + assert np.array_equal(kmf.kmesh, [13, 13, 13]) + if __name__ == '__main__': print("Full Tests for pbc.scf.khf") unittest.main() diff --git a/pyscf/pbc/scf/test/test_rohf.py b/pyscf/pbc/scf/test/test_rohf.py index b093ef0c9f..641a0f9477 100644 --- a/pyscf/pbc/scf/test/test_rohf.py +++ b/pyscf/pbc/scf/test/test_rohf.py @@ -15,7 +15,6 @@ # import unittest -import tempfile import numpy as np from pyscf import lib from pyscf.pbc import gto as pgto @@ -80,10 +79,10 @@ def test_init_guess_by_chkfile(self): np.random.seed(1) k = np.random.random(3) mf = pscf.KROHF(cell, [k], exxdiv='vcut_sph') - mf.chkfile = tempfile.NamedTemporaryFile().name mf.init_guess = 'hcore' mf.max_cycle = 1 mf.diis = None + mf.chkfile = lib.NamedTemporaryFile().name e1 = mf.kernel() self.assertAlmostEqual(e1, -3.4376090968645068, 7) diff --git a/pyscf/pbc/scf/test/test_uhf.py b/pyscf/pbc/scf/test/test_uhf.py index 195c207eed..c684b0ca3d 100644 --- a/pyscf/pbc/scf/test/test_uhf.py +++ b/pyscf/pbc/scf/test/test_uhf.py @@ -15,7 +15,6 @@ # import unittest -import tempfile import numpy as np from pyscf import lib from pyscf.pbc import gto as pgto @@ -82,9 +81,9 @@ def test_init_guess_by_chkfile(self): np.random.seed(1) k = np.random.random(3) mf = pscf.KUHF(cell, [k], exxdiv='vcut_sph') - mf.chkfile = tempfile.NamedTemporaryFile().name mf.max_cycle = 1 mf.diis = None + mf.chkfile = lib.NamedTemporaryFile().name e1 = mf.kernel() self.assertAlmostEqual(e1, -3.4070772194665477, 7) diff --git 
a/pyscf/pbc/tdscf/test/test_uks.py b/pyscf/pbc/tdscf/test/test_uks.py index 69efe556a0..070a699925 100644 --- a/pyscf/pbc/tdscf/test/test_uks.py +++ b/pyscf/pbc/tdscf/test/test_uks.py @@ -87,7 +87,7 @@ def tearDownClass(cls): def kernel(self, TD, ref, **kwargs): td = getattr(self.mf, TD)().set(nstates=self.nstates, **kwargs).run() - self.assertAlmostEqual(abs(td.e[:self.nstates_test] * unitev - ref).max(), 0, 5) + self.assertAlmostEqual(abs(td.e[:self.nstates_test] * unitev - ref).max(), 0, 4) return td def test_tda(self): @@ -103,7 +103,7 @@ def test_tdhf(self): td = self.kernel('TDDFT', ref, conv_tol=1e-8) a, b = td.get_ab() eref = diagonalize(a, b) - self.assertAlmostEqual(abs(td.e[:4] - eref[:4]).max(), 0, 8) + self.assertAlmostEqual(abs(td.e[:4] - eref[:4]).max(), 0, 7) def check_rsh_tda(self, xc, place=6): cell = self.cell diff --git a/pyscf/pbc/x2c/sfx2c1e.py b/pyscf/pbc/x2c/sfx2c1e.py index a2c3a1ffa4..d69a898b37 100644 --- a/pyscf/pbc/x2c/sfx2c1e.py +++ b/pyscf/pbc/x2c/sfx2c1e.py @@ -107,7 +107,16 @@ def get_hcore(self, cell=None, kpts=None, kpt=None): else: return super(x2c._X2C_SCF, self).get_hcore(cell, kpts) -class PBCX2CHelper(x2c.X2C): + def undo_x2c(self): + obj = lib.view(self, lib.drop_class(self.__class__, SFX2C1E_SCF)) + del obj.with_x2c + return obj + + def to_gpu(self): + obj = self.undo_x2c().to_gpu().sfx2c1e() + return lib.to_gpu(self, obj) + +class PBCX2CHelper(x2c.X2CHelperBase): exp_drop = getattr(__config__, 'pbc_x2c_X2C_exp_drop', 0.2) # 1e: X2C1e, atom1e: X2C1e with one-center approximation @@ -118,13 +127,15 @@ class PBCX2CHelper(x2c.X2C): def __init__(self, cell, kpts=None): self.cell = cell - x2c.X2C.__init__(self, cell) + x2c.X2CHelperBase.__init__(self, cell) def reset(self, cell=None): if cell is not None: self.cell = cell return self + to_gpu = lib.to_gpu + class SpinFreeX2CHelper(PBCX2CHelper): '''1-component X2c Foldy-Wouthuysen (FW Hamiltonian (spin-free part only) ''' @@ -142,11 +153,12 @@ def get_hcore(self, cell=None, 
kpts=None): c = lib.param.LIGHT_SPEED assert ('1E' in self.approx.upper()) if 'ATOM' in self.approx.upper(): + raise NotImplementedError( + 'Atomic X is generated in molecular orbitals. ' + 'It might be incompatible with PBC setup.') atom_slices = xcell.offset_nr_by_atom() nao = xcell.nao_nr() x = numpy.zeros((nao,nao)) - vloc = numpy.zeros((nao,nao)) - wloc = numpy.zeros((nao,nao)) for ia in range(xcell.natm): ish0, ish1, p0, p1 = atom_slices[ia] shls_slice = (ish0, ish1, ish0, ish1) @@ -156,8 +168,6 @@ def get_hcore(self, cell=None, kpts=None): z = -xcell.atom_charge(ia) v1 = z * xcell.intor('int1e_rinv', shls_slice=shls_slice) w1 = z * xcell.intor('int1e_prinvp', shls_slice=shls_slice) - vloc[p0:p1,p0:p1] = v1 - wloc[p0:p1,p0:p1] = w1 x[p0:p1,p0:p1] = x2c._x2c1e_xmatrix(t1, v1, w1, s1, c) else: w = get_pnucp(with_df, kpts_lst) @@ -175,13 +185,9 @@ def get_hcore(self, cell=None, kpts=None): h1_kpts = [] for k in range(len(kpts_lst)): if 'ATOM' in self.approx.upper(): - # The treatment of pnucp local part has huge effects to hcore - #h1 = x2c._get_hcore_fw(t[k], vloc, wloc, s[k], x, c) - vloc + v[k] - #h1 = x2c._get_hcore_fw(t[k], v[k], w[k], s[k], x, c) - h1 = x2c._get_hcore_fw(t[k], v[k], wloc, s[k], x, c) + h1 = x2c._get_hcore_fw(t[k], v[k], w[k], s[k], x, c) else: - xk = x2c._x2c1e_xmatrix(t[k], v[k], w[k], s[k], c) - h1 = x2c._get_hcore_fw(t[k], v[k], w[k], s[k], xk, c) + h1 = x2c._x2c1e_get_hcore(t[k], v[k], w[k], s[k], c) if self.basis is not None: # If cell = xcell, U = identity matrix @@ -201,6 +207,9 @@ def get_xmat(self, cell=None, kpts=None): c = lib.param.LIGHT_SPEED assert ('1E' in self.approx.upper()) if 'ATOM' in self.approx.upper(): + raise NotImplementedError( + 'Atomic X is generated in molecular orbitals. 
' + 'It might be incompatible with PBC setup.') atom_slices = xcell.offset_nr_by_atom() nao = xcell.nao_nr() x = numpy.zeros((nao,nao)) @@ -310,46 +319,3 @@ def get_pnucp(mydf, kpts=None): if kpts is None or numpy.shape(kpts) == (3,): wj_kpts = wj_kpts[0] return numpy.asarray(wj_kpts) - - -if __name__ == '__main__': - from pyscf.pbc import scf - cell = pbcgto.Cell() - cell.build(unit = 'B', - a = numpy.eye(3)*4, - mesh = [11]*3, - atom = 'H 0 0 0; H 0 0 1.8', - verbose = 4, - basis='sto3g') - lib.param.LIGHT_SPEED = 2 - mf = scf.RHF(cell) - mf.with_df = aft.AFTDF(cell) - enr = mf.kernel() - print('E(NR) = %.12g' % enr) - - mf = sfx2c1e(mf) - esfx2c = mf.kernel() - print('E(SFX2C1E) = %.12g' % esfx2c) - - mf = scf.KRHF(cell) - mf.with_df = aft.AFTDF(cell) - mf.kpts = cell.make_kpts([2,2,1]) - enr = mf.kernel() - print('E(k-NR) = %.12g' % enr) - - mf = sfx2c1e(mf) - esfx2c = mf.kernel() - print('E(k-SFX2C1E) = %.12g' % esfx2c) - -# cell = pbcgto.M(unit = 'B', -# a = numpy.eye(3)*4, -# atom = 'H 0 0 0; H 0 0 1.8', -# mesh = None, -# dimension = 2, -# basis='sto3g') -# with_df = aft.AFTDF(cell) -# w0 = get_pnucp(with_df, cell.make_kpts([2,2,1])) -# with_df = aft.AFTDF(cell) -# with_df.eta = 0 -# w1 = get_pnucp(with_df, cell.make_kpts([2,2,1])) -# print(abs(w0-w1).max()) diff --git a/pyscf/pbc/x2c/test/test_x2c.py b/pyscf/pbc/x2c/test/test_x2c.py index c14eeadd6c..cfd435ca86 100644 --- a/pyscf/pbc/x2c/test/test_x2c.py +++ b/pyscf/pbc/x2c/test/test_x2c.py @@ -54,6 +54,7 @@ def tearDownModule(): del cell, cell1 class KnownValues(unittest.TestCase): + @unittest.skip('The implementation of atom-X approximation requires more validation.') def test_hf(self): with lib.light_speed(4) as c: mf = scf.RHF(cell1).sfx2c1e() @@ -69,6 +70,7 @@ def test_hf(self): h1 = mf.get_hcore(kpt=kpts[1]) self.assertAlmostEqual(numpy.einsum('ij,ji', dm, h1), -0.32361715420090226 + 0j, 8) + @unittest.skip('The implementation of atom-X approximation requires more validation.') def 
test_hf_high_cost(self): with lib.light_speed(2) as c: mf = scf.RHF(cell).sfx2c1e() @@ -89,6 +91,7 @@ def test_hf_high_cost(self): h1 = mf.get_hcore(kpt=kpts[1]) self.assertAlmostEqual(numpy.einsum('ij,ji', dm, h1), -0.04113247191600125+0j, 8) + @unittest.skip('The implementation of atom-X approximation requires more validation.') def test_khf_high_cost(self): with lib.light_speed(2) as c: mf = scf.KRHF(cell).sfx2c1e() diff --git a/pyscf/pbc/x2c/x2c1e.py b/pyscf/pbc/x2c/x2c1e.py index 69a8ca66f7..e4955a4f07 100644 --- a/pyscf/pbc/x2c/x2c1e.py +++ b/pyscf/pbc/x2c/x2c1e.py @@ -107,6 +107,15 @@ def get_hcore(self, cell=None, kpts=None, kpt=None): else: return super(x2c._X2C_SCF).get_hcore(cell, kpts) + def undo_x2c(self): + obj = lib.view(self, lib.drop_class(self.__class__, X2C1E_GSCF)) + del obj.with_x2c + return obj + + def to_gpu(self): + obj = self.undo_x2c().to_gpu().x2c1e() + return lib.to_gpu(self, obj) + class SpinOrbitalX2C1EHelper(sfx2c1e.PBCX2CHelper): def get_hcore(self, cell=None, kpts=None): if cell is None: @@ -125,7 +134,9 @@ def get_hcore(self, cell=None, kpts=None): c = lib.param.LIGHT_SPEED if 'ATOM' in self.approx.upper(): - raise NotImplementedError + raise NotImplementedError( + 'Atomic X is generated in molecular orbitals. 
' + 'It might be incompatible with PBC setup.') else: w_sr = sfx2c1e.get_pnucp(with_df, kpts_lst) w_soc = get_pbc_pvxp(with_df, kpts_lst) @@ -155,8 +166,7 @@ def get_hcore(self, cell=None, kpts=None): if 'ATOM' in self.approx.upper(): raise NotImplementedError else: - xk = x2c._x2c1e_xmatrix(t[k], v[k], w[k], s[k], c) - h1 = x2c._get_hcore_fw(t[k], v[k], w[k], s[k], xk, c) + h1 = x2c._x2c1e_get_hcore(t[k], v[k], w[k], s[k], c) if self.basis is not None: # If cell = xcell, U = identity matrix diff --git a/pyscf/qmmm/pbc/itrf.py b/pyscf/qmmm/pbc/itrf.py index 408661f8d0..ac48a69a0c 100644 --- a/pyscf/qmmm/pbc/itrf.py +++ b/pyscf/qmmm/pbc/itrf.py @@ -804,60 +804,48 @@ def grad_kTij(R, r, eta): TGGcosGvRqm = lib.einsum("iab,ga,gb,ig->g", qm_quads, Gv, Gv, cosGvRqm) TGGsinGvRqm = lib.einsum("iab,ga,gb,ig->g", qm_quads, Gv, Gv, sinGvRqm) + DGqm = lib.einsum('ia,ga->ig', qm_dipoles, Gv) + TGGqm = lib.einsum('iab,ga,gb->ig', qm_quads, Gv, Gv) + qm_ewg_grad = np.zeros_like(qm_coords) if with_mm: mm_ewg_grad = np.zeros_like(mm_coords) # qm pc - mm pc - p = ['einsum_path', (3, 4), (1, 3), (1, 2), (0, 1)] - qm_ewg_grad -= lib.einsum('i,gx,ig,g,g->ix', qm_charges, Gv, sinGvRqm, zcosGvRmm, Gpref, optimize=p) - qm_ewg_grad += lib.einsum('i,gx,ig,g,g->ix', qm_charges, Gv, cosGvRqm, zsinGvRmm, Gpref, optimize=p) + qm_ewg_grad -= qm_charges[:,None] * lib.einsum('ig,gx->ix', sinGvRqm, Gv*(zcosGvRmm*Gpref)[:,None]) + qm_ewg_grad += qm_charges[:,None] * lib.einsum('ig,gx->ix', cosGvRqm, Gv*(zsinGvRmm*Gpref)[:,None]) if with_mm: - p = ['einsum_path', (0, 2), (1, 2), (0, 2), (0, 1)] - mm_ewg_grad -= lib.einsum('i,gx,ig,g,g->ix', mm_charges, Gv, sinGvRmm, zcosGvRqm, Gpref, optimize=p) - mm_ewg_grad += lib.einsum('i,gx,ig,g,g->ix', mm_charges, Gv, cosGvRmm, zsinGvRqm, Gpref, optimize=p) + mm_ewg_grad -= mm_charges[:,None] * lib.einsum('ig,gx->ix', sinGvRmm, Gv*(zcosGvRqm*Gpref)[:,None]) + mm_ewg_grad += mm_charges[:,None] * lib.einsum('ig,gx->ix', cosGvRmm, Gv*(zsinGvRqm*Gpref)[:,None]) # 
qm dip - mm pc - p = ['einsum_path', (4, 5), (1, 4), (0, 1), (0, 2), (0, 1)] - qm_ewg_grad -= lib.einsum('ia,gx,ga,ig,g,g->ix', qm_dipoles, Gv, Gv, sinGvRqm, zsinGvRmm, Gpref, optimize=p) - qm_ewg_grad -= lib.einsum('ia,gx,ga,ig,g,g->ix', qm_dipoles, Gv, Gv, cosGvRqm, zcosGvRmm, Gpref, optimize=p) + qm_ewg_grad -= lib.einsum('ig,gx->ix', DGqm*sinGvRqm, Gv*(zsinGvRmm*Gpref)[:,None]) + qm_ewg_grad -= lib.einsum('ig,gx->ix', DGqm*cosGvRqm, Gv*(zcosGvRmm*Gpref)[:,None]) if with_mm: - p = ['einsum_path', (1, 3), (0, 2), (0, 2), (0, 1)] - mm_ewg_grad += lib.einsum('g,j,gx,jg,g->jx', DGcosGvRqm, mm_charges, Gv, cosGvRmm, Gpref, optimize=p) - mm_ewg_grad += lib.einsum('g,j,gx,jg,g->jx', DGsinGvRqm, mm_charges, Gv, sinGvRmm, Gpref, optimize=p) + mm_ewg_grad += mm_charges[:,None] * lib.einsum('ig,gx->ix', cosGvRmm, Gv*(DGcosGvRqm*Gpref)[:,None]) + mm_ewg_grad += mm_charges[:,None] * lib.einsum('ig,gx->ix', sinGvRmm, Gv*(DGsinGvRqm*Gpref)[:,None]) # qm quad - mm pc - p = ['einsum_path', (5, 6), (0, 5), (0, 2), (2, 3), (1, 2), (0, 1)] - qm_ewg_grad += lib.einsum('ga,gb,iab,gx,ig,g,g->ix', Gv, Gv, qm_quads, - Gv, sinGvRqm, zcosGvRmm, Gpref, optimize=p) / 3 - qm_ewg_grad -= lib.einsum('ga,gb,iab,gx,ig,g,g->ix', Gv, Gv, qm_quads, - Gv, cosGvRqm, zsinGvRmm, Gpref, optimize=p) / 3 + qm_ewg_grad += lib.einsum('ig,gx->ix', TGGqm*sinGvRqm, Gv*(zcosGvRmm*Gpref)[:,None]) / 3 + qm_ewg_grad -= lib.einsum('ig,gx->ix', TGGqm*cosGvRqm, Gv*(zsinGvRmm*Gpref)[:,None]) / 3 if with_mm: - p = ['einsum_path', (1, 3), (0, 2), (0, 2), (0, 1)] - mm_ewg_grad += lib.einsum('g,j,gx,jg,g->jx', TGGcosGvRqm, mm_charges, Gv, sinGvRmm, Gpref, optimize=p) / 3 - mm_ewg_grad -= lib.einsum('g,j,gx,jg,g->jx', TGGsinGvRqm, mm_charges, Gv, cosGvRmm, Gpref, optimize=p) / 3 + mm_ewg_grad += mm_charges[:,None] * lib.einsum('ig,gx->ix', sinGvRmm, Gv*(TGGcosGvRqm*Gpref)[:,None]) / 3 + mm_ewg_grad -= mm_charges[:,None] * lib.einsum('ig,gx->ix', cosGvRmm, Gv*(TGGsinGvRqm*Gpref)[:,None]) / 3 # qm pc - qm pc - p = 
['einsum_path', (3, 4), (1, 3), (1, 2), (0, 1)] - qm_ewg_grad -= lib.einsum('i,gx,ig,g,g->ix', qm_charges, Gv, sinGvRqm, zcosGvRqm, Gpref, optimize=p) - qm_ewg_grad += lib.einsum('i,gx,ig,g,g->ix', qm_charges, Gv, cosGvRqm, zsinGvRqm, Gpref, optimize=p) + qm_ewg_grad -= qm_charges[:,None] * lib.einsum('ig,gx->ix', sinGvRqm, Gv*(zcosGvRqm*Gpref)[:,None]) + qm_ewg_grad += qm_charges[:,None] * lib.einsum('ig,gx->ix', cosGvRqm, Gv*(zsinGvRqm*Gpref)[:,None]) # qm pc - qm dip - qm_ewg_grad += lib.einsum('i,gx,ig,g,g->ix', qm_charges, Gv, cosGvRqm, DGcosGvRqm, Gpref, optimize=p) - qm_ewg_grad += lib.einsum('i,gx,ig,g,g->ix', qm_charges, Gv, sinGvRqm, DGsinGvRqm, Gpref, optimize=p) - p = ['einsum_path', (3, 5), (1, 4), (1, 3), (1, 2), (0, 1)] - qm_ewg_grad -= lib.einsum('ja,ga,gx,g,jg,g->jx', qm_dipoles, Gv, Gv, zsinGvRqm, sinGvRqm, Gpref, optimize=p) - qm_ewg_grad -= lib.einsum('ja,ga,gx,g,jg,g->jx', qm_dipoles, Gv, Gv, zcosGvRqm, cosGvRqm, Gpref, optimize=p) + qm_ewg_grad += qm_charges[:,None] * lib.einsum('ig,gx->ix', cosGvRqm, Gv*(DGcosGvRqm*Gpref)[:,None]) + qm_ewg_grad += qm_charges[:,None] * lib.einsum('ig,gx->ix', sinGvRqm, Gv*(DGsinGvRqm*Gpref)[:,None]) + qm_ewg_grad -= lib.einsum('ig,gx->ix', DGqm*sinGvRqm, Gv*(zsinGvRqm*Gpref)[:,None]) + qm_ewg_grad -= lib.einsum('ig,gx->ix', DGqm*cosGvRqm, Gv*(zcosGvRqm*Gpref)[:,None]) # qm dip - qm dip - p = ['einsum_path', (4, 5), (1, 4), (1, 3), (1, 2), (0, 1)] - qm_ewg_grad -= lib.einsum('ia,ga,gx,ig,g,g->ix', qm_dipoles, Gv, Gv, sinGvRqm, DGcosGvRqm, Gpref, optimize=p) - qm_ewg_grad += lib.einsum('ia,ga,gx,ig,g,g->ix', qm_dipoles, Gv, Gv, cosGvRqm, DGsinGvRqm, Gpref, optimize=p) + qm_ewg_grad -= lib.einsum('ig,gx->ix', DGqm*sinGvRqm, Gv*(DGcosGvRqm*Gpref)[:,None]) + qm_ewg_grad += lib.einsum('ig,gx->ix', DGqm*cosGvRqm, Gv*(DGsinGvRqm*Gpref)[:,None]) # qm pc - qm quad - p = ['einsum_path', (3, 4), (1, 3), (1, 2), (0, 1)] - qm_ewg_grad += lib.einsum('i,gx,ig,g,g->ix', qm_charges, Gv, sinGvRqm, TGGcosGvRqm, Gpref, optimize=p) 
/ 3 - qm_ewg_grad -= lib.einsum('i,gx,ig,g,g->ix', qm_charges, Gv, cosGvRqm, TGGsinGvRqm, Gpref, optimize=p) / 3 - p = ['einsum_path', (4, 6), (1, 5), (1, 2), (2, 3), (1, 2), (0, 1)] - qm_ewg_grad += lib.einsum('jab,ga,gb,gx,g,jg,g->jx', qm_quads, Gv, Gv, - Gv, zcosGvRqm, sinGvRqm, Gpref, optimize=p) / 3 - qm_ewg_grad -= lib.einsum('jab,ga,gb,gx,g,jg,g->jx', qm_quads, Gv, Gv, - Gv, zsinGvRqm, cosGvRqm, Gpref, optimize=p) / 3 + qm_ewg_grad += qm_charges[:,None] * lib.einsum('ig,gx->ix', sinGvRqm, Gv*(TGGcosGvRqm*Gpref)[:,None]) / 3 + qm_ewg_grad -= qm_charges[:,None] * lib.einsum('ig,gx->ix', cosGvRqm, Gv*(TGGsinGvRqm*Gpref)[:,None]) / 3 + qm_ewg_grad += lib.einsum('ig,gx->ix', TGGqm*sinGvRqm, Gv*(zcosGvRqm*Gpref)[:,None]) / 3 + qm_ewg_grad -= lib.einsum('ig,gx->ix', TGGqm*cosGvRqm, Gv*(zsinGvRqm*Gpref)[:,None]) / 3 logger.timer(self, 'grad_ewald k-space', *cput2) logger.timer(self, 'grad_ewald', *cput0) diff --git a/pyscf/qmmm/pbc/mm_mole.py b/pyscf/qmmm/pbc/mm_mole.py index d1d4d9533e..fbe93e5cbc 100644 --- a/pyscf/qmmm/pbc/mm_mole.py +++ b/pyscf/qmmm/pbc/mm_mole.py @@ -305,13 +305,11 @@ def get_ewald_pot(self, coords1, coords2=None, charges2=None): ewg0 = lib.einsum('ig,g,g->i', cosGvR1, zcosGvR2, Gpref) ewg0 += lib.einsum('ig,g,g->i', sinGvR1, zsinGvR2, Gpref) # qm dip - mm pc - p = ['einsum_path', (2, 3), (0, 2), (0, 1)] - ewg1 = lib.einsum('gx,ig,g,g->ix', Gv, cosGvR1, zsinGvR2, Gpref, optimize=p) - ewg1 -= lib.einsum('gx,ig,g,g->ix', Gv, sinGvR1, zcosGvR2, Gpref, optimize=p) + ewg1 = lib.einsum('gx,ig->ix', Gv*(zsinGvR2*Gpref)[:,None], cosGvR1) + ewg1 -= lib.einsum('gx,ig->ix', Gv*(zcosGvR2*Gpref)[:,None], sinGvR1) # qm quad - mm pc - p = ['einsum_path', (3, 4), (0, 3), (0, 2), (0, 1)] - ewg2 = -lib.einsum('gx,gy,ig,g,g->ixy', Gv, Gv, cosGvR1, zcosGvR2, Gpref, optimize=p) - ewg2 += -lib.einsum('gx,gy,ig,g,g->ixy', Gv, Gv, sinGvR1, zsinGvR2, Gpref, optimize=p) + ewg2 = -lib.einsum('ig,gx,gy->ixy', cosGvR1*(zcosGvR2*Gpref)[None,:], Gv, Gv) + ewg2 += 
-lib.einsum('ig,gx,gy->ixy', sinGvR1*(zsinGvR2*Gpref)[None,:], Gv, Gv) ewg2 /= 3 else: # qm pc - qm pc diff --git a/pyscf/scf/__init__.py b/pyscf/scf/__init__.py index 0dca602af5..07531aff45 100644 --- a/pyscf/scf/__init__.py +++ b/pyscf/scf/__init__.py @@ -36,11 +36,12 @@ chkfile : str checkpoint file to save MOs, orbital energies etc. conv_tol : float - converge threshold. Default is 1e-10 + converge threshold. Default is 1e-9 max_cycle : int max number of iterations. Default is 50 init_guess : str - initial guess method. It can be one of 'minao', 'atom', '1e', 'chkfile'. + initial guess method. It can be one of 'minao', 'atom', 'huckel', + 'mod_huckel', '1e', 'hcore', 'sap', 'chkfile'. Default is 'minao' DIIS : class listed in :mod:`scf.diis` Default is :class:`diis.SCF_DIIS`. Set it to None/False to turn off DIIS. diff --git a/pyscf/scf/_vhf.py b/pyscf/scf/_vhf.py index 65833914bf..98fb062b94 100644 --- a/pyscf/scf/_vhf.py +++ b/pyscf/scf/_vhf.py @@ -15,15 +15,15 @@ import sys import ctypes -import _ctypes import numpy from pyscf import lib from pyscf import gto from pyscf.gto.moleintor import make_cintopt, make_loc, ascint3 libcvhf = lib.load_library('libcvhf') + def _fpointer(name): - return ctypes.c_void_p(_ctypes.dlsym(libcvhf._handle, name)) + return ctypes.cast(getattr(libcvhf, name), ctypes.c_void_p) class VHFOpt: def __init__(self, mol, intor=None, diff --git a/pyscf/scf/dhf.py b/pyscf/scf/dhf.py index 1703ceab0d..18e92fb606 100644 --- a/pyscf/scf/dhf.py +++ b/pyscf/scf/dhf.py @@ -842,8 +842,39 @@ def __init__(self, mol): raise RuntimeError('zquatev library is required to perform Kramers-restricted DHF') UHF.__init__(self, mol) + def check_linear_dependency(self, s, verbose=None): + log = logger.new_logger(self, verbose) + idx = _kramers_pair_sort_ao_idx(self.mol, four_component=True) + s = s[idx[:,None], idx] + e, v = zquatev.eigh(s) + if log is not None: + abs_e = abs(e) + emax = abs_e.max() + emin = abs_e.min() + c = emax / emin + log.debug('cond(S) 
= %s', c) + if c > 1e10: + log.warn('Singularity detected in the overlap matrix. ' + 'SCF may be inaccurate and difficult to converge.') + + if hf.remove_overlap_zero_eigenvalue: + mask = e > hf.overlap_zero_eigenvalue_threshold + x = v[:,mask] / numpy.sqrt(e[mask]) + else: + x = v / numpy.sqrt(e) + x1 = numpy.empty_like(x) + x1[idx] = x + return x1 + def _eigh(self, h, s, overwrite=False, x=None): - return zquatev.solve_KR_FCSCE(self.mol, h, s) + if x is None: + if h.dtype != s.dtype: + s = s.astype(h.dtype) + return zquatev.solve_KR_FCSCE(self.mol, h, s) + else: + h = x.conj().T.dot(h).dot(x) + e, c = zquatev.eigh(h, iop=1) + return e, x.dot(c) def x2c1e(self): from pyscf.x2c import x2c @@ -1110,26 +1141,13 @@ def set_dm(self, dm, atm, bas, env): mol._bas.ctypes, ctypes.c_int(nbas), mol._env.ctypes) self.dm_cond = dm_cond - -if __name__ == '__main__': - import pyscf.gto - mol = pyscf.gto.Mole() - mol.verbose = 5 - mol.output = 'out_dhf' - - mol.atom.extend([['He', (0.,0.,0.)], ]) - mol.basis = { - 'He': [(0, 0, (1, 1)), - (0, 0, (3, 1)), - (1, 0, (1, 1)), ]} - mol.build() - - ############## - # SCF result - method = UHF(mol) - energy = method.scf() #-2.38146942868 - print(energy) - method.with_gaunt = True - print(method.scf()) # -2.38138339005 - method.with_breit = True - print(method.scf()) # -2.38138339005 +def _kramers_pair_sort_ao_idx(mol, four_component=True): + trmaps = mol.time_reversal_map() + idxA = numpy.where(trmaps > 0)[0] + idxB = trmaps[idxA] - 1 + if four_component: + n = trmaps.size + idx = numpy.hstack((idxA,idxA+n,idxB,idxB+n)) + else: + idx = numpy.hstack((idxA,idxB)) + return idx diff --git a/pyscf/scf/dispersion.py b/pyscf/scf/dispersion.py index 1c4a86f01c..91080c709e 100644 --- a/pyscf/scf/dispersion.py +++ b/pyscf/scf/dispersion.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright 2014-2023 The PySCF Developers. All Rights Reserved. +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,13 +14,13 @@ # limitations under the License. # # Author: Xiaojie Wu +# modified by Jiashu Liang # ''' dispersion correction for HF and DFT ''' -import warnings from functools import lru_cache from pyscf.lib import logger from pyscf import scf @@ -46,6 +46,11 @@ 'wb97m-d3bj': ('wb97m-v', False, 'd3bj'), 'b97m-d3bj': ('b97m-v', False, 'd3bj'), 'wb97x-d3bj': ('wb97x-v', False, 'd3bj'), + 'wb97x-3c': ('wb97x-v', False, 'd4:wb97x-3c'), + # CF22D is parameterized together with its D3 (zero-damping) dispersion + # correction, so it is enabled by default. The cf22d damping parameters + # are shipped with simple-dftd3 (>=1.2.1) under zero damping. + 'cf22d': ('cf22d', '', 'd3zero'), } # These xc functionals are not supported yet @@ -71,7 +76,9 @@ def parse_dft(xc_code): return _white_list[method_lower] if method_lower.endswith('-3c'): - raise NotImplementedError('*-3c methods are not supported yet.') + if method_lower == "wb97x-3c": + return _white_list[method_lower] + raise NotImplementedError('Only wb97x-3c is supported for now. Other 3c methods are not supported yet.') if '-d3' in method_lower or '-d4' in method_lower: xc, disp = method_lower.split('-') @@ -81,39 +88,112 @@ def parse_dft(xc_code): return xc, '', disp @lru_cache(128) -def parse_disp(dft_method): +def parse_disp(dft_method=None, disp=None): '''Decode the disp parameters based on the xc code. - Returns xc_code_for_dftd3, disp_version, with_3body - Example: b3lyp-d3bj2b -> (b3lyp, d3bj, False) - wb97x-d3bj -> (wb97x, d3bj, False) + The logic for determining the dispersion parameters is as follows: + 1. If `disp` is specified, it takes precedence. + - If `disp` contains ':', it is parsed as `disp_version:method`. + - Otherwise, the method is derived from `dft_method`. + 2. If `disp` is not specified, the dispersion settings are inferred from `dft_method`. 
+ + The `with_3body` flag is determined by the dispersion version suffix: + - '2b' suffix -> False (2-body only) + - 'atm' suffix -> True (Axilrod-Teller-Muto 3-body term) + - 'd4' -> True (D4 always includes 3-body) + - 'd3' (without suffix) -> False + + Args: + dft_method (str): The DFT method name (e.g., 'b3lyp', 'wb97x-d3bj'). + disp (str): Explicit dispersion version (e.g., 'd3bj', 'd3bjatm'). + + Returns: + tuple: (disp_method, disp_version, with_3body) + + Examples: + >>> parse_disp('b3lyp-d3bj2b') + ('b3lyp', 'd3bj', False) + >>> parse_disp('b3lyp-d3bjatm') + ('b3lyp', 'd3bj', True) + >>> parse_disp('wb97x-d3bj') + ('wb97x', 'd3bj', False) + >>> parse_disp(None, 'd4:wb97x-3c') + ('wb97x-3c', 'd4', True) ''' - if dft_method == 'hf': - return 'hf', None, False - dft_lower = dft_method.lower() - xc, nlc, disp = parse_dft(dft_lower) - if dft_lower in XC_MAP: - xc = XC_MAP[dft_lower] + # If anything not specified, return None + if dft_method is None and disp is None: + return None, None, False + + def process_3body(disp_version): + if not disp_version: + return disp_version, False + if disp_version.endswith('2b'): + return disp_version[:-2], False + elif disp_version.endswith('atm'): + return disp_version[:-3], True + elif 'd4' in disp_version: + return disp_version, True + elif 'd3' in disp_version: + return disp_version, False + else: + raise ValueError(f"Unknown dispersion version {disp_version} in parse_disp.") + + if dft_method is not None: + dft_lower = dft_method.lower() + xc, _, disp_from_dft = parse_dft(dft_lower) + if xc in XC_MAP: + xc = XC_MAP[xc] + + # Use disp if specified + # returned method will be the latter part of disp if disp is a string with colon, otherwise, use xc + if disp is not None: + if ":" in disp: + disp_version, method = disp.split(':') + disp_version, with_3body = process_3body(disp_version) + return method, disp_version, with_3body + elif dft_method is not None: + disp, with_3body = process_3body(disp) + return xc, disp, 
with_3body + else: + raise ValueError(f"the method used in dispersion {disp} is not specified.") + + # otherwise, use disp_from_dft + if disp_from_dft is None: + return None, None, False + + if ":" in disp_from_dft: + disp_version, method = disp_from_dft.split(':') + disp_version, with_3body = process_3body(disp_version) + return method, disp_version, with_3body + + disp_from_dft, with_3body = process_3body(disp_from_dft) + return xc, disp_from_dft, with_3body - if disp is None: - return xc, None, False - disp_lower = disp.lower() - if disp_lower.endswith('2b'): - return xc, disp_lower.replace('2b', ''), False - elif disp_lower.endswith('atm'): - return xc, disp_lower.replace('atm', ''), True - else: - return xc, disp_lower, False def check_disp(mf, disp=None): - '''Check whether to apply dispersion correction based on the xc attribute. - If dispersion is allowed, return the DFTD3 disp version, such as d3bj, - d3zero, d4. + '''Check if dispersion correction should be applied and if the version is supported. + + The function determines the dispersion method from the SCF object (`mf`) or the + explicit `disp` argument. It then verifies if the determined dispersion version + is supported in `DISP_VERSIONS`. + + Args: + mf (scf.hf.SCF): The SCF object (HF or DFT). + disp (str or bool, optional): Dispersion version to check. + If None, uses `mf.disp`. + If False, returns False immediately. + + Returns: + bool: True if dispersion is enabled and supported. + False if dispersion is disabled (disp=False) or not specified/implied. + + Raises: + ValueError: If the dispersion version is not supported. 
''' if disp is None: - disp = mf.disp - if disp == 0: # disp = False + disp = getattr(mf, 'disp', None) + if disp is False or disp == 0: return False # To prevent mf.do_disp() triggering the SCF.__getattr__ method, do not use @@ -123,38 +203,60 @@ def check_disp(mf, disp=None): else: # Set the disp method for both HF and MCSCF to 'hf' method = 'hf' - disp_version = parse_disp(method)[1] + disp_version = parse_disp(method, disp)[1] - if disp is None: # Using the disp version decoded from the mf.xc attribute - if disp_version is None: - return False - elif disp_version is None: # Using the version specified by mf.disp - disp_version = disp - elif disp != disp_version: - raise RuntimeError(f'mf.disp {disp} conflicts with mf.xc {method}') + if disp_version is None: + return False if disp_version not in DISP_VERSIONS: - raise NotImplementedError - return disp_version + raise ValueError(f"Unknown dispersion version {disp_version}.") + return True def get_dispersion(mf, disp=None, with_3body=None, verbose=None): - disp_version = check_disp(mf, disp) - if not disp_version: + ''' + Calculate the dispersion correction energy. + + Args: + mf : SCF object + The SCF object. + disp : str, optional + The dispersion correction version. Default is None. + Format examples: "d3", "d3bj", "d4", "d3bj2b", "d3bjatm", "d4:wb97x-3c", etc. + Note: In "d4:wb97x-3c", the latter part follows the method id of simple-dftd3 and dftd4 repo. + with_3body : bool, optional + Whether to include the 3-body term. Default is None. + verbose : int, optional + The verbose level. Default is None. + + Returns: + float + The dispersion correction energy. + + Note: + Priority of `disp` and `with_3body`: + 1. Function arguments (disp, with_3body) + 2. mf.disp (if available) + 3. mf.xc (parsed from the functional name) + ''' + if not check_disp(mf, disp): return 0. 
+ if disp is None: + disp = getattr(mf, 'disp', None) + try: from pyscf.dispersion import dftd3, dftd4 except ImportError: print('dftd3 and dftd4 not available. Install them with `pip install pyscf-dispersion`') raise - mol = mf.mol - method = getattr(mf, 'xc', 'hf') - method, _, disp_with_3body = parse_disp(method) - - if with_3body is not None: + dft_method = getattr(mf, 'xc', 'hf') + method, disp_version, disp_with_3body = parse_disp(dft_method, disp) + if with_3body is None: with_3body = disp_with_3body + mol = mf.mol + # for dftd3 if disp_version[:2].upper() == 'D3': logger.info(mf, "Calc dispersion correction with DFTD3.") diff --git a/pyscf/scf/hf.py b/pyscf/scf/hf.py index 3fcab01f8d..d63dad977e 100644 --- a/pyscf/scf/hf.py +++ b/pyscf/scf/hf.py @@ -21,8 +21,6 @@ ''' import sys -import tempfile - from functools import reduce import numpy import scipy.linalg @@ -766,7 +764,7 @@ def get_init_guess(mol, key='minao', **kwargs): Kwargs: key : str - One of 'minao', 'atom', 'huckel', 'hcore', '1e', 'sap', 'chkfile'. + One of 'minao', 'atom', 'huckel', 'mod_huckel', 'hcore', '1e', 'sap', 'chkfile'. ''' return RHF(mol).get_init_guess(mol, key, **kwargs) @@ -1654,7 +1652,8 @@ class SCF(lib.StreamObject): be skipped and the kernel function will compute only the total energy based on the initial guess. Default value is 50. init_guess : str - initial guess method. It can be one of 'minao', 'atom', 'huckel', 'hcore', '1e', 'sap', 'chkfile'. + initial guess method. It can be one of 'minao', 'atom', 'huckel', + 'mod_huckel', 'hcore', '1e', 'sap', 'chkfile'. 
Default is 'minao' sap_basis : str or dict basis for SAP initial guess, either filename or path as str or @@ -1771,7 +1770,7 @@ def __init__(self, mol): else: # the chkfile will be removed automatically, to save the chkfile, assign a # filename to self.chkfile - self._chkfile = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + self._chkfile = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) self.chkfile = self._chkfile.name ################################################## @@ -1946,7 +1945,7 @@ def init_guess_by_huckel(self, mol=None): return self.make_rdm1(mo_coeff, mo_occ) @lib.with_doc(init_guess_by_mod_huckel.__doc__) - def init_guess_by_mod_huckel(self, updated_rule, mol=None): + def init_guess_by_mod_huckel(self, mol=None): if mol is None: mol = self.mol logger.info(self, '''Initial guess from on-the-fly Huckel, doi:10.1021/acs.jctc.8b01089, employing the updated GWH rule from doi:10.1021/ja00480a005.''') diff --git a/pyscf/scf/test/test_dhf.py b/pyscf/scf/test/test_dhf.py index 2db8b48017..280ca4dbcd 100644 --- a/pyscf/scf/test/test_dhf.py +++ b/pyscf/scf/test/test_dhf.py @@ -100,25 +100,25 @@ def test_get_grad(self): g = mf.get_grad(mf.mo_coeff, mf.mo_occ) self.assertAlmostEqual(abs(g).max(), 0, 5) - if scf.dhf.zquatev: - def test_rhf(self): - mol = gto.M( - verbose = 5, - output = '/dev/null', - atom = ''' - O 0 0 0 - H 0 -0.757 0.587 - H 0 0.757 0.587''', - basis = '631g', - ) - mf = scf.dhf.RHF(mol) - mf.with_ssss = False - mf.conv_tol_grad = 1e-5 - self.assertAlmostEqual(mf.kernel(), -76.03852477545016, 8) - - mf.ssss_approx = None - mf.conv_tol_grad = 1e-5 - self.assertAlmostEqual(mf.kernel(), -76.03852480744785, 8) + @unittest.skipIf(scf.dhf.zquatev is None, 'requires zquatev') + def test_rhf(self): + mol = gto.M( + verbose = 5, + output = '/dev/null', + atom = ''' + O 0 0 0 + H 0 -0.757 0.587 + H 0 0.757 0.587''', + basis = '631g', + ) + mf = scf.dhf.RHF(mol) + mf.with_ssss = False + mf.conv_tol_grad = 1e-5 + self.assertAlmostEqual(mf.kernel(), 
-76.03852477545016, 8) + + mf.ssss_approx = None + mf.conv_tol_grad = 1e-5 + self.assertAlmostEqual(mf.kernel(), -76.03852480744785, 8) def test_get_veff(self): n4c = mol.nao_2c() * 2 @@ -326,6 +326,23 @@ def test_h2_sto3g(self): e = mol.DHF().kernel() self.assertAlmostEqual(e, -1.066122658859047, 12) + def test_he_with_gaunt(self): + mol = gto.M( + atom=[['He', (0.,0.,0.)]], + basis = { + 'He': [(0, 0, (1, 1)), + (0, 0, (3, 1)), + (1, 0, (1, 1)), ]}) + method = mol.DHF() + energy = method.scf() + self.assertAlmostEqual(energy, -2.38146942868, 8) + method.with_gaunt = True + energy = method.scf() + self.assertAlmostEqual(energy, -2.38138339005, 8) + method.with_breit = True + energy = method.scf() + self.assertAlmostEqual(energy, -2.38138339005, 8) + def _fill_gaunt(mol, erig): n2c = erig.shape[0] n4c = n2c * 2 diff --git a/pyscf/scf/test/test_diffuse_orbital.py b/pyscf/scf/test/test_diffuse_orbital.py index 2ac3420d15..a9ca8eedac 100644 --- a/pyscf/scf/test/test_diffuse_orbital.py +++ b/pyscf/scf/test/test_diffuse_orbital.py @@ -19,6 +19,11 @@ from pyscf import lib from pyscf import scf, dft +try: + from pyscf.dispersion import dftd3 +except (ImportError, OSError): + dftd3 = None + def setUpModule(): global mol mol = pyscf.M( @@ -97,6 +102,23 @@ def test_rhf_soscf(self): [[ 2.44273951e-01, 2.44377010e-02, 6.79546462e-17], [-2.44288315e-01, -2.44313901e-02, -1.66959137e-16]])).max() < 1e-5 + @unittest.skipIf(dftd3 is None, "dftd3 not available") + def test_rks_soscf(self): + mf = dft.RKS(mol, xc = "wB97M-d3bj") + mf.grids.atom_grid = (99,590) + mf.conv_tol = 1e-10 + mf = mf.newton() + energy = mf.kernel() + assert mf.converged + assert np.abs(energy - -7.773544875779531) < 1e-5 + + gobj = mf.Gradients() + gradient = gobj.kernel() + assert np.max(np.abs(gradient - np.array([ + [ 2.44614610e-01, 2.44653881e-02, -3.14001231e-18], + [-2.44641034e-01, -2.44569088e-02, -6.41480825e-18], + ]))) < 1e-5 + def test_uhf(self): mf = dft.RKS(mol, xc = "PBE") mf.grids.atom_grid 
= (50,194) diff --git a/pyscf/scf/test/test_diis.py b/pyscf/scf/test/test_diis.py index afa53d989b..524f220e1d 100644 --- a/pyscf/scf/test/test_diis.py +++ b/pyscf/scf/test/test_diis.py @@ -14,9 +14,9 @@ # limitations under the License. import unittest -import tempfile import numpy from pyscf import gto +from pyscf import lib from pyscf import scf from pyscf.scf import diis @@ -82,7 +82,7 @@ def test_diis_restart(self): H 0 1.757 1.587''', basis = '631g', ) - tmpf = tempfile.NamedTemporaryFile() + tmpf = lib.NamedTemporaryFile() mf = scf.RHF(mol) mf.diis_file = tmpf.name eref = mf.kernel() diff --git a/pyscf/scf/test/test_dispersion_logic.py b/pyscf/scf/test/test_dispersion_logic.py new file mode 100644 index 0000000000..3e22ae3edd --- /dev/null +++ b/pyscf/scf/test/test_dispersion_logic.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python +# Copyright 2014-2026 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +from pyscf import gto, scf +from pyscf.scf import dispersion + + +class KnownKS(scf.hf.KohnShamDFT): + def __init__(self, xc='b3lyp'): + self.xc = xc + self.disp = None + + +class KnownHF(scf.hf.SCF): + def __init__(self): + self.disp = None + + +class TestDispersionLogic(unittest.TestCase): + def test_parse_disp_none(self): + # Case 1: All None + self.assertEqual(dispersion.parse_disp(None, None), (None, None, False)) + + def test_parse_disp_explicit(self): + # Case 2: Explicit disp takes precedence + # b3lyp normally has no disp. + self.assertEqual(dispersion.parse_disp('b3lyp', 'd3bj'), ('b3lyp', 'd3bj', False)) + + # disp with colon override method + self.assertEqual(dispersion.parse_disp(None, 'd4:wb97x-3c'), ('wb97x-3c', 'd4', True)) + self.assertEqual(dispersion.parse_disp('b3lyp', 'd3bj:pbe'), ('pbe', 'd3bj', False)) + + # disp with suffix + self.assertEqual(dispersion.parse_disp('b3lyp', 'd3bj2b'), ('b3lyp', 'd3bj', False)) + self.assertEqual(dispersion.parse_disp('b3lyp', 'd3bjatm'), ('b3lyp', 'd3bj', True)) + + # d4 always implies 3body + self.assertEqual(dispersion.parse_disp('b3lyp', 'd4'), ('b3lyp', 'd4', True)) + + def test_parse_disp_from_method(self): + # Case 3: Infer from method + # b3lyp -> no disp + self.assertEqual(dispersion.parse_disp('b3lyp'), (None, None, False)) + + # wb97x-d3bj -> d3bj + self.assertEqual(dispersion.parse_disp('wb97x-d3bj'), ('wb97x', 'd3bj', False)) + + # wb97x-d4s -> d4s + self.assertEqual(dispersion.parse_disp('wb97x-d4s'), ('wb97x', 'd4s', True)) + + # wb97x-3c -> d4, 3body=True (from whitelist) + self.assertEqual(dispersion.parse_disp('wb97x-3c'), ('wb97x-3c', 'd4', True)) + + def test_parse_disp_errors(self): + # Unknown disp version + with self.assertRaises(ValueError): + dispersion.parse_disp('b3lyp', 'unknown_ver') + + # Disp specified but method unknown/missing (if disp string doesn't contain colon) + # Actually parse_disp(None, 'd3bj') -> raises ValueError "the method used in dispersion 
d3bj is not specified." + with self.assertRaises(ValueError): + dispersion.parse_disp(None, 'd3bj') + + def test_check_disp(self): + mol = gto.M(atom='H 0 0 0; H 0 0 1') + + # 1. RHF object (no .xc) + mf_hf = scf.RHF(mol) + self.assertFalse(dispersion.check_disp(mf_hf)) + + # If mf.disp = None + mf_hf.disp = None + # parse_disp('hf', None) -> ('hf', None, False) -> check_disp returns False + self.assertFalse(dispersion.check_disp(mf_hf)) + + # If we set mf.disp = 'd3bj' + mf_hf.disp = 'd3bj' + self.assertTrue(dispersion.check_disp(mf_hf)) + + # 2. KohnShamDFT object (has .xc) + mf_dft = KnownKS() + mf_dft.xc = 'b3lyp' + mf_dft.disp = None + + # b3lyp -> no disp -> False + self.assertFalse(dispersion.check_disp(mf_dft)) + + # Explicit disp + self.assertTrue(dispersion.check_disp(mf_dft, disp='d3bj')) + + # Explicit disp=False + self.assertFalse(dispersion.check_disp(mf_dft, disp=False)) + + # Implicit disp from method + mf_dft.xc = 'wb97x-d3bj' + self.assertTrue(dispersion.check_disp(mf_dft)) + + # Unsupported disp version + with self.assertRaises(ValueError): + dispersion.check_disp(mf_dft, disp='unsupported') + + +if __name__ == "__main__": + unittest.main() diff --git a/pyscf/scf/test/test_ghf.py b/pyscf/scf/test/test_ghf.py index e5ce7b393a..4639e7f89b 100644 --- a/pyscf/scf/test/test_ghf.py +++ b/pyscf/scf/test/test_ghf.py @@ -17,7 +17,6 @@ # import unittest -import tempfile import numpy import scipy.linalg from functools import reduce @@ -41,7 +40,7 @@ def setUpModule(): ) mf = scf.GHF(mol) mf.conv_tol = 1e-12 - mf.chkfile = tempfile.NamedTemporaryFile().name + mf.chkfile = lib.NamedTemporaryFile().name mf.kernel() molsym = gto.M( @@ -57,8 +56,8 @@ def setUpModule(): mfsym = scf.GHF(molsym).run(conv_tol=1e-10) mol1 = gto.M(atom=mol.atom, basis='631g', spin=2, verbose=0) - mf_r = scf.RHF(mol1).run(conv_tol=1e-10, chkfile=tempfile.NamedTemporaryFile().name) - mf_u = scf.RHF(mol1).run(conv_tol=1e-10, chkfile=tempfile.NamedTemporaryFile().name) + mf_r = 
scf.RHF(mol1).run(conv_tol=1e-10, chkfile=lib.NamedTemporaryFile().name) + mf_u = scf.RHF(mol1).run(conv_tol=1e-10, chkfile=lib.NamedTemporaryFile().name) def tearDownModule(): global mol, mf, molsym, mfsym, mol1, mf_r, mf_u @@ -110,7 +109,7 @@ def test_init_guess_atom(self): self.assertAlmostEqual(lib.fp(dm[24:,24:])*2, 2.7821827416174094, 7) def test_init_guess_chk(self): - dm = mol.GHF(chkfile=tempfile.NamedTemporaryFile().name).get_init_guess(mol, key='chkfile') + dm = mol.GHF(chkfile=lib.NamedTemporaryFile().name).get_init_guess(mol, key='chkfile') self.assertEqual(dm.shape, (48,48)) self.assertAlmostEqual(lib.fp(dm), 1.8117584283411752, 5) diff --git a/pyscf/scf/test/test_h2o.py b/pyscf/scf/test/test_h2o.py index b545e177c7..1eb8d78b8b 100644 --- a/pyscf/scf/test/test_h2o.py +++ b/pyscf/scf/test/test_h2o.py @@ -19,7 +19,6 @@ import unittest import numpy import scipy.linalg -import tempfile from pyscf import lib from pyscf import gto from pyscf import scf @@ -196,7 +195,7 @@ def test_init_guess_minao(self): self.assertEqual(dm.mo_occ.size, dm.mo_coeff.shape[1]) s = scf.hf.get_ovlp(mol) occ, mo = scipy.linalg.eigh(dm, s, type=2) - ftmp = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + ftmp = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) scf.chkfile.dump_scf(mol, ftmp.name, 0, occ, mo, occ) self.assertAlmostEqual(numpy.linalg.norm(dm), 3.0334714065913508, 9) @@ -220,7 +219,7 @@ def test_init_guess_atom(self): self.assertEqual(dm.mo_occ.size, dm.mo_coeff.shape[1]) s = scf.hf.get_ovlp(mol) occ, mo = scipy.linalg.eigh(dm, s, type=2) - ftmp = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + ftmp = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) scf.chkfile.dump_scf(mol, ftmp.name, 0, occ, mo, occ) self.assertAlmostEqual(numpy.linalg.norm(dm), 3.041411845876416, 8) @@ -249,7 +248,7 @@ def test_init_guess_1e(self): self.assertEqual(dm.mo_occ.size, dm.mo_coeff.shape[1]) s = scf.hf.get_ovlp(mol) occ, mo = scipy.linalg.eigh(dm, s, type=2) - ftmp = 
tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + ftmp = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) scf.chkfile.dump_scf(mol, ftmp.name, 0, occ, mo, occ, overwrite_mol=False) # dump_scf twice to test overwrite_mol scf.chkfile.dump_scf(mol, ftmp.name, 0, occ, mo, occ) @@ -275,7 +274,7 @@ def test_init_guess_1e(self): self.assertAlmostEqual(numpy.linalg.norm(dm1), 7.5925205205065422, 9) def test_init_guess_chkfile(self): - ftmp = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + ftmp = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) def save(HFclass): mf0 = HFclass(mol) mf0.chkfile = ftmp.name diff --git a/pyscf/scf/test/test_rhf.py b/pyscf/scf/test/test_rhf.py index 2334cdd248..c48acd8e05 100644 --- a/pyscf/scf/test/test_rhf.py +++ b/pyscf/scf/test/test_rhf.py @@ -18,22 +18,16 @@ import numpy import unittest -import tempfile from pyscf import lib from pyscf import gto from pyscf import scf +from pyscf.scf import _vhf from pyscf.scf import atom_hf -import sys try: - import dftd3 -except ImportError: - pass - -try: - import dftd4 -except ImportError: - pass + from pyscf.dispersion import dftd3, dftd4 +except (ImportError, OSError): + dftd3 = dftd4 = None def setUpModule(): global mol, mf, n2sym, n2mf, re_ecp1, re_ecp2 @@ -49,7 +43,7 @@ def setUpModule(): mf = scf.RHF(mol) mf.conv_tol = 1e-10 - mf.chkfile = tempfile.NamedTemporaryFile().name + mf.chkfile = lib.NamedTemporaryFile().name mf.kernel() n2sym = gto.M( @@ -229,7 +223,7 @@ def test_atom_hf_with_ecp(self): self.assertAlmostEqual(scf_result['Cu'][0], -194.92388639203045, 9) def test_init_guess_chk(self): - dm = mol.HF(chkfile=tempfile.NamedTemporaryFile().name).get_init_guess(mol, key='chkfile') + dm = mol.HF(chkfile=lib.NamedTemporaryFile().name).get_init_guess(mol, key='chkfile') self.assertAlmostEqual(lib.fp(dm), 2.5912875957299684, 5) dm = mf.get_init_guess(mol, key='chkfile') @@ -253,6 +247,12 @@ def test_init_guess_huckel(self): dm = scf.hf.RHF(mol).get_init_guess(mol, key='mod_huckel') 
self.assertAlmostEqual(lib.fp(dm), 3.233072986208057, 5) + # init_guess_by_mod_huckel should be callable without arguments, + # consistent with init_guess_by_huckel and the UHF/ROHF/GHF/DHF + # implementations. + dm = scf.hf.RHF(mol).init_guess_by_mod_huckel() + self.assertAlmostEqual(lib.fp(dm), 3.233072986208057, 5) + dm = scf.ROHF(mol).init_guess_by_mod_huckel() self.assertAlmostEqual(lib.fp(dm[0]), 3.233072986208057/2, 5) @@ -371,7 +371,7 @@ def test_analyze(self): def test_scf(self): self.assertAlmostEqual(mf.e_tot, -76.026765673119627, 9) - @unittest.skipIf('dispersion' not in sys.modules, "requires the dftd3 library") + @unittest.skipIf(dftd3 is None, "requires the dftd3 library") def test_scf_d3(self): mf = scf.RHF(mol) mf.disp = 'd3bj' @@ -380,7 +380,7 @@ def test_scf_d3(self): e_tot = mf.kernel() self.assertAlmostEqual(e_tot, -76.03127458778653, 9) - @unittest.skipIf('dispersion' not in sys.modules, "requires the dftd4 library") + @unittest.skipIf(dftd4 is None, "requires the dftd4 library") def test_scf_d4(self): mf = scf.RHF(mol) mf.disp = 'd4' @@ -763,12 +763,12 @@ def test_as_scanner(self): self.assertAlmostEqual(mf_scanner(mol.atom), -76.075408156235909, 9) mol1 = gto.M(atom='H 0 0 0; H 0 0 .9', basis='cc-pvdz') - ref = mol1.RHF(chkfile=tempfile.NamedTemporaryFile().name).x2c().density_fit().run() + ref = mol1.RHF(chkfile=lib.NamedTemporaryFile().name).x2c().density_fit().run() e1 = mf_scanner('H 0 0 0; H 0 0 .9') self.assertAlmostEqual(e1, -1.116394048204042, 9) self.assertAlmostEqual(e1, ref.e_tot, 9) - mfs = mol1.RHF(chkfile=tempfile.NamedTemporaryFile().name).as_scanner() + mfs = mol1.RHF(chkfile=lib.NamedTemporaryFile().name).as_scanner() mfs.__dict__.update(scf.chkfile.load(ref.chkfile, 'scf')) e = mfs(mol1) self.assertAlmostEqual(e, -1.1163913004438035, 9) @@ -901,10 +901,10 @@ def test_get_vj(self): self.assertAlmostEqual(numpy.linalg.norm(vj1), 77.035779188661465, 9) orig = mf1.opt.prescreen - self.assertEqual(orig, 
scf._vhf._fpointer('CVHFnrs8_prescreen').value) + self.assertEqual(orig, _vhf._fpointer('CVHFnrs8_prescreen').value) mf1.opt.prescreen = orig mf1.opt.prescreen = 'CVHFnoscreen' - self.assertEqual(mf1.opt.prescreen, scf._vhf._fpointer('CVHFnoscreen').value) + self.assertEqual(mf1.opt.prescreen, _vhf._fpointer('CVHFnoscreen').value) # issue #1114 dm = numpy.eye(nao, dtype=int) diff --git a/pyscf/scf/test/test_uhf.py b/pyscf/scf/test/test_uhf.py index e94f0d52e5..e0163e47ff 100644 --- a/pyscf/scf/test/test_uhf.py +++ b/pyscf/scf/test/test_uhf.py @@ -23,6 +23,25 @@ from pyscf import gto from pyscf import scf +try: + from pyscf.dispersion import dftd3, dftd4 +except (ImportError, OSError): + dftd3 = dftd4 = None + + +def make_disp_mol(): + return gto.M( + atom=''' +C -0.65830719, 0.61123287, -0.00800148 +C 0.73685281, 0.61123287, -0.00800148 +C 1.43439081, 1.81898387, -0.00800148 +C 0.73673681, 3.02749287, -0.00920048 +''', + basis='ccpvtz', + charge=1, + spin=1, + output='/dev/null') + def setUpModule(): global mol, mf, n2sym, n2mf, mol2, mf2, bak mol = gto.M( @@ -120,6 +139,44 @@ def test_init_guess_sap(self): dm2 = scf.uhf.UHF(mol).get_init_guess(mol, key='sap') self.assertAlmostEqual(lib.fp(dm2), 0.6440359527450615, 7) + def test_break_spin_symm_mix(self): + # H2 at equilibrium: verify DM properties of the breaksym='mix' initial guess + mol_h2 = gto.M(atom='H 0 0 0; H 0 0 1.4', basis='sto-3g', spin=0, verbose=0) + s = mol_h2.intor_symmetric('int1e_ovlp') + + mf_h2 = scf.UHF(mol_h2) + dm = mf_h2.init_guess_by_minao(mol_h2, breaksym='mix') + dma, dmb = dm + + # spin symmetry must be broken + self.assertFalse(numpy.allclose(dma, dmb)) + + # electron count preserved: Tr(S * DM) = N_elec per spin + self.assertAlmostEqual(numpy.einsum('ij,ji->', s, dma), 1.0, 5) + self.assertAlmostEqual(numpy.einsum('ij,ji->', s, dmb), 1.0, 5) + + # DMs must be positive semidefinite (physically valid density matrices) + self.assertTrue(numpy.all(numpy.linalg.eigvalsh(dma) > -1e-10)) + 
self.assertTrue(numpy.all(numpy.linalg.eigvalsh(dmb) > -1e-10)) + + def test_break_spin_symm_mix_h2_dissociation(self): + # At stretched H2 (well past the Coulson-Fischer point) UHF with + # breaksym='mix' should find a lower-energy broken-symmetry solution + # than RHF, with the unpaired electrons localised on separate atoms. + mol_h2 = gto.M(atom='H 0 0 0; H 0 0 4.0', basis='sto-3g', spin=0, verbose=0) + + e_rhf = scf.RHF(mol_h2).kernel() + + mf_uhf = scf.UHF(mol_h2) + mf_uhf.init_guess_breaksym = 'mix' + e_uhf = mf_uhf.kernel() + + self.assertTrue(mf_uhf.converged) + # broken-symmetry UHF must be lower than RHF at stretched geometry + self.assertLess(e_uhf, e_rhf) + # significant spin contamination expected ( -> 1 as R -> inf) + self.assertGreater(mf_uhf.spin_square()[0], 0.5) + def test_get_grad(self): g = mf2.get_grad(mf2.mo_coeff, mf2.mo_occ) self.assertAlmostEqual(abs(g).max(), 0, 6) @@ -152,6 +209,24 @@ def test_mulliken_spin_pop(self): def test_scf(self): self.assertAlmostEqual(mf.e_tot, -76.026765673119627, 9) + @unittest.skipIf(dftd3 is None, "requires the dftd3 library") + def test_uhf_d3bj(self): + mol = make_disp_mol() + mf = scf.UHF(mol) + mf.disp = 'd3bj' + e_disp = mf.get_dispersion() + print(e_disp) + self.assertAlmostEqual(e_disp, -0.030566786972, 9) + + @unittest.skipIf(dftd4 is None, "requires the dftd4 library") + def test_uhf_d4(self): + mol = make_disp_mol() + mf = scf.UHF(mol) + mf.disp = 'd4' + e_disp = mf.get_dispersion() + print(e_disp) + self.assertAlmostEqual(e_disp, -0.0096708308236, 9) + def test_scf_negative_spin(self): mol = gto.M(atom = ''' O 0 0 0 diff --git a/pyscf/scf/uhf.py b/pyscf/scf/uhf.py index fa71c9b1e9..7190be0be1 100644 --- a/pyscf/scf/uhf.py +++ b/pyscf/scf/uhf.py @@ -122,6 +122,57 @@ def _break_dm_spin_symm(mol, dm, breaksym=1): dmb = numpy.zeros_like(dma) for b0, b1, p0, p1 in mol.aoslice_by_atom(): dmb[...,p0:p1,p0:p1] = dma[...,p0:p1,p0:p1] + elif breaksym == 'mix': + # 45-degree HOMO-LUMO rotation mixes the 
frontier orbitals between alpha + # and beta spins while keeping them delocalized over the full molecule. + # Unlike breaksym=1 (which zeroes off-diagonal AO blocks and artificially + # localises charge onto atoms), this rotation preserves the full molecular + # orbital character and provides a smoother path away from the RHF fixed + # point, reducing the risk of converging back to the closed-shell solution. + mo_coeff = getattr(dm, 'mo_coeff', None) + mo_occ = getattr(dm, 'mo_occ', None) + if mo_coeff is not None and mo_occ is not None: + # Energy-ordered MOs (ascending): HOMO = last occupied, LUMO = first virtual. + mo_a = mo_coeff[0] + occ_a = mo_occ[0] + occ_b = mo_occ[1] + occ_idx = numpy.where(occ_a > 0.5)[0] + vir_idx = numpy.where(occ_a < 0.5)[0] + homo_idx = occ_idx[-1] + lumo_idx = vir_idx[0] + else: + # No MO info: build a MINAO restricted DM, construct the Fock matrix + # from it, and diagonalise once to get energy-ordered MOs. This costs + # one Fock build and one diagonalisation (no SCF iterations) but gives + # the true HOMO/LUMO rather than an arbitrary vector from the degenerate + # virtual subspace that a plain DM diagonalisation would produce. 
+ rhf_tmp = hf.RHF(mol) + dm_minao = hf.init_guess_by_minao(mol) + fock = rhf_tmp.get_hcore() + rhf_tmp.get_veff(mol, dm_minao) + s1e = mol.intor_symmetric('int1e_ovlp') + _, mo_a = rhf_tmp.eig(fock, s1e) + mo_occ_rhf = rhf_tmp.get_occ(_, mo_a) + occ_a = (mo_occ_rhf > 1e-8).astype(numpy.double) + occ_b = occ_a.copy() + occ_idx = numpy.where(occ_a > 0.5)[0] + vir_idx = numpy.where(occ_a < 0.5)[0] + homo_idx = occ_idx[-1] + lumo_idx = vir_idx[0] + if len(occ_idx) > 0 and len(vir_idx) > 0: + homo = mo_a[:, homo_idx] + lumo = mo_a[:, lumo_idx] + c = numpy.sqrt(0.5) + # alpha HOMO -> (HOMO + LUMO)/sqrt(2) + mo_alpha = mo_a.copy() + mo_alpha[:, homo_idx] = c * (homo + lumo) + # beta HOMO -> (HOMO - LUMO)/sqrt(2) + mo_beta = mo_a.copy() + mo_beta[:, homo_idx] = c * (homo - lumo) + dma = numpy.dot(mo_alpha[:, occ_a > 0.5] * occ_a[occ_a > 0.5], + mo_alpha[:, occ_a > 0.5].conj().T) + dmb = numpy.dot(mo_beta[:, occ_b > 0.5] * occ_b[occ_b > 0.5], + mo_beta[:, occ_b > 0.5].conj().T) + else: # Adjust num. electrons for density matrices (issue #1839) # Get overlap matrix @@ -760,11 +811,15 @@ class UHF(hf.SCF): If given, freeze the number of (alpha,beta) electrons to the given value. level_shift : number or two-element list level shift (in Eh) for alpha and beta Fock if two-element list is given. - init_guess_breaksym : int - This configuration controls the algorithm used to break the spin - symmetry of the initial guess: - - 0 to disable symmetry breaking in the initial guess. - - 1 to use the default algorithm introduced in pyscf-1.7. + init_guess_breaksym : int or str + Controls how spin symmetry is broken in the initial guess: + - 0 to disable symmetry breaking. + - 1 (default) to use the atom-block algorithm introduced in pyscf-1.7. + - 'mix' to rotate the HOMO and LUMO by 45 degrees between alpha and + beta spins. Builds one MINAO Fock matrix and diagonalises it to get + energy-ordered MOs, then mixes: alpha HOMO -> (HOMO+LUMO)/sqrt(2), + beta HOMO -> (HOMO-LUMO)/sqrt(2). 
Preserves molecular delocalization and + gives a smoother symmetry break than mode 1. - 2 to adjust the num. electrons for spin-up and spin-down density matrices (issue #1839). Examples: diff --git a/pyscf/solvent/grad/pcm.py b/pyscf/solvent/grad/pcm.py index 1e93c38b11..7e81cbe954 100644 --- a/pyscf/solvent/grad/pcm.py +++ b/pyscf/solvent/grad/pcm.py @@ -220,6 +220,8 @@ def grad_qv(pcmobj, dm, q_sym = None): fakemol.cart = mol.cart v_nj = df.incore.aux_e2(mol, fakemol, intor=int3c2e_ip1, aosym='s1', cintopt=cintopt) dvj += numpy.einsum('xijk,ij,k->xi', v_nj, dm, q_sym[p0:p1]) + # Free up v_nj to stay within mem limits + del v_nj int3c2e_ip2 = mol._add_suffix('int3c2e_ip2') cintopt = gto.moleintor.make_cintopt(mol._atm, mol._bas, mol._env, int3c2e_ip2) @@ -229,6 +231,8 @@ def grad_qv(pcmobj, dm, q_sym = None): fakemol.cart = mol.cart q_nj = df.incore.aux_e2(mol, fakemol, intor=int3c2e_ip2, aosym='s1', cintopt=cintopt) dq[:,p0:p1] = numpy.einsum('xijk,ij,k->xk', q_nj, dm, q_sym[p0:p1]) + # Free up q_nj to stay within mem limits + del q_nj aoslice = mol.aoslice_by_atom() dq = numpy.asarray([numpy.sum(dq[:,p0:p1], axis=1) for p0,p1 in gridslice]) @@ -415,7 +419,7 @@ def make_grad_object(base_method): assert isinstance(base_method, _Solvation) with_solvent = base_method.with_solvent if with_solvent.frozen: - raise RuntimeError('Frozen solvent model is not avialbe for energy gradients') + raise RuntimeError('Frozen solvent model is not available for energy gradients') # create the Gradients in vacuum. Cannot call super().Gradients() here # because other dynamic corrections might be applied to the base_method. 
diff --git a/pyscf/solvent/pol_embed.py b/pyscf/solvent/pol_embed.py index b6b48e0c9a..17cb71c096 100644 --- a/pyscf/solvent/pol_embed.py +++ b/pyscf/solvent/pol_embed.py @@ -496,7 +496,7 @@ def nuc_grad_method(self, grad_method): 1 0.000000 -0.935307 -1.082500 ''', basis='sto3g') mf = mol.RHF() - with tempfile.NamedTemporaryFile() as f: + with lib.NamedTemporaryFile() as f: f.write(b'''! @COORDINATES 3 diff --git a/pyscf/solvent/test/test_pol_embed.py b/pyscf/solvent/test/test_pol_embed.py index 2f6289beec..ec45acd259 100644 --- a/pyscf/solvent/test/test_pol_embed.py +++ b/pyscf/solvent/test/test_pol_embed.py @@ -15,7 +15,6 @@ import unittest import os -import tempfile import numpy from numpy.testing import assert_allclose from pyscf import lib, gto, scf, dft @@ -34,7 +33,7 @@ def setUpModule(): global potf, potf2, mol, mol2, potfile, potfile2 - potf = tempfile.NamedTemporaryFile() + potf = lib.NamedTemporaryFile() potf.write(b'''! @COORDINATES 3 @@ -70,7 +69,7 @@ def setUpModule(): ''', basis='sto3g', verbose=7, output='/dev/null') - potf2 = tempfile.NamedTemporaryFile() + potf2 = lib.NamedTemporaryFile() potf2.write(b'''! 
water molecule + a large, positive charge to force electron spill-out @COORDINATES 4 @@ -294,7 +293,7 @@ def test_pe_scf_ecp(self): self.assertAlmostEqual(mf.e_tot, -168.147494986446, 8) def test_as_scanner(self): - mf = mol.RHF(chkfile=tempfile.NamedTemporaryFile().name) + mf = mol.RHF(chkfile=lib.NamedTemporaryFile().name) mf_scanner = solvent.PE(mf, potfile).as_scanner() mf_scanner(mol) self.assertAlmostEqual(mf_scanner.with_solvent.e, 0.00020182314249546455, 9) diff --git a/pyscf/tdscf/test/test_tddks.py b/pyscf/tdscf/test/test_tddks.py index 742671cea7..084887cb87 100644 --- a/pyscf/tdscf/test/test_tddks.py +++ b/pyscf/tdscf/test/test_tddks.py @@ -17,7 +17,6 @@ # import unittest -import tempfile import numpy from pyscf import lib, gto, scf, dft from pyscf import tdscf @@ -38,8 +37,7 @@ def setUpModule(): mol.basis = 'uncsto3g' mol.spin = 1 mol.build() - mf_lda = mol.DKS().set(xc='lda,', conv_tol=1e-12, - chkfile=tempfile.NamedTemporaryFile().name).run() + mf_lda = mol.DKS().set(xc='lda,', conv_tol=1e-12, chkfile=lib.NamedTemporaryFile().name).run() def tearDownModule(): global mol, mf_lda diff --git a/pyscf/tdscf/test/test_tdgks.py b/pyscf/tdscf/test/test_tdgks.py index 30ee8a92bc..4411664f19 100644 --- a/pyscf/tdscf/test/test_tdgks.py +++ b/pyscf/tdscf/test/test_tdgks.py @@ -17,7 +17,6 @@ # import unittest -import tempfile import numpy from pyscf import lib, gto, scf, dft from pyscf import tdscf @@ -48,16 +47,14 @@ def setUpModule(): mol.spin = 1 mol.build() - mf_lda = mol.GKS().set(xc='lda,', conv_tol=1e-12, - chkfile=tempfile.NamedTemporaryFile().name).newton().run() + mf_lda = mol.GKS().set(xc='lda,', conv_tol=1e-12, chkfile=lib.NamedTemporaryFile().name).newton().run() mcol_lda = None if mcfun is not None: - mcol_lda = mol.GKS().set(xc='lda,', conv_tol=1e-12, - collinear='mcol', chkfile=tempfile.NamedTemporaryFile().name) + mcol_lda = mol.GKS().set(xc='lda,', conv_tol=1e-12, chkfile=lib.NamedTemporaryFile().name, + collinear='mcol') 
mcol_lda._numint.spin_samples = 6 mcol_lda = mcol_lda.run() - mf_bp86 = molsym.GKS().set(xc='bp86', conv_tol=1e-12, - chkfile=tempfile.NamedTemporaryFile().name).run() + mf_bp86 = molsym.GKS().set(xc='bp86', conv_tol=1e-12, chkfile=lib.NamedTemporaryFile().name).run() def tearDownModule(): global mol, molsym, mf_bp86, mf_lda, mcol_lda diff --git a/pyscf/tdscf/test/test_tdrks_vv10.py b/pyscf/tdscf/test/test_tdrks_vv10.py index bcb3d779bd..efae7c7c81 100644 --- a/pyscf/tdscf/test/test_tdrks_vv10.py +++ b/pyscf/tdscf/test/test_tdrks_vv10.py @@ -117,9 +117,9 @@ def test_wb97xv_tda(self): mf = make_mf(mol) tda = mf.TDA() tda.exclude_nlc = False - test_excitation_energy, test_state_vector = tda.kernel(nstates = len(reference_excited_state_energy)) + test_excitation_energy, test_state_vector = tda.kernel(nstates = 2) - assert np.linalg.norm(test_excitation_energy - reference_excitation_energy) < excitation_energy_threshold + assert np.linalg.norm(test_excitation_energy - reference_excitation_energy[:2]) < excitation_energy_threshold reference_transition_dipole = np.array([ [-0.0039, -0.0088, -0.0068], @@ -130,14 +130,14 @@ def test_wb97xv_tda(self): ]) test_transition_dipole = tda.transition_dipole() - for i_dipole in range(reference_transition_dipole.shape[0]): + for i_dipole in range(2): assert np.linalg.norm(test_transition_dipole[i_dipole] - reference_transition_dipole[i_dipole]) < dipole_threshold \ or np.linalg.norm(test_transition_dipole[i_dipole] + reference_transition_dipole[i_dipole]) < dipole_threshold reference_oscillator_strength = np.array([0.0000204074, 0.0054841178, 0.0031204297, 0.0063755735, 0.0137712931]) test_oscillator_strength = tda.oscillator_strength() - assert np.linalg.norm(test_oscillator_strength - reference_oscillator_strength) < oscillator_strength_threshold + assert np.linalg.norm(test_oscillator_strength - reference_oscillator_strength[:2]) < oscillator_strength_threshold def test_wb97xv_tddft_triplet_high_cost(self): ### Q-Chem input @@ 
-232,7 +232,7 @@ def test_wb97xv_unrestricted_tddft_high_cost(self): assert np.linalg.norm(test_oscillator_strength - reference_oscillator_strength) < oscillator_strength_threshold - def test_wb97xv_unrestricted_tda(self): + def test_wb97xv_unrestricted_tda_high_cost(self): # Same Q-Chem input as above, Q-Chem computes both TDA and TDDFT in the same run reference_ground_state_energy = -150.9397884760 reference_excited_state_energy = np.array([-150.88981193, -150.79604327, -150.75118183, -150.72292823, -150.71461300]) diff --git a/pyscf/tdscf/test/test_tduks.py b/pyscf/tdscf/test/test_tduks.py index f752c2e4fe..7f45ced23f 100644 --- a/pyscf/tdscf/test/test_tduks.py +++ b/pyscf/tdscf/test/test_tduks.py @@ -146,8 +146,8 @@ def test_tddft_camb3lyp(self): es = td.kernel(nstates=4)[0] a,b = td.get_ab() e_ref = diagonalize(a, b, 5) - self.assertAlmostEqual(abs(es[:3]-e_ref[:3]).max(), 0, 6) - self.assertAlmostEqual(lib.fp(es[:3]*27.2114), 7.69383202636, 4) + self.assertAlmostEqual(abs(es[:3]-e_ref[:3]).max(), 0, 5) + self.assertAlmostEqual(lib.fp(es[:3]*27.2114) - 7.69383202636, 0, 4) def test_tda_b3lyp(self): td = tdscf.TDA(mf_b3lyp) diff --git a/pyscf/tools/c60struct.py b/pyscf/tools/c60struct.py index f9bbbb587c..975bd78abb 100644 --- a/pyscf/tools/c60struct.py +++ b/pyscf/tools/c60struct.py @@ -15,12 +15,15 @@ from functools import reduce import numpy + def rotmatz(ang): c = numpy.cos(ang) s = numpy.sin(ang) return numpy.array((( c, s, 0), (-s, c, 0), ( 0, 0, 1),)) + + def rotmaty(ang): c = numpy.cos(ang) s = numpy.sin(ang) @@ -28,11 +31,11 @@ def rotmaty(ang): ( 0, 1, 0), (-s, 0, c),)) + def r2edge(ang, r): return 2*r*numpy.sin(ang/2) - def make60(b5, b6): theta1 = numpy.arccos(1/numpy.sqrt(5)) theta2 = (numpy.pi - theta1) * .5 diff --git a/pyscf/tools/chgcar.py b/pyscf/tools/chgcar.py index 315f846732..2be1f9ae9d 100644 --- a/pyscf/tools/chgcar.py +++ b/pyscf/tools/chgcar.py @@ -166,7 +166,7 @@ def __init__(self, cell, nx=60, ny=60, nz=60, resolution=RESOLUTION, 
self.mol = cell cell = cell.view(pbcgto.Cell) if (isinstance(cell.unit, str) and - cell.unit.startswith(('B','b','au','AU'))): + cell.unit.startswith(('B','b','au','AU'))): cell.a = self.box else: cell.a = self.box * lib.param.BOHR @@ -183,7 +183,7 @@ def __init__(self, cell, nx=60, ny=60, nz=60, resolution=RESOLUTION, self.boxorig = numpy.zeros(3) self.vol = cell.vol - def get_coords(self) : + def get_coords(self): """ Result: set of coordinates to compute a field which is to be stored in the file. """ @@ -208,9 +208,9 @@ def write(self, field, fname, comment=None): field = field * self.vol boxA = self.box * lib.param.BOHR - atomList= [cell.atom_pure_symbol(i) for i in range(cell.natm)] + atomList = [cell.atom_pure_symbol(i) for i in range(cell.natm)] Axyz = zip(atomList, cell.atom_coords().tolist()) - Axyz = sorted(Axyz, key = lambda x: x[0]) + Axyz = sorted(Axyz, key=lambda x: x[0]) swappedCoords = [(vec[1]+self.boxorig) * lib.param.BOHR for vec in Axyz] vaspAtomicInfo = collections.Counter([xyz[0] for xyz in Axyz]) vaspAtomicInfo = sorted(vaspAtomicInfo.items()) @@ -221,8 +221,8 @@ def write(self, field, fname, comment=None): f.write('%14.8f %14.8f %14.8f \n' % (boxA[0,0],boxA[0,1],boxA[0,2])) f.write('%14.8f %14.8f %14.8f \n' % (boxA[1,0],boxA[1,1],boxA[1,2])) f.write('%14.8f %14.8f %14.8f \n' % (boxA[2,0],boxA[2,1],boxA[2,2])) - f.write(''.join(['%5.3s'%atomN[0] for atomN in vaspAtomicInfo]) + '\n') - f.write(''.join(['%5d'%atomN[1] for atomN in vaspAtomicInfo]) + '\n') + f.write(''.join(['%5.3s' % atomN[0] for atomN in vaspAtomicInfo]) + '\n') + f.write(''.join(['%5d' % atomN[1] for atomN in vaspAtomicInfo]) + '\n') f.write('Cartesian \n') for ia in range(cell.natm): f.write(' %14.8f %14.8f %14.8f\n' % tuple(swappedCoords[ia])) @@ -244,6 +244,6 @@ def read(self, chgcar_file): from pyscf.tools import chgcar cell = gto.M(atom='H 0 0 0; H 0 0 1', a=numpy.eye(3)*3) mf = scf.RHF(cell).run() - chgcar.density(cell, 'h2.CHGCAR', mf.make_rdm1()) #makes total density 
- chgcar.orbital(cell, 'h2_mo1.CHGCAR', mf.mo_coeff[:,0]) # makes mo#1 (sigma) - chgcar.orbital(cell, 'h2_mo2.CHGCAR', mf.mo_coeff[:,1]) # makes mo#2 (sigma*) + chgcar.density(cell, 'h2.CHGCAR', mf.make_rdm1()) # makes total density + chgcar.orbital(cell, 'h2_mo1.CHGCAR', mf.mo_coeff[:,0]) # makes mo#1 (sigma) + chgcar.orbital(cell, 'h2_mo2.CHGCAR', mf.mo_coeff[:,1]) # makes mo#2 (sigma*) diff --git a/pyscf/tools/cubegen.py b/pyscf/tools/cubegen.py index 52f769abe8..d4d972965a 100644 --- a/pyscf/tools/cubegen.py +++ b/pyscf/tools/cubegen.py @@ -274,12 +274,12 @@ def __init__(self, mol, nx=80, ny=80, nz=80, resolution=RESOLUTION, self.ys = numpy.linspace(0, 1, ny, endpoint=True) self.zs = numpy.linspace(0, 1, nz, endpoint=True) - def get_coords(self) : + def get_coords(self): """ Result: set of coordinates to compute a field which is to be stored in the file. """ frac_coords = lib.cartesian_prod([self.xs, self.ys, self.zs]) - return frac_coords @ self.box + self.boxorig # Convert fractional coordinates to real-space coordinates + return frac_coords @ self.box + self.boxorig # Convert fractional coordinates to real-space coordinates def get_ngrids(self): return self.nx * self.ny * self.nz @@ -310,7 +310,7 @@ def write(self, field, fname, comment=None): f.write(f'{self.nz:5d}{delta[2,0]:12.6f}{delta[2,1]:12.6f}{delta[2,2]:12.6f}\n') for ia in range(mol.natm): atmsymb = mol.atom_symbol(ia) - f.write('%5d%12.6f'% (gto.charge(atmsymb), 0.)) + f.write('%5d%12.6f' % (gto.charge(atmsymb), 0.)) f.write('%12.6f%12.6f%12.6f\n' % tuple(coord[ia])) for ix in range(self.nx): @@ -326,6 +326,7 @@ def read(self, cube_file): data = f.readline().split() natm = int(data[0]) self.boxorig = numpy.array([float(x) for x in data[1:]]) + def parse_nx(data): from pyscf.pbc.gto import Cell d = data.split() @@ -361,6 +362,6 @@ def parse_nx(data): H 0.761561, 0.478993, 0.00000000 H -0.761561, 0.478993, 0.00000000''', basis='6-31g*') mf = scf.RHF(mol).run() - cubegen.density(mol, 'h2o_den.cube', 
mf.make_rdm1()) #makes total density + cubegen.density(mol, 'h2o_den.cube', mf.make_rdm1()) # makes total density cubegen.mep(mol, 'h2o_pot.cube', mf.make_rdm1()) cubegen.orbital(mol, 'h2o_mo1.cube', mf.mo_coeff[:,0]) diff --git a/pyscf/tools/molden.py b/pyscf/tools/molden.py index 07862101be..31da31bca4 100644 --- a/pyscf/tools/molden.py +++ b/pyscf/tools/molden.py @@ -76,6 +76,7 @@ def orbital_coeff(mol, fout, mo_coeff, spin='Alpha', symm=None, ene=None, for i,j in enumerate(aoidx): fout.write(' %3d %18.14g\n' % (i+1, mo_coeff[j,imo])) + def from_mo(mol, filename, mo_coeff, spin='Alpha', symm=None, ene=None, occ=None, ignore_h=IGNORE_H): '''Dump the given MOs in Molden format''' @@ -87,6 +88,8 @@ def from_mo(mol, filename, mo_coeff, spin='Alpha', symm=None, ene=None, def from_scf(mf, filename, ignore_h=IGNORE_H): '''Dump the given SCF object in Molden format''' dump_scf(mf, filename, ignore_h) + + def dump_scf(mf, filename, ignore_h=IGNORE_H): import pyscf.scf mol = mf.mol @@ -104,6 +107,7 @@ def dump_scf(mf, filename, ignore_h=IGNORE_H): orbital_coeff(mf.mol, f, mf.mo_coeff, ene=mf.mo_energy, occ=mf.mo_occ, ignore_h=ignore_h) + def from_mcscf(mc, filename, ignore_h=IGNORE_H, cas_natorb=False): mol = mc.mol dm1 = mc.make_rdm1() @@ -118,6 +122,7 @@ def from_mcscf(mc, filename, ignore_h=IGNORE_H, cas_natorb=False): header(mol, f, ignore_h) orbital_coeff(mol, f, mo_coeff, ene=mo_energy, occ=occ, ignore_h=ignore_h) + def from_chkfile(filename, chkfile, key='scf/mo_coeff', ignore_h=IGNORE_H): import pyscf.scf with open(filename, 'w') as f: @@ -156,6 +161,7 @@ def from_chkfile(filename, chkfile, key='scf/mo_coeff', ignore_h=IGNORE_H): _SEC_RE = re.compile(r'\[[^]]+\]') + def _read_one_section(molden_fp): sec = [None] last_pos = 0 @@ -183,10 +189,12 @@ def _read_one_section(molden_fp): return sec + def _parse_natoms(lines, envs): envs['natm'] = natm = int(lines[1]) return natm + def _parse_atoms(lines, envs): if 'ANG' in lines[0].upper(): envs['unit'] = 1 @@ -203,9 
+211,10 @@ def _parse_atoms(lines, envs): sys.stderr.write('Number of atoms in section ATOMS does not equal to N_ATOMS\n') return atoms + def _parse_charge(lines, envs): - mulliken_charges = [float(_d2e(x)) for x in lines[1:]] - return mulliken_charges + return [float(_d2e(x)) for x in lines[1:]] # Mulliken charges + def _parse_gto(lines, envs): mol = envs['mol'] @@ -251,6 +260,7 @@ def read_one_bas(lsym, nb, fac=1): mol._basis = envs['basis'] = gto.format_basis(_basis, sort_basis=False) return mol + def _parse_mo(lines, envs): mol = envs['mol'] if not mol._built: @@ -263,7 +273,7 @@ def _parse_mo(lines, envs): mo_energy = [] spins = [] mo_occ = [] - mo_coeff_prim = [] # primary data, will be reworked for missing values + mo_coeff_prim = [] # primary data, will be reworked for missing values coeff_idx = [] mo_id = 0 for line in lines[1:]: @@ -296,7 +306,6 @@ def _parse_mo(lines, envs): s = mol.intor('int1e_ovlp') mo_coeff = numpy.einsum('i,ij->ij', numpy.sqrt(1/s.diagonal()), mo_coeff) - return mol, mo_energy, mo_coeff, mo_occ, irrep_labels, spins @@ -316,6 +325,7 @@ def _parse_core(lines, envs): 'ECP information was lost when saving to molden format.\n\n') return mol.ecp + _SEC_PARSER = {'N_ATOMS' : _parse_natoms, 'ATOMS' : _parse_atoms, 'GTO' : _parse_gto, @@ -326,10 +336,11 @@ def _parse_core(lines, envs): _SEC_ORDER = ['N_ATOMS', 'ATOMS', 'GTO', 'CHARGE', 'MO', 'CORE', 'MOLDEN FORMAT'] + def load(moldenfile, verbose=0): '''Extract mol and orbitals from molden file ''' - sec_kinds = {} # found sections and their lines are stored in this dic + sec_kinds = {} # found sections and their lines are stored in this dic with open(moldenfile, 'r') as f: mol = gto.Mole() mol.cart = True @@ -367,7 +378,7 @@ def load(moldenfile, verbose=0): if sec_kind == 'MO' and 'MO' in sec_kinds: if len(sec_kinds['MO']) == 1: mol, mo_energy, mo_coeff, mo_occ, irrep_labels, spins = \ - _parse_mo(sec_kinds['MO'][0], tokens) + _parse_mo(sec_kinds['MO'][0], tokens) # If found only one MO 
section while 'B' appears in the spins # labels, the MOs so obtained are spin orbitals, with beta # orbitals at the second half of the mo_coeff matrix. @@ -376,23 +387,24 @@ def load(moldenfile, verbose=0): # general spin orbitals which allows to mix spin alpha # and spin beta components in the same orbitals raise NotImplementedError - else: - # Regular spin orbitals, alpha and beta do not mix - beta_idx = numpy.array([s[0] == 'B' for s in spins]) - alpha_idx = ~beta_idx - mo_energy = mo_energy[alpha_idx], mo_energy[beta_idx] - mo_coeff = mo_coeff[:,alpha_idx], mo_coeff[:,beta_idx] - mo_occ = mo_occ[alpha_idx], mo_occ[beta_idx] + + # Regular spin orbitals, alpha and beta do not mix + beta_idx = numpy.array([s[0] == 'B' for s in spins]) + alpha_idx = ~beta_idx + mo_energy = mo_energy[alpha_idx], mo_energy[beta_idx] + mo_coeff = mo_coeff[:,alpha_idx], mo_coeff[:,beta_idx] + mo_occ = mo_occ[alpha_idx], mo_occ[beta_idx] + if irrep_labels: irrep_labels = numpy.array(irrep_labels) irrep_labels = irrep_labels[alpha_idx], irrep_labels[beta_idx] - spins = numpy.array(spins) - spins = spins[alpha_idx], spins[beta_idx] + spins = numpy.array(spins) + spins = spins[alpha_idx], spins[beta_idx] elif len(sec_kinds['MO']) == 2: res_a = _parse_mo(sec_kinds['MO'][0], tokens) res_b = _parse_mo(sec_kinds['MO'][1], tokens) mo_energy, mo_coeff, mo_occ, irrep_labels, spins = \ - list(zip(res_a[1:], res_b[1:])) + list(zip(res_a[1:], res_b[1:])) mol = res_b[0] if sec_kind in sec_kinds: @@ -406,11 +418,14 @@ def load(moldenfile, verbose=0): mol.build(0, 0) return mol, mo_energy, mo_coeff, mo_occ, irrep_labels, spins + parse = read = load + def _d2e(token): return token.replace('D', 'e').replace('d', 'e') + def header(mol, fout, ignore_h=IGNORE_H): if ignore_h: mol = remove_high_l(mol)[0] @@ -426,7 +441,7 @@ def header(mol, fout, ignore_h=IGNORE_H): fout.write('[GTO]\n') for ia, (sh0, sh1, p0, p1) in enumerate(mol.offset_nr_by_atom()): - fout.write('%d 0\n' %(ia+1)) + fout.write('%d 0\n' % 
(ia+1)) for ib in range(sh0, sh1): l = mol.bas_angular(ib) nprim = mol.bas_nprim(ib) @@ -452,6 +467,7 @@ def header(mol, fout, ignore_h=IGNORE_H): fout.write('%s : %d\n' % (ia+1, nelec_ecp_core)) fout.write('\n') + def order_ao_index(mol): # reorder d,f,g function to # 5D: D 0, D+1, D-1, D+2, D-2 @@ -500,6 +516,7 @@ def order_ao_index(mol): off += l * 2 + 1 return idx + def remove_high_l(mol, mo_coeff=None): '''Remove high angular momentum (l >= 5) functions before dumping molden file. If molden function raised error message ``RuntimeError l=5 is not supported``, @@ -521,23 +538,22 @@ def remove_high_l(mol, mo_coeff=None): pmol.build(0, 0) if mo_coeff is None: return pmol, None - else: - p1 = 0 - idx = [] - for ib in range(mol.nbas): - l = mol.bas_angular(ib) - nc = mol.bas_nctr(ib) - if mol.cart: - nd = (l + 1) * (l + 2) // 2 - else: - nd = l * 2 + 1 - p0, p1 = p1, p1 + nd * nc - if l <= 4: - idx.append(range(p0, p1)) - idx = numpy.hstack(idx) - return pmol, mo_coeff[idx] + p1 = 0 + idx = [] + for ib in range(mol.nbas): + l = mol.bas_angular(ib) + nc = mol.bas_nctr(ib) + if mol.cart: + nd = (l + 1) * (l + 2) // 2 + else: + nd = l * 2 + 1 + p0, p1 = p1, p1 + nd * nc + if l <= 4: + idx.append(range(p0, p1)) + idx = numpy.hstack(idx) + return pmol, mo_coeff[idx] if __name__ == '__main__': @@ -545,7 +561,7 @@ def remove_high_l(mol, mo_coeff=None): import tempfile mol = gto.Mole() mol.verbose = 5 - mol.output = None#'out_gho' + mol.output = None # 'out_gho' mol.atom = [['C', (0.,0.,0.)], ['H', ( 1, 1, 1)], ['H', (-1,-1, 1)], @@ -561,7 +577,7 @@ def remove_high_l(mol, mo_coeff=None): print(order_ao_index(mol)) orbital_coeff(mol, mol.stdout, m.mo_coeff) - ftmp = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + ftmp = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) from_mo(mol, ftmp.name, m.mo_coeff) print(parse(ftmp.name)) diff --git a/pyscf/tools/qcschema.py b/pyscf/tools/qcschema.py index 9a264552b5..06fbfbb682 100644 --- a/pyscf/tools/qcschema.py +++ 
b/pyscf/tools/qcschema.py @@ -7,7 +7,8 @@ import pyscf from pyscf.lib.parameters import BOHR -def load_qcschema_json( file_name ): + +def load_qcschema_json(file_name): ''' Does: loads qcschema format json into a dictionary Input: @@ -16,12 +17,11 @@ def load_qcschema_json( file_name ): Returns: dict in qcschema format ''' # load qcschema output json file - data = None - with open(file_name,'r') as f: - data = json.load(f) - return data + with open(file_name, 'r') as f: + return json.load(f) + -def load_qcschema_go_final_json( file_name ): +def load_qcschema_go_final_json(file_name): ''' Does: loads qcschema format geometry optimization json and returns only the optimized 'final' geometry @@ -35,12 +35,11 @@ def load_qcschema_go_final_json( file_name ): # load qcschema GO output json file # and return last 'trajectory' point's entries # (this is the optimized molecule) - data = None temp = None - with open(file_name,'r') as f: + with open(file_name, 'r') as f: temp = json.load(f) - data = temp["trajectory"][-1] - return data + return temp["trajectory"][-1] + def load_qcschema_molecule(qcschema_dict, to_Angstrom=False, xyz=False, mol_select=1, step=0): ''' @@ -67,21 +66,21 @@ def load_qcschema_molecule(qcschema_dict, to_Angstrom=False, xyz=False, mol_sele xyz=True: output a string in xyz file format i.e. first line is number of atoms. 
''' - if(mol_select == 1): + if (mol_select == 1): syms = np.array(qcschema_dict["molecule"]["symbols"]) geo = np.array(qcschema_dict["molecule"]["geometry"]) - elif(mol_select == 2): + elif (mol_select == 2): syms = np.array(qcschema_dict["initial_molecule"]["symbols"]) geo = np.array(qcschema_dict["initial_molecule"]["geometry"]) - elif(mol_select == 3): + elif (mol_select == 3): syms = np.array(qcschema_dict["final_molecule"]["symbols"]) geo = np.array(qcschema_dict["final_molecule"]["geometry"]) - elif(mol_select == 4): + elif (mol_select == 4): # for geometry or md, can load a specific geometry syms = np.array(qcschema_dict["trajectory"][step]["molecule"]["symbols"]) geo = np.array(qcschema_dict["trajectory"][step]["molecule"]["geometry"]) - if(to_Angstrom): + if (to_Angstrom): # convert Bohr to Angstrom geo = geo*BOHR @@ -91,13 +90,14 @@ def load_qcschema_molecule(qcschema_dict, to_Angstrom=False, xyz=False, mol_sele PySCF_atoms = list(zip(syms, geo)) # Return as string or return as xyz-format string (i.e. top is NAtoms,blankline) - if(xyz): + if (xyz): bldstr = f'{NAtoms}\n\n' for element, coordinates in PySCF_atoms: bldstr += f'{element} {coordinates[0]}, {coordinates[1]}, {coordinates[2]}\n' PySCF_atoms = bldstr return PySCF_atoms + def load_qcschema_hessian(qcschema_dict): ''' Does: loads hessian from qcschema format dictionary @@ -117,8 +117,8 @@ def load_qcschema_hessian(qcschema_dict): NAtom = len(syms) # reshape from (3N)**2 array to (N,N,3,3) - hessian = np.array(qc_h).reshape(NAtom,NAtom,3,3) - return hessian + return np.array(qc_h).reshape(NAtom, NAtom, 3, 3) + def load_qcschema_scf_info(qcschema_dict): ''' @@ -133,18 +133,17 @@ def load_qcschema_scf_info(qcschema_dict): # Restricted wfn has schema scf_occupations_a occ of 1 or 0. 
# Need to double if rhf/rks/rohf method = qcschema_dict["keywords"]["scf"]["method"] - if(method == 'rks' or method == 'roks' or method == 'rhf' or method == 'rohf'): + if (method == 'rks' or method == 'roks' or method == 'rhf' or method == 'rohf'): OccFactor = 2.0 have_beta = False - elif(method == 'uks' or method == 'uhf'): + elif (method == 'uks' or method == 'uhf'): OccFactor = 1.0 have_beta = True - elif(method == 'gks' or method == 'ghf'): + elif (method == 'gks' or method == 'ghf'): OccFactor = 1.0 have_beta = False else: raise RuntimeError('qcschema: cannot determine method..exit') - return # need to reshape MO coefficients for PySCF shape. nao = qcschema_dict["properties"]["calcinfo_nbasis"] @@ -160,34 +159,32 @@ def load_qcschema_scf_info(qcschema_dict): # get the 4 things that PySCF wants # ...remembering to reshape coeffs and scale occupancies. - e_tot = float( qcschema_dict["properties"]["return_energy"] ) + e_tot = float(qcschema_dict["properties"]["return_energy"]) mo_coeff = np.reshape(qcschema_dict["wavefunction"]["scf_orbitals_a"],(nao,nmo)) - mo_occ = np.array( qcschema_dict["wavefunction"]["scf_occupations_a"] )*OccFactor - mo_energy = np.array( qcschema_dict["wavefunction"]["scf_eigenvalues_a"] ) - if(have_beta): + mo_occ = np.array(qcschema_dict["wavefunction"]["scf_occupations_a"])*OccFactor + mo_energy = np.array(qcschema_dict["wavefunction"]["scf_eigenvalues_a"]) + if (have_beta): # for each useful piece of info we need to combine alpha and beta into 2d array, with alpha first # MO occupations mo_occ_beta = qcschema_dict["wavefunction"]["scf_occupations_b"] - mo_occ = np.vstack( (mo_occ, mo_occ_beta) ) + mo_occ = np.vstack((mo_occ, mo_occ_beta)) # MO coefficients mo_coeff_beta = np.reshape(qcschema_dict["wavefunction"]["scf_orbitals_b"],(nao,nmo)) - mo_coeff = np.vstack( (mo_coeff,mo_coeff_beta)) + mo_coeff = np.vstack((mo_coeff,mo_coeff_beta)) mo_coeff = np.reshape(mo_coeff,(2,nao,nmo)) # MO energies - mo_energy_beta = np.array( 
qcschema_dict["wavefunction"]["scf_eigenvalues_b"] ) - mo_energy = np.vstack( (mo_energy, mo_energy_beta) ) + mo_energy_beta = np.array(qcschema_dict["wavefunction"]["scf_eigenvalues_b"]) + mo_energy = np.vstack((mo_energy, mo_energy_beta)) # etot obviously doesn't need manipulation # convert to dictionary for PySCF - scf_dic = {'e_tot' : e_tot, - 'mo_energy': mo_energy, - 'mo_occ' : mo_occ, - 'mo_coeff' : mo_coeff} - - return scf_dic + return {'e_tot': e_tot, + 'mo_energy': mo_energy, + 'mo_occ': mo_occ, + 'mo_coeff': mo_coeff} -def recreate_mol_obj(qcschema_dict,to_Angstrom=False): +def recreate_mol_obj(qcschema_dict, to_Angstrom=False): ''' Does: recreates mol object from qcschema format dictionary Input: @@ -197,31 +194,32 @@ def recreate_mol_obj(qcschema_dict,to_Angstrom=False): Returns: mol object ''' - ## Mol info: ## - PySCF_charge = int( qcschema_dict["molecule"]["molecular_charge"] ) + # ### Mol info: ### + PySCF_charge = int(qcschema_dict["molecule"]["molecular_charge"]) # PySCF 'spin' is number of unpaired electrons, it will be mult-1 - PySCF_spin = int( qcschema_dict["molecule"]["molecular_multiplicity"] ) - 1 - PySCF_basis = str( qcschema_dict["model"]["basis"] ) + PySCF_spin = int(qcschema_dict["molecule"]["molecular_multiplicity"]) - 1 + PySCF_basis = str(qcschema_dict["model"]["basis"]) # Cartesian/Pure basis - PySCF_cart = bool( qcschema_dict["keywords"]["basisSet"]["cartesian"] ) + PySCF_cart = bool(qcschema_dict["keywords"]["basisSet"]["cartesian"]) # Get molecular structure. - PySCF_atoms = load_qcschema_molecule(qcschema_dict, to_Angstrom,False) + PySCF_atoms = load_qcschema_molecule(qcschema_dict, to_Angstrom, False) # Unit Bohr or Angstrom. QCSchema default is Bohr but can change here. 
- if(to_Angstrom): - units='A' + if (to_Angstrom): + units = 'A' else: - units='B' + units = 'B' - ## Create mol ## + # ### Create mol ### mol = pyscf.gto.Mole(atom=PySCF_atoms,basis=PySCF_basis,ecp=PySCF_basis, charge=PySCF_charge,spin=PySCF_spin,cart=PySCF_cart,unit=units) - mol.build(False,False) + mol.build(False, False) return mol + def recreate_scf_obj(qcschema_dict,mol): ''' Does: recreates scf object from qcschema format dictionary @@ -235,25 +233,24 @@ def recreate_scf_obj(qcschema_dict,mol): scf_dict = load_qcschema_scf_info(qcschema_dict) # create scf object - method = qcschema_dict["keywords"]["scf"]["method"] - if(method =='rks'): + method = qcschema_dict["keywords"]["scf"]["method"] + if (method == 'rks'): ks = mol.RKS() - elif(method =='uks'): + elif (method == 'uks'): ks = mol.UKS() - elif(method =='rhf'): + elif (method == 'rhf'): ks = mol.RHF() - elif(method =='uhf'): + elif (method == 'uhf'): ks = mol.UHF() - elif(method =='gks'): + elif (method == 'gks'): ks = mol.GKS() - elif(method =='ghf'): + elif (method == 'ghf'): ks = mol.GHF() else: raise RuntimeError('qcschema: cannot determine method..exit') - return # get functional - if(method == 'rks' or method == 'uks' or method == 'gks'): + if (method == 'rks' or method == 'uks' or method == 'gks'): functional = qcschema_dict["keywords"]["xcFunctional"]["name"] ks.xc = functional diff --git a/pyscf/tools/test/test_cubegen.py b/pyscf/tools/test/test_cubegen.py index 6a383a7e1f..cc25623f6c 100644 --- a/pyscf/tools/test/test_cubegen.py +++ b/pyscf/tools/test/test_cubegen.py @@ -14,7 +14,6 @@ # limitations under the License. 
import unittest -import tempfile from pyscf import lib, gto, scf from pyscf.tools import cubegen @@ -35,7 +34,7 @@ def tearDownModule(): class KnownValues(unittest.TestCase): def test_mep(self): - with tempfile.NamedTemporaryFile() as ftmp: + with lib.NamedTemporaryFile() as ftmp: mep = cubegen.mep(mol, ftmp.name, mf.make_rdm1(), nx=10, ny=10, nz=10) self.assertEqual(mep.shape, (10,10,10)) @@ -47,7 +46,7 @@ def test_mep(self): self.assertAlmostEqual(lib.fp(mep), -4.653995909548524, 5) def test_orb(self): - with tempfile.NamedTemporaryFile() as ftmp: + with lib.NamedTemporaryFile() as ftmp: orb = cubegen.orbital(mol, ftmp.name, mf.mo_coeff[:,0], nx=10, ny=10, nz=10) self.assertEqual(orb.shape, (10,10,10)) @@ -65,7 +64,7 @@ def test_orb(self): def test_rho(self): - with tempfile.NamedTemporaryFile() as ftmp: + with lib.NamedTemporaryFile() as ftmp: rho = cubegen.density(mol, ftmp.name, mf.make_rdm1(), nx=10, ny=10, nz=10) self.assertEqual(rho.shape, (10,10,10)) @@ -96,7 +95,7 @@ def test_rho_with_pbc(self): cell.output = '/dev/null' cell.build() mf = cell.RHF().run() - with tempfile.NamedTemporaryFile() as ftmp: + with lib.NamedTemporaryFile() as ftmp: rho = cubegen.density(cell, ftmp.name, mf.make_rdm1(), nx=10, ny=10, nz=10) cc = cubegen.Cube(cell) diff --git a/pyscf/tools/test/test_fcidump.py b/pyscf/tools/test/test_fcidump.py index 45ae7df48d..c51de60926 100644 --- a/pyscf/tools/test/test_fcidump.py +++ b/pyscf/tools/test/test_fcidump.py @@ -14,13 +14,11 @@ # limitations under the License. 
import unittest -import tempfile from functools import reduce import numpy from pyscf import lib from pyscf import gto, scf, ao2mo from pyscf.tools import fcidump -import tempfile def setUpModule(): global mol, mf @@ -36,7 +34,7 @@ def setUpModule(): mol.verbose = 0 mol.build(0, 0) - mf = mol.RHF(chkfile=tempfile.NamedTemporaryFile().name).run() + mf = mol.RHF(chkfile=lib.NamedTemporaryFile().name).run() def tearDownModule(): global mol, mf @@ -44,19 +42,19 @@ def tearDownModule(): class KnownValues(unittest.TestCase): def test_from_chkfile(self): - tmpfcidump = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + tmpfcidump = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) fcidump.from_chkfile(tmpfcidump.name, mf.chkfile, tol=1e-15, molpro_orbsym=True) def test_from_integral(self): - tmpfcidump = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + tmpfcidump = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) h1 = reduce(numpy.dot, (mf.mo_coeff.T, mf.get_hcore(), mf.mo_coeff)) h2 = ao2mo.full(mf._eri, mf.mo_coeff) fcidump.from_integrals(tmpfcidump.name, h1, h2, h1.shape[0], mol.nelectron, tol=1e-15) def test_read(self): - with tempfile.NamedTemporaryFile(mode='w+') as f: + with lib.NamedTemporaryFile(mode='w+') as f: f.write('''&FCI NORB=4, NELEC=4, MS2=0, ISYM=1, ORBSYM=1,2,3,4, @@ -72,7 +70,7 @@ def test_read(self): result = fcidump.read(f.name) self.assertEqual(result['ISYM'], 1) - with tempfile.NamedTemporaryFile(mode='w+') as f: + with lib.NamedTemporaryFile(mode='w+') as f: f.write('''&FCI NORB=4, NELEC=4, MS2=0, ISYM=1,ORBSYM=1,2,3,4, &END 0.42 1 1 1 1 0.33 1 1 2 2 @@ -87,7 +85,7 @@ def test_read(self): def test_to_scf(self): '''Test from_scf and to_scf''' - tmpfcidump = tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) + tmpfcidump = lib.NamedTemporaryFile(dir=lib.param.TMPDIR) fcidump.from_scf(mf, tmpfcidump.name) mf1 = fcidump.to_scf(tmpfcidump.name) mf1.init_guess = mf.make_rdm1() @@ -96,9 +94,9 @@ def test_to_scf(self): 
self.assertTrue(numpy.array_equal(mf.orbsym, mf1.orbsym)) def test_to_scf_with_symmetry(self): - with tempfile.NamedTemporaryFile(dir=lib.param.TMPDIR) as tmpfcidump: + with lib.NamedTemporaryFile(dir=lib.param.TMPDIR) as tmpfcidump: mol = gto.M(atom='H 0 0 0; H 1 0 0', symmetry=True) - mf = mol.RHF().run() + mf = mol.RHF(chkfile=lib.NamedTemporaryFile().name).run() fcidump.from_scf(mf, tmpfcidump.name) mf = fcidump.to_scf(tmpfcidump.name) self.assertEqual(mf.mol.groupname, 'D2h') diff --git a/pyscf/tools/test/test_finite_diff.py b/pyscf/tools/test/test_finite_diff.py index 3af64c8c88..10abd08804 100644 --- a/pyscf/tools/test/test_finite_diff.py +++ b/pyscf/tools/test/test_finite_diff.py @@ -62,7 +62,6 @@ def test_no_scanner(self): def test_convergence_failed(self): mol = pyscf.M(atom='H 0 0 0; H 0 0 1') - mol.verbose = 4 geom_ref = mol.atom_coords() mf = mol.RHF().run() ref = mf.Gradients().kernel() diff --git a/pyscf/tools/test/test_molden.py b/pyscf/tools/test/test_molden.py index 944c7c9178..32cbdd24f6 100644 --- a/pyscf/tools/test/test_molden.py +++ b/pyscf/tools/test/test_molden.py @@ -14,7 +14,6 @@ # limitations under the License. 
import unittest -import tempfile from pyscf import lib, gto, scf from pyscf.tools import molden @@ -37,7 +36,7 @@ def tearDownModule(): class KnownValues(unittest.TestCase): def test_dump_scf(self): - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() fname = ftmp.name molden.dump_scf(mf, fname) res = molden.read(fname) @@ -45,7 +44,7 @@ def test_dump_scf(self): self.assertAlmostEqual(abs(mf.mo_coeff-mo_coeff).max(), 0, 12) def test_dump_uhf(self): - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() fname = ftmp.name with lib.temporary_env(mol, spin=2, charge=2): mf = scf.UHF(mol).run() @@ -57,7 +56,7 @@ def test_dump_uhf(self): self.assertAlmostEqual(abs(mf.mo_coeff[1]-mo_coeff[1]).max(), 0, 12) def test_dump_cartesian_gto_orbital(self): - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() fname = ftmp.name with lib.temporary_env(mol, cart=True, symmetry=False): mf = scf.UHF(mol).run() @@ -69,7 +68,7 @@ def test_dump_cartesian_gto_orbital(self): self.assertAlmostEqual(abs(mf.mo_coeff[1]-mo_coeff[1]).max(), 0, 12) def test_dump_cartesian_gto_symm_orbital(self): - ftmp = tempfile.NamedTemporaryFile() + ftmp = lib.NamedTemporaryFile() fname = ftmp.name pmol = mol.copy() @@ -83,7 +82,7 @@ def test_dump_cartesian_gto_symm_orbital(self): self.assertAlmostEqual(abs(mf.mo_coeff-mo_coeff).max(), 0, 12) def test_basis_not_sorted(self): - with tempfile.NamedTemporaryFile('w') as ftmp: + with lib.NamedTemporaryFile('w') as ftmp: ftmp.write('''\ [Molden Format] made by pyscf v[2.4.0] diff --git a/pyscf/tools/wfn_format.py b/pyscf/tools/wfn_format.py index 6470604012..2fba89516b 100644 --- a/pyscf/tools/wfn_format.py +++ b/pyscf/tools/wfn_format.py @@ -90,11 +90,13 @@ [21,24,25,30,33,31,26,34,35,28,22,27,32,29,23], # G [56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38,37,36], # H ] + + def write_mo(fout, mol, mo_coeff, mo_energy=None, mo_occ=None): if mol.cart: raise NotImplementedError('Cartesian basis not 
available') - #FIXME: Duplicated primitives may lead to problems. x2c._uncontract_mol + # FIXME: Duplicated primitives may lead to problems. x2c._uncontract_mol # is the workaround at the moment to remove duplicated primitives. from pyscf.x2c import x2c mol, ctr = x2c._uncontract_mol(mol, True, 0.) @@ -118,8 +120,7 @@ def write_mo(fout, mol, mo_coeff, mo_energy=None, mo_occ=None): mosub = numpy.einsum('yki,cy,pk->pci', mosub, c2s, c) mo_cart.append(mosub.transpose(1,0,2).reshape(-1,nmo)) - for t in TYPE_MAP[l]: - types.append([t]*np) + types.extend([t] * np for t in TYPE_MAP[l]) ncart = mol.bas_len_cart(ib) exps.extend([es]*ncart) centers.extend([ia+1]*(np*ncart)) @@ -139,11 +140,11 @@ def write_mo(fout, mol, mo_coeff, mo_energy=None, mo_occ=None): % (mol.atom_pure_symbol(ia), ia+1, ia+1, x, y, z, mol.atom_charge(ia))) for i0, i1 in lib.prange(0, nprim, 20): - fout.write('CENTRE ASSIGNMENTS %s\n' % ''.join('%3d'%x for x in centers[i0:i1])) + fout.write('CENTRE ASSIGNMENTS %s\n' % ''.join('%3d' % x for x in centers[i0:i1])) for i0, i1 in lib.prange(0, nprim, 20): - fout.write('TYPE ASSIGNMENTS %s\n' % ''.join('%3d'%x for x in types[i0:i1])) + fout.write('TYPE ASSIGNMENTS %s\n' % ''.join('%3d' % x for x in types[i0:i1])) for i0, i1 in lib.prange(0, nprim, 5): - fout.write('EXPONENTS %s\n' % ' '.join('%13.7E'%x for x in exps[i0:i1])) + fout.write('EXPONENTS %s\n' % ' '.join('%13.7E' % x for x in exps[i0:i1])) for k in range(nmo): mo = mo_cart[:,k] @@ -156,15 +157,16 @@ def write_mo(fout, mol, mo_coeff, mo_energy=None, mo_occ=None): fout.write('MO %-4d OCC NO = %12.8f ORB. 
ENERGY = %12.8f\n' % (k+1, mo_occ[k], mo_energy[k])) for i0, i1 in lib.prange(0, nprim, 5): - fout.write(' %s\n' % ' '.join('%15.8E'%x for x in mo[i0:i1])) + fout.write(' %s\n' % ' '.join('%15.8E' % x for x in mo[i0:i1])) fout.write('END DATA\n') if mo_energy is None or mo_occ is None: fout.write('ALDET ENERGY = 0.0000000000 VIRIAL(-V/T) = 0.00000000\n') elif mo_energy is None and mo_occ is None: pass - else : + else: fout.write('RHF ENERGY = 0.0000000000 VIRIAL(-V/T) = 0.00000000\n') + def write_ci(fout, fcivec, norb, nelec, ncore=0): from pyscf import fci if isinstance(nelec, (int, numpy.number)): @@ -179,6 +181,7 @@ def write_ci(fout, fcivec, norb, nelec, ncore=0): nb = fci.cistring.num_strings(norb, nelecb) stringsa = fci.cistring.gen_strings4orblist(range(norb), neleca) stringsb = fci.cistring.gen_strings4orblist(range(norb), nelecb) + def str2orbidx(string, ncore): bstring = bin(string) return [i+1+ncore for i,s in enumerate(bstring[::-1]) if s == '1'] @@ -188,9 +191,10 @@ def str2orbidx(string, ncore): addra, addrb = divmod(iaddr, nb) idxa = ['%3d' % x for x in str2orbidx(stringsa[addra], ncore)] idxb = ['%3d' % (-x) for x in str2orbidx(stringsb[addrb], ncore)] - #TODO:add a cuttoff and a counter for ndets + # TODO:add a cuttoff and a counter for ndets fout.write('%18.10E %s %s\n' % (fcivec[addra,addrb], ' '.join(idxa), ' '.join(idxb))) + if __name__ == '__main__': from pyscf import scf, mcscf, symm from pyscf.tools import molden @@ -198,9 +202,9 @@ def str2orbidx(string, ncore): unit='B', basis='ccpvtz', verbose=4, symmetry=1, symmetry_subgroup='d2h') mf = scf.RHF(mol).run() - coeff = mf.mo_coeff[:,mf.mo_occ>0] - energy = mf.mo_energy[mf.mo_occ>0] - occ = mf.mo_occ[mf.mo_occ>0] + coeff = mf.mo_coeff[:,mf.mo_occ > 0] + energy = mf.mo_energy[mf.mo_occ > 0] + occ = mf.mo_occ[mf.mo_occ > 0] with open('n2_hf.wfn', 'w') as f2: write_mo(f2, mol, coeff, energy, occ) # diff --git a/pyscf/x2c/sfx2c1e.py b/pyscf/x2c/sfx2c1e.py index 62004154e6..03734abefb 100644 --- 
a/pyscf/x2c/sfx2c1e.py +++ b/pyscf/x2c/sfx2c1e.py @@ -184,19 +184,7 @@ def get_hcore(self, mol=None): h1 = x2c._get_hcore_fw(t, v, w, s, x, c) elif 'ATOM' in self.approx.upper(): - atom_slices = xmol.offset_nr_by_atom() - nao = xmol.nao_nr() - x = numpy.zeros((nao,nao)) - for ia in range(xmol.natm): - ish0, ish1, p0, p1 = atom_slices[ia] - shls_slice = (ish0, ish1, ish0, ish1) - t1 = xmol.intor('int1e_kin', shls_slice=shls_slice) - s1 = xmol.intor('int1e_ovlp', shls_slice=shls_slice) - with xmol.with_rinv_at_nucleus(ia): - z = -xmol.atom_charge(ia) - v1 = z * xmol.intor('int1e_rinv', shls_slice=shls_slice) - w1 = z * xmol.intor('int1e_prinvp', shls_slice=shls_slice) - x[p0:p1,p0:p1] = x2c._x2c1e_xmatrix(t1, v1, w1, s1, c) + x = _atomic_1e_x(xmol) h1 = x2c._get_hcore_fw(t, v, w, s, x, c) else: @@ -253,19 +241,7 @@ def get_xmat(self, mol=None): assert ('1E' in self.approx.upper()) if 'ATOM' in self.approx.upper(): - atom_slices = xmol.offset_nr_by_atom() - nao = xmol.nao_nr() - x = numpy.zeros((nao,nao)) - for ia in range(xmol.natm): - ish0, ish1, p0, p1 = atom_slices[ia] - shls_slice = (ish0, ish1, ish0, ish1) - t1 = xmol.intor('int1e_kin', shls_slice=shls_slice) - s1 = xmol.intor('int1e_ovlp', shls_slice=shls_slice) - with xmol.with_rinv_at_nucleus(ia): - z = -xmol.atom_charge(ia) - v1 = z * xmol.intor('int1e_rinv', shls_slice=shls_slice) - w1 = z * xmol.intor('int1e_prinvp', shls_slice=shls_slice) - x[p0:p1,p0:p1] = x2c._x2c1e_xmatrix(t1, v1, w1, s1, c) + x = _atomic_1e_x(xmol) else: t = xmol.intor_symmetric('int1e_kin') v = xmol.intor_symmetric('int1e_nuc') @@ -297,28 +273,22 @@ def hcore_deriv_generator(self, mol=None, deriv=1): SpinFreeX2C = SpinFreeX2CHelper - -if __name__ == '__main__': - mol = gto.Mole() - mol.build( - verbose = 0, - atom = [["O" , (0. , 0. , 0.)], - [1 , (0. , -0.757 , 0.587)], - [1 , (0. 
, 0.757 , 0.587)] ], - basis = 'ccpvdz-dk', - ) - - method = hf.RHF(mol) - enr = method.kernel() - print('E(NR) = %.12g' % enr) - - method = sfx2c1e(hf.RHF(mol)) - esfx2c = method.kernel() - print('E(SFX2C1E) = %.12g' % esfx2c) - method.with_x2c.basis = 'unc-ccpvqz-dk' - print('E(SFX2C1E) = %.12g' % method.kernel()) - method.with_x2c.approx = 'atom1e' - print('E(SFX2C1E) = %.12g' % method.kernel()) - - mf = method.density_fit().undo_x2c().run() - print('E(DF-NR) = %.12g' % mf.e_tot) +def _atomic_1e_x(mol): + atoms = x2c._atoms_in_mole(mol) + x_conf = {} + c = lib.param.LIGHT_SPEED + for elem, atom in atoms.items(): + t1 = atom.intor_symmetric('int1e_kin') + s1 = atom.intor_symmetric('int1e_ovlp') + v1 = atom.intor_symmetric('int1e_nuc') + w1 = atom.intor_symmetric('int1e_pnucp') + x_conf[elem] = x2c._x2c1e_xmatrix(t1, v1, w1, s1, c) + + atom_slices = mol.offset_nr_by_atom() + nao = mol.nao + x = numpy.zeros((nao, nao)) + for ia in range(mol.natm): + p0, p1 = atom_slices[ia, 2:] + elem = mol.atom_symbol(ia) + x[p0:p1,p0:p1] = x_conf[elem] + return x diff --git a/pyscf/x2c/sfx2c1e_grad.py b/pyscf/x2c/sfx2c1e_grad.py index fa8fa9a881..18a4e4c223 100644 --- a/pyscf/x2c/sfx2c1e_grad.py +++ b/pyscf/x2c/sfx2c1e_grad.py @@ -25,7 +25,7 @@ import scipy.linalg from pyscf import lib from pyscf import gto -from pyscf.x2c import x2c +from pyscf.x2c import x2c, sfx2c1e def hcore_grad_generator(x2cobj, mol=None): '''nuclear gradients of 1-component X2c hcore Hamiltonian (spin-free part only) @@ -49,25 +49,13 @@ def hcore_deriv(atm_id): def gen_sf_hfw(mol, approx='1E'): approx = approx.upper() - c = lib.param.LIGHT_SPEED h0, s0 = _get_h0_s0(mol) e0, c0 = scipy.linalg.eigh(h0, s0) - aoslices = mol.aoslice_by_atom() nao = mol.nao_nr() if 'ATOM' in approx: - x0 = numpy.zeros((nao,nao)) - for ia in range(mol.natm): - ish0, ish1, p0, p1 = aoslices[ia] - shls_slice = (ish0, ish1, ish0, ish1) - t1 = mol.intor('int1e_kin', shls_slice=shls_slice) - s1 = mol.intor('int1e_ovlp', 
shls_slice=shls_slice) - with mol.with_rinv_at_nucleus(ia): - z = -mol.atom_charge(ia) - v1 = z * mol.intor('int1e_rinv', shls_slice=shls_slice) - w1 = z * mol.intor('int1e_prinvp', shls_slice=shls_slice) - x0[p0:p1,p0:p1] = x2c._x2c1e_xmatrix(t1, v1, w1, s1, c) + x0 = sfx2c1e._atomic_1e_x(mol) else: cl0 = c0[:nao,nao:] cs0 = c0[nao:,nao:] @@ -244,51 +232,3 @@ def _get_r1(s0_roots, s_nesc0, s1, s_nesc1, r0_roots): R1 += reduce(numpy.dot, (vr0_s0_invsqrt.T, vr0_wr0_sqrt.T, s1_sqrt)) R1 = reduce(numpy.dot, (v_s, R1, v_s.T)) return R1 - - -if __name__ == '__main__': - bak = lib.param.LIGHT_SPEED - lib.param.LIGHT_SPEED = 10 - def get_h(mol): - c = lib.param.LIGHT_SPEED - t = mol.intor_symmetric('int1e_kin') - v = mol.intor_symmetric('int1e_nuc') - s = mol.intor_symmetric('int1e_ovlp') - w = mol.intor_symmetric('int1e_pnucp') - return x2c._x2c1e_get_hcore(t, v, w, s, c) - - mol = gto.M( - verbose = 0, - atom = [["O" , (0. , 0. , 0.0001)], - [1 , (0. , -0.757 , 0.587)], - [1 , (0. , 0.757 , 0.587)]], - basis = '3-21g', - ) - h_1 = get_h(mol) - - mol = gto.M( - verbose = 0, - atom = [["O" , (0. , 0. ,-0.0001)], - [1 , (0. , -0.757 , 0.587)], - [1 , (0. , 0.757 , 0.587)]], - basis = '3-21g', - ) - h_2 = get_h(mol) - h_ref = (h_1 - h_2) / 0.0002 * lib.param.BOHR - - mol = gto.M( - verbose = 0, - atom = [["O" , (0. , 0. , 0. )], - [1 , (0. , -0.757 , 0.587)], - [1 , (0. 
, 0.757 , 0.587)]], - basis = '3-21g', - ) - hcore_deriv = gen_sf_hfw(mol) - h1 = hcore_deriv(0) - print(abs(h1[2]-h_ref).max()) - lib.param.LIGHT_SPEED = bak - - print(lib.finger(h1) - -1.4618392662849411) - hcore_deriv = gen_sf_hfw(mol, approx='atom1e') - h1 = hcore_deriv(0) - print(lib.finger(h1) - -1.3596826558976405) diff --git a/pyscf/x2c/sfx2c1e_hess.py b/pyscf/x2c/sfx2c1e_hess.py index 2385d6422d..2e629d4244 100644 --- a/pyscf/x2c/sfx2c1e_hess.py +++ b/pyscf/x2c/sfx2c1e_hess.py @@ -26,7 +26,7 @@ import scipy.linalg from pyscf import lib from pyscf import gto -from pyscf.x2c import x2c +from pyscf.x2c import x2c, sfx2c1e from pyscf.x2c import sfx2c1e_grad def hcore_hess_generator(x2cobj, mol=None): @@ -60,17 +60,7 @@ def gen_sf_hfw(mol, approx='1E'): aoslices = mol.aoslice_by_atom() nao = mol.nao_nr() if 'ATOM' in approx: - x0 = numpy.zeros((nao,nao)) - for ia in range(mol.natm): - ish0, ish1, p0, p1 = aoslices[ia] - shls_slice = (ish0, ish1, ish0, ish1) - t1 = mol.intor('int1e_kin', shls_slice=shls_slice) - s1 = mol.intor('int1e_ovlp', shls_slice=shls_slice) - with mol.with_rinv_at_nucleus(ia): - z = -mol.atom_charge(ia) - v1 = z * mol.intor('int1e_rinv', shls_slice=shls_slice) - w1 = z * mol.intor('int1e_prinvp', shls_slice=shls_slice) - x0[p0:p1,p0:p1] = x2c._x2c1e_xmatrix(t1, v1, w1, s1, c) + x0 = sfx2c1e._atomic_1e_x(mol) else: cl0 = c0[:nao,nao:] cs0 = c0[nao:,nao:] @@ -329,46 +319,3 @@ def _get_r2(s0_roots, sa0, s1i, sa1i, s1j, sa1j, s2, sa2, r0_roots): R2 += lib.einsum('i,iq,qj->ij' , w_invsqrt , R0_mid , s2_sqrt) R2 = reduce(numpy.dot, (v_s, R2, v_s.T)) return R2 - - -if __name__ == '__main__': - bak = lib.param.LIGHT_SPEED - lib.param.LIGHT_SPEED = 10 - - mol = gto.M( - verbose = 0, - atom = [["O" , (0. , 0. , 0.0001)], - [1 , (0. , -0.757 , 0.587)], - [1 , (0. , 0.757 , 0.587)]], - basis = '3-21g', - ) - h1_deriv_1 = sfx2c1e_grad.gen_sf_hfw(mol, approx='1E') - - mol = gto.M( - verbose = 0, - atom = [["O" , (0. , 0. ,-0.0001)], - [1 , (0. 
, -0.757 , 0.587)], - [1 , (0. , 0.757 , 0.587)]], - basis = '3-21g', - ) - h1_deriv_2 = sfx2c1e_grad.gen_sf_hfw(mol, approx='1E') - - mol = gto.M( - verbose = 0, - atom = [["O" , (0. , 0. , 0. )], - [1 , (0. , -0.757 , 0.587)], - [1 , (0. , 0.757 , 0.587)]], - basis = '3-21g', - ) - h2_deriv = gen_sf_hfw(mol) - - h2 = h2_deriv(0,0) - h2_ref = (h1_deriv_1(0)[2] - h1_deriv_2(0)[2]) / 0.0002 * lib.param.BOHR - print(abs(h2[2,2]-h2_ref).max()) - print(lib.finger(h2) - 33.71188112440316) - - h2 = h2_deriv(1,0) - h2_ref = (h1_deriv_1(1)[2] - h1_deriv_2(1)[2]) / 0.0002 * lib.param.BOHR - print(abs(h2[2,2]-h2_ref).max()) - print(lib.finger(h2) - -23.609411428378138) - lib.param.LIGHT_SPEED = bak diff --git a/pyscf/x2c/test/test_tdscf.py b/pyscf/x2c/test/test_tdscf.py index a23607b78d..3a1aa04ca3 100644 --- a/pyscf/x2c/test/test_tdscf.py +++ b/pyscf/x2c/test/test_tdscf.py @@ -17,7 +17,6 @@ # import unittest -import tempfile import numpy from pyscf import lib, gto, scf from pyscf.dft import radi @@ -40,8 +39,7 @@ def setUpModule(): mol.spin = 1 mol.build() - mf_lda = dft.UKS(mol).set(xc='lda,', conv_tol=1e-12, - chkfile=tempfile.NamedTemporaryFile().name).newton().run() + mf_lda = dft.UKS(mol).set(xc='lda,', conv_tol=1e-12, chkfile=lib.NamedTemporaryFile().name).newton().run() def tearDownModule(): global mol, mf_lda diff --git a/pyscf/x2c/test/test_x2c.py b/pyscf/x2c/test/test_x2c.py index eb061626a4..443b002714 100644 --- a/pyscf/x2c/test/test_x2c.py +++ b/pyscf/x2c/test/test_x2c.py @@ -17,6 +17,7 @@ # import numpy +import scipy.linalg import unittest from pyscf import gto from pyscf import scf @@ -185,23 +186,42 @@ def test_lindep_xbasis(self): C F 0.761000000E+00 0.100000000E+01 ''') - xmol, c = x2c.X2C(mol).get_xmol(mol) + x2c_obj = x2c.X2C(mol) + xmol, c = x2c_obj.get_xmol(mol) self.assertEqual(xmol.nbas, 18) self.assertEqual(xmol.nao, 42) self.assertAlmostEqual(lib.fp(c), -5.480689638416739, 12) + hcore = x2c_obj.get_hcore() + s = 
mol.intor_symmetric('int1e_ovlp_spinor') + e_ref = scipy.linalg.eigvalsh(hcore, s) + + mol = gto.M(atom='C', basis=(mol.basis, [[0, [0.128500001, 1]]])) + x2c_obj = x2c.X2C(mol) + xmol, c = x2c_obj.get_xmol(mol) + self.assertEqual(xmol.nbas, 19) + self.assertEqual(xmol.nao, 43) + hcore = x2c_obj.get_hcore() + s = mol.intor_symmetric('int1e_ovlp_spinor') + d, t = scipy.linalg.eigh(s) + idx = d > 1e-8 + t = t[:,idx] / numpy.sqrt(d[idx]) + tht = t.T.conj().dot(hcore.dot(t)) + e = scipy.linalg.eigvalsh(tht) + self.assertAlmostEqual(abs(e - e_ref).max(), 0, 6) + def test_get_hcore(self): myx2c = scf.RHF(mol).sfx2c1e() myx2c.with_x2c.get_xmat = lambda xmol: numpy.zeros((xmol.nao, xmol.nao)) h1 = myx2c.with_x2c.get_hcore() ref = mol.intor('int1e_nuc') - self.assertAlmostEqual(abs(h1 - ref).max(), 0, 12) + self.assertAlmostEqual(abs(h1 - ref).max(), 0, 11) with_x2c = x2c.X2C(mol) with_x2c.get_xmat = lambda xmol: numpy.zeros((xmol.nao_2c(), xmol.nao_2c())) h1 = with_x2c.get_hcore() ref = mol.intor('int1e_nuc_spinor') - self.assertAlmostEqual(abs(h1 - ref).max(), 0, 12) + self.assertAlmostEqual(abs(h1 - ref).max(), 0, 11) def test_ghf(self): # Test whether the result of spinor X2C is a solution of .GHF().x2c() @@ -225,9 +245,20 @@ def test_ghf_atom(self): mf_atom1e = mol.GHF().x2c1e() mf_atom1e.with_x2c.approx = 'ATOM1E' mf_atom1e.kernel() - self.assertAlmostEqual(abs(mf_1e.e_tot - mf_atom1e.e_tot).max(), 0, 9) + self.assertAlmostEqual(mf_1e.e_tot, mf_atom1e.e_tot, 9) self.assertAlmostEqual(abs(mf_1e.mo_energy - mf_atom1e.mo_energy).max(), 0, 9) + with lib.temporary_env(lib.param, LIGHT_SPEED=15.): + mol = gto.M(atom='Ne 0 1 -1; Ne 0 8 8', basis='ccpvdz') + mf_1e = mol.GHF().x2c1e() + mf_1e.kernel() + mf_atom1e = mol.GHF().x2c1e() + mf_atom1e.with_x2c.approx = 'ATOM1E' + mf_atom1e.kernel() + self.assertAlmostEqual(mf_1e.e_tot, -267.39699993561, 8) + self.assertAlmostEqual(mf_1e.e_tot, mf_atom1e.e_tot, 8) + self.assertAlmostEqual(abs(mf_1e.mo_energy - 
mf_atom1e.mo_energy).max(), 0, 6) + def test_gks(self): mol = gto.M(atom='C', basis='ccpvdz-dk') ref = mol.DKS(xc='b3lyp').x2c().run() @@ -241,6 +272,17 @@ def test_gks(self): self.assertAlmostEqual(mf.e_tot, ref.e_tot, 9) self.assertAlmostEqual(abs(mf.dip_moment() - ref.dip_moment()).max(), 0, 9) + with lib.temporary_env(lib.param, LIGHT_SPEED=15.): + mol = gto.M(atom='Ne 0 1 -1; Ne 0 8 8', basis='ccpvdz') + mf_1e = mol.DKS().x2c1e() + mf_1e.kernel() + mf_atom1e = mol.DKS().x2c1e() + mf_atom1e.with_x2c.approx = 'ATOM1E' + mf_atom1e.kernel() + self.assertAlmostEqual(mf_1e.e_tot, -266.688128052731, 8) + self.assertAlmostEqual(mf_1e.e_tot, mf_atom1e.e_tot, 8) + self.assertAlmostEqual(abs(mf_1e.mo_energy - mf_atom1e.mo_energy).max(), 0, 6) + def test_undo_x2c(self): mf = mol.RHF().x2c().density_fit() self.assertEqual(mf.__class__.__name__, 'DFsfX2C1eRHF') diff --git a/pyscf/x2c/test/test_x2c_grad.py b/pyscf/x2c/test/test_x2c_grad.py index c823154ad8..2fe158d573 100644 --- a/pyscf/x2c/test/test_x2c_grad.py +++ b/pyscf/x2c/test/test_x2c_grad.py @@ -19,7 +19,7 @@ import scipy.linalg from pyscf import lib from pyscf import gto -from pyscf.x2c import sfx2c1e +from pyscf.x2c import x2c, sfx2c1e from pyscf.x2c import sfx2c1e_grad def _sqrt0(a): @@ -268,6 +268,47 @@ def test_hfw(self): fh = x2cobj.hcore_deriv_generator(deriv=1) self.assertAlmostEqual(abs(fh(0)[2] - fh_ref).max(), 0, 7) + def test_hcore(self): + with lib.light_speed(10) as c: + mol = gto.M( + verbose = 0, + atom = [["O" , (0. , 0. , 0.0001)], + [1 , (0. , -0.757 , 0.587)], + [1 , (0. , 0.757 , 0.587)]], + basis = '3-21g', + ) + h_1 = sfx2c1e.SpinFreeX2CHelper(mol).set(xuncontract=False).get_hcore() + ha_1 = sfx2c1e.SpinFreeX2CHelper(mol).set(xuncontract=False, approx='ATOM1E').get_hcore() + + mol = gto.M( + verbose = 0, + atom = [["O" , (0. , 0. ,-0.0001)], + [1 , (0. , -0.757 , 0.587)], + [1 , (0. 
, 0.757 , 0.587)]], + basis = '3-21g', + ) + h_2 = sfx2c1e.SpinFreeX2CHelper(mol).set(xuncontract=False).get_hcore() + ha_2 = sfx2c1e.SpinFreeX2CHelper(mol).set(xuncontract=False, approx='ATOM1E').get_hcore() + h_ref = (h_1 - h_2) / 0.0002 * lib.param.BOHR + ha_ref = (ha_1 - ha_2) / 0.0002 * lib.param.BOHR + + mol = gto.M( + verbose = 0, + atom = [["O" , (0. , 0. , 0. )], + [1 , (0. , -0.757 , 0.587)], + [1 , (0. , 0.757 , 0.587)]], + basis = '3-21g', + ) + hcore_deriv = sfx2c1e_grad.gen_sf_hfw(mol) + h1 = hcore_deriv(0) + self.assertAlmostEqual(abs(h1[2]-h_ref).max(), 0, 6) + + self.assertAlmostEqual(lib.fp(h1), -1.4618392662849411, 9) + hcore_deriv = sfx2c1e_grad.gen_sf_hfw(mol, approx='atom1e') + h1 = hcore_deriv(0) + self.assertAlmostEqual(abs(h1[2]-ha_ref).max(), 0, 6) + self.assertAlmostEqual(lib.fp(h1), -1.4802587171126063, 9) + if __name__ == "__main__": print("Full Tests for sfx2c1e gradients") unittest.main() diff --git a/pyscf/x2c/test/test_x2c_hess.py b/pyscf/x2c/test/test_x2c_hess.py index e1cc187f9e..06285640ce 100644 --- a/pyscf/x2c/test/test_x2c_hess.py +++ b/pyscf/x2c/test/test_x2c_hess.py @@ -637,6 +637,53 @@ def test_hfw2(self): h2_ref = (h1_deriv_1(1)[2] - h1_deriv_2(1)[2]) / 0.0002 * lib.param.BOHR self.assertAlmostEqual(abs(h2[2,2]-h2_ref).max(), 0, 7) + def test_hcore(self): + with lib.light_speed(10) as c: + mol = gto.M( + verbose = 0, + atom = [["O" , (0. , 0. , 0.0001)], + [1 , (0. , -0.757 , 0.587)], + [1 , (0. , 0.757 , 0.587)]], + basis = '3-21g', + ) + h1_deriv_1 = sfx2c1e_grad.gen_sf_hfw(mol, approx='1E') + ha1_deriv_1 = sfx2c1e_grad.gen_sf_hfw(mol, approx='ATOM1E') + + mol = gto.M( + verbose = 0, + atom = [["O" , (0. , 0. ,-0.0001)], + [1 , (0. , -0.757 , 0.587)], + [1 , (0. , 0.757 , 0.587)]], + basis = '3-21g', + ) + h1_deriv_2 = sfx2c1e_grad.gen_sf_hfw(mol, approx='1E') + ha1_deriv_2 = sfx2c1e_grad.gen_sf_hfw(mol, approx='ATOM1E') + + mol = gto.M( + verbose = 0, + atom = [["O" , (0. , 0. , 0. )], + [1 , (0. 
, -0.757 , 0.587)], + [1 , (0. , 0.757 , 0.587)]], + basis = '3-21g', + ) + h2_deriv = sfx2c1e_hess.gen_sf_hfw(mol) + ha2_deriv = sfx2c1e_hess.gen_sf_hfw(mol, approx='ATOM1E') + + h2 = h2_deriv(0,0) + h2_ref = (h1_deriv_1(0)[2] - h1_deriv_2(0)[2]) / 0.0002 * lib.param.BOHR + self.assertAlmostEqual(abs(h2[2,2]-h2_ref).max(), 0, 6) + self.assertAlmostEqual(lib.fp(h2), 33.71188112440316, 9) + + h2 = h2_deriv(1,0) + h2_ref = (h1_deriv_1(1)[2] - h1_deriv_2(1)[2]) / 0.0002 * lib.param.BOHR + self.assertAlmostEqual(abs(h2[2,2]-h2_ref).max(), 0, 6) + self.assertAlmostEqual(lib.fp(h2), -23.609411428378138, 7) + + h2 = ha2_deriv(0,0) + h2_ref = (ha1_deriv_1(0)[2] - ha1_deriv_2(0)[2]) / 0.0002 * lib.param.BOHR + self.assertAlmostEqual(abs(h2[2,2]-h2_ref).max(), 0, 6) + self.assertAlmostEqual(lib.fp(h2), 33.718665748856324, 9) + if __name__ == "__main__": print("Full Tests for sfx2c1e gradients") diff --git a/pyscf/x2c/x2c.py b/pyscf/x2c/x2c.py index d2bb85526c..408ac0736c 100644 --- a/pyscf/x2c/x2c.py +++ b/pyscf/x2c/x2c.py @@ -91,19 +91,7 @@ def get_hcore(self, mol=None): h1 = _get_hcore_fw(t, v, w, s, x, c) elif 'ATOM' in self.approx.upper(): - atom_slices = xmol.offset_2c_by_atom() - n2c = xmol.nao_2c() - x = numpy.zeros((n2c,n2c), dtype=numpy.complex128) - for ia in range(xmol.natm): - ish0, ish1, p0, p1 = atom_slices[ia] - shls_slice = (ish0, ish1, ish0, ish1) - s1 = xmol.intor('int1e_ovlp_spinor', shls_slice=shls_slice) - t1 = xmol.intor('int1e_spsp_spinor', shls_slice=shls_slice) * .5 - with xmol.with_rinv_at_nucleus(ia): - z = -xmol.atom_charge(ia) - v1 = z*xmol.intor('int1e_rinv_spinor', shls_slice=shls_slice) - w1 = z*xmol.intor('int1e_sprinvsp_spinor', shls_slice=shls_slice) - x[p0:p1,p0:p1] = _x2c1e_xmatrix(t1, v1, w1, s1, c) + x = _spinor_atomic_1e_x(xmol) h1 = _get_hcore_fw(t, v, w, s, x, c) else: @@ -239,19 +227,7 @@ def get_xmat(self, mol=None): assert ('1E' in self.approx.upper()) if 'ATOM' in self.approx.upper(): - atom_slices = xmol.offset_2c_by_atom() - 
n2c = xmol.nao_2c() - x = numpy.zeros((n2c,n2c), dtype=numpy.complex128) - for ia in range(xmol.natm): - ish0, ish1, p0, p1 = atom_slices[ia] - shls_slice = (ish0, ish1, ish0, ish1) - s1 = xmol.intor('int1e_ovlp_spinor', shls_slice=shls_slice) - t1 = xmol.intor('int1e_spsp_spinor', shls_slice=shls_slice) * .5 - with xmol.with_rinv_at_nucleus(ia): - z = -xmol.atom_charge(ia) - v1 = z*xmol.intor('int1e_rinv_spinor', shls_slice=shls_slice) - w1 = z*xmol.intor('int1e_sprinvsp_spinor', shls_slice=shls_slice) - x[p0:p1,p0:p1] = _x2c1e_xmatrix(t1, v1, w1, s1, c) + x = _spinor_atomic_1e_x(xmol) else: s = xmol.intor_symmetric('int1e_ovlp_spinor') t = xmol.intor_symmetric('int1e_spsp_spinor') * .5 @@ -277,6 +253,8 @@ def reset(self, mol=None): self.mol = mol return self + to_gpu = lib.to_gpu + class SpinorX2CHelper(X2CHelperBase): '''2-component X2c (including spin-free and spin-dependent terms) in the j-adapted spinor basis. @@ -309,21 +287,7 @@ def get_hcore(self, mol=None): h1 = _get_hcore_fw(t, v, w, s, x, c) elif 'ATOM' in self.approx.upper(): - atom_slices = xmol.offset_nr_by_atom() - # spin-orbital basis is twice the size of NR basis - atom_slices[:,2:] *= 2 - nao = xmol.nao_nr() * 2 - x = numpy.zeros((nao,nao), dtype=numpy.complex128) - for ia in range(xmol.natm): - ish0, ish1, p0, p1 = atom_slices[ia] - shls_slice = (ish0, ish1, ish0, ish1) - t1 = _block_diag(xmol.intor('int1e_kin', shls_slice=shls_slice)) - s1 = _block_diag(xmol.intor('int1e_ovlp', shls_slice=shls_slice)) - with xmol.with_rinv_at_nucleus(ia): - z = -xmol.atom_charge(ia) - v1 = _block_diag(z * xmol.intor('int1e_rinv', shls_slice=shls_slice)) - w1 = _sigma_dot(z * xmol.intor('int1e_sprinvsp', shls_slice=shls_slice)) - x[p0:p1,p0:p1] = _x2c1e_xmatrix(t1, v1, w1, s1, c) + x = _spin_orbital_atomic_1e_x(xmol) h1 = _get_hcore_fw(t, v, w, s, x, c) else: @@ -371,21 +335,7 @@ def get_xmat(self, mol=None): assert ('1E' in self.approx.upper()) if 'ATOM' in self.approx.upper(): - atom_slices = 
xmol.offset_nr_by_atom() - # spin-orbital basis is twice the size of NR basis - atom_slices[:,2:] *= 2 - nao = xmol.nao_nr() * 2 - x = numpy.zeros((nao,nao), dtype=numpy.complex128) - for ia in range(xmol.natm): - ish0, ish1, p0, p1 = atom_slices[ia] - shls_slice = (ish0, ish1, ish0, ish1) - t1 = _block_diag(xmol.intor('int1e_kin', shls_slice=shls_slice)) - s1 = _block_diag(xmol.intor('int1e_ovlp', shls_slice=shls_slice)) - with xmol.with_rinv_at_nucleus(ia): - z = -xmol.atom_charge(ia) - v1 = _block_diag(z * xmol.intor('int1e_rinv', shls_slice=shls_slice)) - w1 = _sigma_dot(z * xmol.intor('int1e_sprinvsp', shls_slice=shls_slice)) - x[p0:p1,p0:p1] = _x2c1e_xmatrix(t1, v1, w1, s1, c) + x = _spin_orbital_atomic_1e_x(xmol) else: t = _block_diag(xmol.intor_symmetric('int1e_kin')) v = _block_diag(xmol.intor_symmetric('int1e_nuc')) @@ -673,8 +623,31 @@ def __init__(self, mol): if dhf.zquatev is None: raise RuntimeError('zquatev library is required to perform Kramers-restricted X2C-RHF') - def _eigh(self, h, s): - return dhf.zquatev.solve_KR_FCSCE(self.mol, h, s) + def check_linear_dependency(self, s, verbose=None): + log = logger.new_logger(self, verbose) + idx = dhf._kramers_pair_sort_ao_idx(self.mol, four_component=False) + s = s[idx[:,None], idx] + e, v = dhf.zquatev.eigh(s) + if log is not None: + abs_e = abs(e) + emax = abs_e.max() + emin = abs_e.min() + c = emax / emin + log.debug('cond(S) = %s', c) + if c > 1e10: + log.warn('Singularity detected in the overlap matrix. ' + 'SCF may be inaccurate and difficult to converge.') + + if hf.remove_overlap_zero_eigenvalue: + mask = e > hf.overlap_zero_eigenvalue_threshold + x = v[:,mask] / numpy.sqrt(e[mask]) + else: + x = v / numpy.sqrt(e) + x1 = numpy.empty_like(x) + x1[idx] = x + return x1 + + _eigh = dhf.RDHF._eigh def to_ks(self, xc='HF'): '''Convert the input mean-field object to an X2C-KS object. 
@@ -824,12 +797,12 @@ def _uncontract_mol(mol, xuncontract=None, exp_drop=0.2): def _sqrt(a, tol=1e-14): - e, v = numpy.linalg.eigh(a) + e, v = scipy.linalg.eigh(a) idx = e > tol return numpy.dot(v[:,idx]*numpy.sqrt(e[idx]), v[:,idx].T.conj()) def _invsqrt(a, tol=1e-14): - e, v = numpy.linalg.eigh(a) + e, v = scipy.linalg.eigh(a) idx = e > tol return numpy.dot(v[:,idx]/numpy.sqrt(e[idx]), v[:,idx].T.conj()) @@ -850,7 +823,7 @@ def _get_hcore_fw(t, v, w, s, x, c): def _get_r(s, snesc): # R^dag \tilde{S} R = S # R = S^{-1/2} [S^{-1/2}\tilde{S}S^{-1/2}]^{-1/2} S^{1/2} - w, v = numpy.linalg.eigh(s) + w, v = scipy.linalg.eigh(s) idx = w > 1e-14 v = v[:,idx] w_sqrt = numpy.sqrt(w[idx]) @@ -859,7 +832,7 @@ def _get_r(s, snesc): # eigenvectors of S as the new basis snesc = reduce(numpy.dot, (v.conj().T, snesc, v)) r_mid = numpy.einsum('i,ij,j->ij', w_invsqrt, snesc, w_invsqrt) - w1, v1 = numpy.linalg.eigh(r_mid) + w1, v1 = scipy.linalg.eigh(r_mid) idx1 = w1 > 1e-14 v1 = v1[:,idx1] r_mid = numpy.dot(v1/numpy.sqrt(w1[idx1]), v1.conj().T) @@ -885,19 +858,20 @@ def _x2c1e_xmatrix(t, v, w, s, c): e, a = scipy.linalg.eigh(h, m) cl = a[:nao,nao:] cs = a[nao:,nao:] - x = numpy.linalg.solve(cl.T, cs.T).T # B = XA + x = scipy.linalg.solve(cl.T, cs.T).T # B = XA except scipy.linalg.LinAlgError: - d, t = numpy.linalg.eigh(m) + d, t = scipy.linalg.eigh(m) idx = d>LINEAR_DEP_THRESHOLD t = t[:,idx] / numpy.sqrt(d[idx]) tht = reduce(numpy.dot, (t.T.conj(), h, t)) - e, a = numpy.linalg.eigh(tht) + e, a = scipy.linalg.eigh(tht) a = numpy.dot(t, a) idx = e > -c**2 cl = a[:nao,idx] cs = a[nao:,idx] - # X = B A^{-1} = B A^T S - x = cs.dot(cl.conj().T).dot(m) + # X = B A^{-1} = B (A^T A)^{-1} A^T + cl_inv = scipy.linalg.solve(cl.conj().T.dot(cl), cl.conj().T) + x = cs.dot(cl_inv) return x def _x2c1e_get_hcore(t, v, w, s, c): @@ -919,11 +893,11 @@ def _x2c1e_get_hcore(t, v, w, s, c): # cs = a[nao:,nao:] e = e[nao:] except scipy.linalg.LinAlgError: - d, t = numpy.linalg.eigh(m) + d, t = 
scipy.linalg.eigh(m) idx = d>LINEAR_DEP_THRESHOLD t = t[:,idx] / numpy.sqrt(d[idx]) tht = reduce(numpy.dot, (t.T.conj(), h, t)) - e, a = numpy.linalg.eigh(tht) + e, a = scipy.linalg.eigh(tht) a = numpy.dot(t, a) idx = e > -c**2 cl = a[:nao,idx] @@ -959,7 +933,7 @@ def _x2c1e_get_hcore(t, v, w, s, c): # = S A R[A]^{-1}^+ A^+ h1 A R[A]^{-1} A^+ S # = S A R[A]^{-1}^+ e R[A]^{-1} A^+ S (2) - w, u = numpy.linalg.eigh(reduce(numpy.dot, (cl.T.conj(), s, cl))) + w, u = scipy.linalg.eigh(reduce(numpy.dot, (cl.T.conj(), s, cl))) idx = w > 1e-14 # Adopt (2) here because X is not appeared in Eq (2). # R[A] = u w^{1/2} u^+, so R[A]^{-1} A^+ S in Eq (2) is @@ -1076,3 +1050,55 @@ def _decontract_spinor(mol, atoms=None): pmol._env = numpy.hstack(env) contr_coeff = scipy.linalg.block_diag(*contr_coeff) return pmol, contr_coeff + +def _atoms_in_mole(mol): + atoms = {} + for i in range(mol.natm): + symb = mol.atom_symbol(i) + if symb not in atoms: + atoms[symb] = atom = mol.copy(deep=False) + mask = mol._bas[:,mole.ATOM_OF] == i + atom._bas = mol._bas[mask] + atom._atm = mol._atm[i:i+1] + atom._bas[:,mole.ATOM_OF] = 0 + return atoms + +def _spin_orbital_atomic_1e_x(mol): + atoms = _atoms_in_mole(mol) + x_conf = {} + c = lib.param.LIGHT_SPEED + for elem, atom in atoms.items(): + t1 = _block_diag(atom.intor_symmetric('int1e_kin')) + s1 = _block_diag(atom.intor_symmetric('int1e_ovlp')) + v1 = _block_diag(atom.intor_symmetric('int1e_nuc')) + w1 = _sigma_dot(atom.intor('int1e_spnucsp')) + x_conf[elem] = _x2c1e_xmatrix(t1, v1, w1, s1, c) + + atom_slices = mol.offset_nr_by_atom() + nao = mol.nao_nr() + x = numpy.zeros((2, nao, 2, nao), dtype=numpy.complex128) + for ia in range(mol.natm): + p0, p1 = atom_slices[ia, 2:] + elem = mol.atom_symbol(ia) + x[:,p0:p1,:,p0:p1] = x_conf[elem].reshape(2, p1-p0, 2, p1-p0) + return x.reshape(nao*2, nao*2) + +def _spinor_atomic_1e_x(mol): + atoms = _atoms_in_mole(mol) + x_conf = {} + c = lib.param.LIGHT_SPEED + for elem, atom in atoms.items(): + t1 = 
atom.intor_symmetric('int1e_kin_spinor') + s1 = atom.intor_symmetric('int1e_ovlp_spinor') + v1 = atom.intor_symmetric('int1e_nuc_spinor') + w1 = atom.intor_symmetric('int1e_spnucsp_spinor') + x_conf[elem] = _x2c1e_xmatrix(t1, v1, w1, s1, c) + + atom_slices = mol.offset_2c_by_atom() + nao = mol.nao_2c() + x = numpy.zeros((nao, nao), dtype=numpy.complex128) + for ia in range(mol.natm): + p0, p1 = atom_slices[ia, 2:] + elem = mol.atom_symbol(ia) + x[p0:p1,p0:p1] = x_conf[elem] + return x