Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/common/include/acc_macros.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@
& copyout_val.strip('\n') + create_val.strip('\n') + &
& no_create_val.strip('\n') + present_val.strip('\n') + &
& deviceptr_val.strip('\n') + attach_val.strip('\n')
#:set acc_directive = '!$acc parallel ' + &
& acc_clause_val + extraAccArgs_val.strip('\n')
#:set acc_directive = FOLD_DIRECTIVE('!$acc parallel ' + &
& acc_clause_val + extraAccArgs_val.strip('\n'), '!$acc').strip('\n')
#:set end_acc_directive = '!$acc end parallel'
$:acc_directive
$:code
Expand Down Expand Up @@ -153,8 +153,8 @@
& copyout_val.strip('\n') + create_val.strip('\n') + &
& no_create_val.strip('\n') + present_val.strip('\n') + &
& deviceptr_val.strip('\n') + attach_val.strip('\n')
#:set acc_directive = '!$acc parallel loop ' + &
& clause_val + extraAccArgs_val.strip('\n')
#:set acc_directive = FOLD_DIRECTIVE('!$acc parallel loop ' + &
& clause_val + extraAccArgs_val.strip('\n'), '!$acc').strip('\n')
$:acc_directive
#:enddef

Expand Down
4 changes: 2 additions & 2 deletions src/common/include/omp_macros.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@
& deviceptr_val.strip('\n') + attach_val.strip('\n')

#:set omp_clause_val = omp_clause_val.strip('\n')
#:set omp_directive = '!$omp target teams ' + omp_clause_val + extraOmpArgs_val.strip('\n')
#:set omp_directive = FOLD_DIRECTIVE('!$omp target teams ' + omp_clause_val + extraOmpArgs_val.strip('\n'), '!$omp').strip('\n')

#:set omp_end_directive = '!$omp end target teams'
$:omp_directive
Expand Down Expand Up @@ -186,7 +186,7 @@
#:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) '
#:endif

#:set omp_directive = omp_start_directive + clause_val + extraOmpArgs_val.strip('\n')
#:set omp_directive = FOLD_DIRECTIVE(omp_start_directive + clause_val + extraOmpArgs_val.strip('\n'), '!$omp').strip('\n')
$:omp_directive
#:enddef

Expand Down
25 changes: 25 additions & 0 deletions src/common/include/shared_parallel_macros.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,4 +117,29 @@
#:endif
$:extraArgs_val
#:enddef

#:def FOLD_DIRECTIVE(directive, sentinel, width=200)
#! Fold a long GPU directive across free-form continuation lines so it stays
#! under nvfortran's ~1000-char source-line limit. Breaks only at whole-clause
#! boundaries (clause(args) groups and bare keywords), repeating the sentinel
#! (e.g. '!$acc&') on each continuation -- which fypp's --no-folding cannot do
#! because its generic folder omits the sentinel. Every emitted line is no
#! longer than the prefix plus the single longest clause, i.e. no longer than
#! the unfolded line a build with one fewer clause already compiles.
#:set _toks = re.findall(r'\w+\([^)]*\)|\S+', directive)
#:set _lines = []
#:set _cur = ''
#:for _t in _toks
#:if _cur == ''
#:set _cur = _t
#:elif len(_cur) + 1 + len(_t) > width
#:set _lines = _lines + [_cur + ' &']
#:set _cur = sentinel + '& ' + _t
#:else
#:set _cur = _cur + ' ' + _t
#:endif
#:endfor
#:set _lines = _lines + [_cur]
$:'\n'.join(_lines)
#:enddef
! New line at end of file is required for FYPP
19 changes: 11 additions & 8 deletions src/simulation/m_riemann_solver_hll.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,21 +89,23 @@ contains
real(wp) :: vel_L_tmp, vel_R_tmp
real(wp) :: Ms_L, Ms_R, pres_SL, pres_SR
real(wp) :: alpha_L_sum, alpha_R_sum
real(wp) :: zcoef, pcorr !< low Mach number correction
real(wp) :: zcoef, pcorr !< low Mach number correction
type(riemann_states) :: c_fast, pres_mag
type(riemann_states_vec3) :: B
type(riemann_states) :: Ga !< Gamma (Lorentz factor)
type(riemann_states) :: Ga !< Gamma (Lorentz factor)
type(riemann_states) :: vdotB, B2
type(riemann_states_vec3) :: b4 !< 4-magnetic field components (spatial: b4x, b4y, b4z)
type(riemann_states_vec3) :: cm !< Conservative momentum variables
type(riemann_states_vec3) :: b4 !< 4-magnetic field components (spatial: b4x, b4y, b4z)
type(riemann_states_vec3) :: cm !< Conservative momentum variables
integer :: i, j, k, l, q !< Generic loop iterators
integer, dimension(2) :: Re_size_loc !< host copy of Re_size; amdflang reads the declare-target original stale cross-TU
! Populating the buffers of the left and right Riemann problem states variables, based on the choice of boundary conditions

call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, dqL_prim_dz_vf, &
& qR_prim_rsx_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, dqR_prim_dz_vf, norm_dir, ix, iy, iz)

! Reshaping inputted data based on dimensional splitting direction
call s_initialize_riemann_solver(flux_src_vf, norm_dir)
Re_size_loc = Re_size
#:for NORM_DIR, XYZ, STENCIL_VAR, COORDS, X_BND, Y_BND, Z_BND in &
[(1, 'x', 'j', '{STENCIL_IDX}, k, l', 'is1', 'is2', 'is3'), &
(2, 'y', 'k', 'j, {STENCIL_IDX}, l', 'is2', 'is1', 'is3'), &
Expand All @@ -119,7 +121,8 @@ contains
& Y_L, Y_R, MW_L, MW_R, R_gas_L, R_gas_R, Cp_L, Cp_R, Cv_L, Cv_R, Gamm_L, Gamm_R, gamma_L, &
& gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, qv_avg, c_L, c_R, G_L, G_R, rho_avg, H_avg, c_avg, &
& gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, vel_avg_rms, Ms_L, Ms_R, pres_SL, &
& pres_SR, alpha_L_sum, alpha_R_sum, flux_tau_L, flux_tau_R]', copyin='[norm_dir]')
& pres_SR, alpha_L_sum, alpha_R_sum, flux_tau_L, flux_tau_R, s_M, s_P, xi_M, xi_P]', &
& copyin='[norm_dir]', firstprivate='[Re_size_loc]')
do l = ${Z_BND}$%beg, ${Z_BND}$%end
do k = ${Y_BND}$%beg, ${Y_BND}$%end
do j = ${X_BND}$%beg, ${X_BND}$%end
Expand Down Expand Up @@ -216,11 +219,11 @@ contains
Re_L(i) = dflt_real
Re_R(i) = dflt_real

if (Re_size(i) > 0) Re_L(i) = 0._wp
if (Re_size(i) > 0) Re_R(i) = 0._wp
if (Re_size_loc(i) > 0) Re_L(i) = 0._wp
if (Re_size_loc(i) > 0) Re_R(i) = 0._wp

$:GPU_LOOP(parallelism='[seq]')
do q = 1, Re_size(i)
do q = 1, Re_size_loc(i)
Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) + Re_L(i)
Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) + Re_R(i)
end do
Expand Down
42 changes: 22 additions & 20 deletions src/simulation/m_riemann_solver_hllc.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,14 +111,15 @@ contains
#:else
real(wp), dimension(num_dims) :: xi_field_L, xi_field_R
#:endif
real(wp) :: G_L, G_R
real(wp) :: vel_L_rms, vel_R_rms, vel_avg_rms
real(wp) :: vel_L_tmp, vel_R_tmp
real(wp) :: rho_Star, E_Star, p_Star, p_K_Star, vel_K_star
real(wp) :: pres_SL, pres_SR, Ms_L, Ms_R
real(wp) :: flux_ene_e
real(wp) :: zcoef, pcorr !< low Mach number correction
integer :: Re_max, i, j, k, l, q !< Generic loop iterators
real(wp) :: G_L, G_R
real(wp) :: vel_L_rms, vel_R_rms, vel_avg_rms
real(wp) :: vel_L_tmp, vel_R_tmp
real(wp) :: rho_Star, E_Star, p_Star, p_K_Star, vel_K_star
real(wp) :: pres_SL, pres_SR, Ms_L, Ms_R
real(wp) :: flux_ene_e
real(wp) :: zcoef, pcorr !< low Mach number correction
integer :: Re_max, i, j, k, l, q !< Generic loop iterators
integer, dimension(2) :: Re_size_loc !< host copy of Re_size; amdflang reads the declare-target original stale cross-TU
! Populating the buffers of the left and right Riemann problem states variables, based on the choice of boundary conditions

call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, dqL_prim_dz_vf, &
Expand All @@ -127,6 +128,7 @@ contains
! Reshaping inputted data based on dimensional splitting direction

call s_initialize_riemann_solver(flux_src_vf, norm_dir)
Re_size_loc = Re_size

#:for NORM_DIR, XYZ, STENCIL_VAR, COORDS, X_BND, Y_BND, Z_BND in &
[(1, 'x', 'j', '{STENCIL_IDX}, k, l', 'is1', 'is2', 'is3'), &
Expand All @@ -147,7 +149,7 @@ contains
& rho_avg, H_avg, c_avg, gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, &
& vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, &
& alpha_R_sum, rho_Star, E_Star, p_Star, p_K_Star, vel_K_star, s_L, s_R, s_M, s_P, s_S, &
& xi_M, xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, xi_PP]')
& xi_M, xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, xi_PP]', firstprivate='[Re_size_loc]')
do l = ${Z_BND}$%beg, ${Z_BND}$%end
do k = ${Y_BND}$%beg, ${Y_BND}$%end
do j = ${X_BND}$%beg, ${X_BND}$%end
Expand Down Expand Up @@ -229,10 +231,10 @@ contains
do i = 1, 2
Re_L(i) = dflt_real
Re_R(i) = dflt_real
if (Re_size(i) > 0) Re_L(i) = 0._wp
if (Re_size(i) > 0) Re_R(i) = 0._wp
if (Re_size_loc(i) > 0) Re_L(i) = 0._wp
if (Re_size_loc(i) > 0) Re_R(i) = 0._wp
$:GPU_LOOP(parallelism='[seq]')
do q = 1, Re_size(i)
do q = 1, Re_size_loc(i)
Re_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + Re_idx(i, q))/Res_gs(i, q) + Re_L(i)
Re_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + Re_idx(i, q))/Res_gs(i, &
& q) + Re_R(i)
Expand Down Expand Up @@ -782,7 +784,7 @@ contains
& Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, s_L, s_R, s_M, s_P, s_S, xi_M, &
& xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, xi_PP, nbub_L, nbub_R, PbwR3Lbar, PbwR3Rbar, &
& R3Lbar, R3Rbar, R3V2Lbar, R3V2Rbar, Ys_L, Ys_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, &
& Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2]')
& Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2]', firstprivate='[Re_size_loc]')
do l = ${Z_BND}$%beg, ${Z_BND}$%end
do k = ${Y_BND}$%beg, ${Y_BND}$%end
do j = ${X_BND}$%beg, ${X_BND}$%end
Expand Down Expand Up @@ -851,11 +853,11 @@ contains
Re_L(i) = dflt_real
Re_R(i) = dflt_real

if (Re_size(i) > 0) Re_L(i) = 0._wp
if (Re_size(i) > 0) Re_R(i) = 0._wp
if (Re_size_loc(i) > 0) Re_L(i) = 0._wp
if (Re_size_loc(i) > 0) Re_R(i) = 0._wp

$:GPU_LOOP(parallelism='[seq]')
do q = 1, Re_size(i)
do q = 1, Re_size_loc(i)
Re_L(i) = (1._wp - qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + Re_idx(i, &
& q)))/Res_gs(i, q) + Re_L(i)
Re_R(i) = (1._wp - qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + Re_idx(i, &
Expand Down Expand Up @@ -1179,7 +1181,7 @@ contains
& vel_R, Re_L, Re_R, alpha_L, alpha_R, s_L, s_R, s_S, vel_avg_rms, pcorr, zcoef, &
& vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, &
& tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, G_L, &
& G_R]', copyin='[is1, is2, is3]')
& G_R, c_sum_Yi_Phi, flux_ene_e]', copyin='[is1, is2, is3]', firstprivate='[Re_size_loc]')
do l = ${Z_BND}$%beg, ${Z_BND}$%end
do k = ${Y_BND}$%beg, ${Y_BND}$%end
do j = ${X_BND}$%beg, ${X_BND}$%end
Expand Down Expand Up @@ -1244,8 +1246,8 @@ contains
end do

Re_max = 0
if (Re_size(1) > 0) Re_max = 1
if (Re_size(2) > 0) Re_max = 2
if (Re_size_loc(1) > 0) Re_max = 1
if (Re_size_loc(2) > 0) Re_max = 2

if (viscous) then
$:GPU_LOOP(parallelism='[seq]')
Expand All @@ -1254,7 +1256,7 @@ contains
Re_R(i) = 0._wp

$:GPU_LOOP(parallelism='[seq]')
do q = 1, Re_size(i)
do q = 1, Re_size_loc(i)
Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) + Re_L(i)
Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) + Re_R(i)
end do
Expand Down
29 changes: 16 additions & 13 deletions src/simulation/m_riemann_solver_lf.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,22 +88,24 @@ contains
real(wp) :: vel_L_tmp, vel_R_tmp
real(wp) :: Ms_L, Ms_R, pres_SL, pres_SR
real(wp) :: alpha_L_sum, alpha_R_sum
real(wp) :: zcoef, pcorr !< low Mach number correction
real(wp) :: zcoef, pcorr !< low Mach number correction
type(riemann_states) :: c_fast, pres_mag
type(riemann_states_vec3) :: B
type(riemann_states) :: Ga !< Gamma (Lorentz factor)
type(riemann_states) :: Ga !< Gamma (Lorentz factor)
type(riemann_states) :: vdotB, B2
type(riemann_states_vec3) :: b4 !< 4-magnetic field components (spatial: b4x, b4y, b4z)
type(riemann_states_vec3) :: cm !< Conservative momentum variables
integer :: i, j, k, l, q !< Generic loop iterators
type(riemann_states_vec3) :: b4 !< 4-magnetic field components (spatial: b4x, b4y, b4z)
type(riemann_states_vec3) :: cm !< Conservative momentum variables
integer :: i, j, k, l, q !< Generic loop iterators
integer, dimension(3) :: idx_right_phys !< Physical (j,k,l) indices for right state.
integer, dimension(2) :: Re_size_loc !< host copy of Re_size; amdflang reads the declare-target original stale cross-TU
! Populating the buffers of the left and right Riemann problem states variables, based on the choice of boundary conditions

call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, dqL_prim_dz_vf, &
& qR_prim_rsx_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, dqR_prim_dz_vf, norm_dir, ix, iy, iz)

! Reshaping inputted data based on dimensional splitting direction
call s_initialize_riemann_solver(flux_src_vf, norm_dir)
Re_size_loc = Re_size
#:for NORM_DIR, XYZ, STENCIL_VAR, COORDS, X_BND, Y_BND, Z_BND in &
[(1, 'x', 'j', '{STENCIL_IDX}, k, l', 'is1', 'is2', 'is3'), &
(2, 'y', 'k', 'j, {STENCIL_IDX}, l', 'is2', 'is1', 'is3'), &
Expand All @@ -119,7 +121,8 @@ contains
& vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, alpha_R_sum, c_avg, pres_L, pres_R, &
& rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, c_L, c_R, E_L, E_R, H_L, &
& H_R, ptilde_L, ptilde_R, s_M, s_P, xi_M, xi_P, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, &
& Cp_L, Cp_R, Cv_L, Cv_R, R_gas_L, R_gas_R, MW_L, MW_R, T_L, T_R, Y_L, Y_R]')
& Cp_L, Cp_R, Cv_L, Cv_R, R_gas_L, R_gas_R, MW_L, MW_R, T_L, T_R, Y_L, Y_R, Gamm_L, Gamm_R, &
& flux_tau_L, flux_tau_R]', firstprivate='[Re_size_loc]')
do l = ${Z_BND}$%beg, ${Z_BND}$%end
do k = ${Y_BND}$%beg, ${Y_BND}$%end
do j = ${X_BND}$%beg, ${X_BND}$%end
Expand Down Expand Up @@ -216,11 +219,11 @@ contains
Re_L(i) = dflt_real
Re_R(i) = dflt_real

if (Re_size(i) > 0) Re_L(i) = 0._wp
if (Re_size(i) > 0) Re_R(i) = 0._wp
if (Re_size_loc(i) > 0) Re_L(i) = 0._wp
if (Re_size_loc(i) > 0) Re_R(i) = 0._wp

$:GPU_LOOP(parallelism='[seq]')
do q = 1, Re_size(i)
do q = 1, Re_size_loc(i)
Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) + Re_L(i)
Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) + Re_R(i)
end do
Expand Down Expand Up @@ -599,7 +602,7 @@ contains

if (viscous) then
$:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, idx_right_phys, vel_grad_L, vel_grad_R, alpha_L, alpha_R, &
& vel_L, vel_R, Re_L, Re_R]', copyin='[norm_dir]')
& vel_L, vel_R, Re_L, Re_R]', copyin='[norm_dir]', firstprivate='[Re_size_loc]')
do l = isz%beg, isz%end
do k = isy%beg, isy%end
do j = isx%beg, isx%end
Expand Down Expand Up @@ -650,11 +653,11 @@ contains
Re_L(i) = dflt_real
Re_R(i) = dflt_real

if (Re_size(i) > 0) Re_L(i) = 0._wp
if (Re_size(i) > 0) Re_R(i) = 0._wp
if (Re_size_loc(i) > 0) Re_L(i) = 0._wp
if (Re_size_loc(i) > 0) Re_R(i) = 0._wp

$:GPU_LOOP(parallelism='[seq]')
do q = 1, Re_size(i)
do q = 1, Re_size_loc(i)
Re_L(i) = alpha_L(Re_idx(i, q))/Res_gs(i, q) + Re_L(i)
Re_R(i) = alpha_R(Re_idx(i, q))/Res_gs(i, q) + Re_R(i)
end do
Expand Down
Loading