!-------------------------------------- LICENCE BEGIN ------------------------------------ !Environment Canada - Atmospheric Science and Technology License/Disclaimer, ! version 3; Last Modified: May 7, 2008. !This is free but copyrighted software; you can use/redistribute/modify it under the terms !of the Environment Canada - Atmospheric Science and Technology License/Disclaimer !version 3 or (at your option) any later version that should be found at: !http://collaboration.cmc.ec.gc.ca/science/rpn.comm/license.html ! !This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; !without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. !See the above mentioned License/Disclaimer for more details. !You should have received a copy of the License/Disclaimer along with this software; !if not, you can write to: EC-RPN COMM Group, 2121 TransCanada, suite 500, Dorval (Quebec), !CANADA, H9P 1J3; or send e-mail to service.rpn@ec.gc.ca !-------------------------------------- LICENCE END -------------------------------------- ***s/r hzd_solmxma - parallel direct solution of high-order * diffusion equation with mxma8 #include "model_macros_f.h"*
subroutine hzd_solmxma (F_sol, F_Rhs_8, F_Xevec_8 , 42 % F_a_8, F_c_8 ,F_deltai_8, % minx1, maxx1, minx2, maxx2, nx1, nx2, nx3, F_pwr, % Minx,Maxx,Miny,Maxy,Gnk,Gni,nil,njl,nkl, % F_opsxp0_8, F_opsyp0_8,F_cdiff,F_npex,F_npey) * implicit none * integer minx1, maxx1, minx2, maxx2, nx1, nx2, nx3 , $ Minx , Maxx , Miny , Maxy , Gnk, Gni, F_pwr, $ njl , nkl , nil , F_npex, F_npey real*8 F_opsxp0_8(*), F_opsyp0_8(*), $ F_a_8(1:F_pwr,1:F_pwr,minx2:maxx2,nx3), $ F_c_8(1:F_pwr,1:F_pwr,minx2:maxx2,nx3), $ F_deltai_8(1:F_pwr,1:F_pwr,minx2:maxx2,nx3), $ F_Rhs_8(Minx:Maxx,Miny:Maxy,Gnk),F_Xevec_8(*) real F_cdiff, F_sol(Minx:Maxx,Miny:Maxy,Gnk) * *author * Abdessamad Qaddouri * *revision * v2_10 - Qaddouri A. - initial version * v3_00 - Desgagne & Lee - Lam configuration * v3_10 - Corbeil & Desgagne & Lee - AIXport+Opti+OpenMP * v3_11 - Corbeil L. - new RPNCOMM transpose * *object * *arguments * Name I/O Description *---------------------------------------------------------------- * F_sol_8 I/O - pre-r.h.s and result of horizontal diffusion * F_rhs_8 I - r.h.s. of elliptic equation and work vector * F_Xevec_8 I - eigenvectors * F_a_8 I - sub diagonal of LU factorization * F_b_8 I - diagonal of LU factorization * F_c_8 I - super diagonal of LU factorization * F_deltai_8 I - * minx1 I - minimum index on local PEx for K (trp_12dmin) * maxx1 I - maximum index on local PEx for K (trp_12dmax) * minx2 I - minimum index on local PEy for I (trp_22min) * maxx2 I - maximum index on local PEy for I (trp_22max) * Nx1 I - number of points on local PEx for K (trp_12dn) * Nx2 I - number of points on local PEy for I (trp_22n) * Nx3 I - number of points along Y globally (G_nj) * F_pwr I - power (high order) of factorization * Minx I - minimum index on X for Rhs,Sol * Maxx I - maximum index on X for Rhs,Sol * Miny I - minimum index on Y for Rhs,Sol * Maxy I - maximum index on Y for Rhs,Sol * Gnk I - number of points along K globally (G_nk) * Gni I - number of points along I globally (G_ni) * Nil I - number of points on local PEx for I (l_ni) * Njl I - number of points on local PEy for J (l_nj) * Nkl I - number of points on local PEx for K (trp_12dn) * F_opsxp0_8 I - east-west operator for HO diffusion solver:Hzd_xp0_8 * F_opsyp0_8 I - north-south operator for HO diffusion solver:Hzd_yp0_8 * F_cdiff I - Hzd_cdiff * F_npex1 I - number of processors on X * F_npey1 I - number of processors on Y * #include "ptopo.cdk"
#include "glb_ld.cdk"
#include "glb_pil.cdk"
* real*8, dimension(Miny:Maxy ,minx1:maxx1,Gni+F_npex) :: fdg1_8 real*8, dimension(Miny:Maxy ,minx1:maxx1,Gni) :: fwft_8 real*8, dimension(minx1:maxx1,minx2:maxx2,nx3+F_npey) :: fdg2_8 real*8, dimension(minx1:maxx1,minx2:maxx2,F_pwr,nx3) ::dn3_8,sol_8 real*8 ZERO_8, fact_8 parameter( ZERO_8 = 0.0 ) integer dim, o1,o2,i,j,k,l_pil_w,l_pil_e integer kkii, kilon, ki0, kin, pin ** * __________________________________________________________________ * * c call tmg_start(99,'hzd_solmxma') l_pil_w=0 l_pil_e=0 if (l_south) l_pil_w= Lam_pil_w if (l_north) l_pil_e= Lam_pil_e * * kilon = ( maxx2-l_pil_e-minx2+l_pil_w+1 + Ptopo_npeOpenMP)/Ptopo_npeOpenMP if (G_lam) then do k = 1, Gnk do j = 1+pil_s, njl-pil_n do i = 1+pil_w,nil-pil_e F_Rhs_8(i,j,k)= ((-1)**F_pwr)*dble(F_cdiff)*dble(F_sol(i,j,k)) enddo enddo enddo call rpn_comm_transpose ( F_Rhs_8, Minx, Maxx, Gni, (Maxy-Miny+1), % minx1, maxx1, Gnk, fdg1_8, 1,2 ) else fact_8=((-1)**F_pwr)*dble(F_cdiff) call rpn_comm_transpose48 ( F_sol, Minx, Maxx, Gni,1, (Maxy-Miny+1), % (Maxy-Miny+1),minx1, maxx1, Gnk, fdg1_8,1, % fact_8,0.d0) endif !$omp parallel private(kkii,ki0,kin,pin) shared(kilon) !$omp do do i = 1+Lam_pil_w, Gni-Lam_pil_e do k = 1, nkl do j = 1+pil_s, njl-pil_n fdg1_8(j,k,i) = F_opsxp0_8(Gni+i)*fdg1_8(j,k,i) enddo enddo enddo !$omp enddo * !$omp do do i = 1, Gni do k = minx1, maxx1 do j = Miny, 0+pil_s fdg1_8(j,k,i)= ZERO_8 enddo do j = njl+1-pil_n, Maxy fdg1_8(j,k,i)= ZERO_8 enddo do j = Miny , Maxy fwft_8(j,k,i)=ZERO_8 enddo enddo enddo !$omp enddo * * projection ( wfft = x transposed * g ) * c do k=1,Nkl c call mxma8( F_Xevec_8,Gni-Lam_pil_w-Lam_pil_e, 1, c % fdg1_8(Miny+pil_s,k,1+Lam_pil_w),(Maxy-Miny+1) * (maxx1-minx1+1), 1, c % fwft_8(Miny+pil_s,k,1+Lam_pil_w),(Maxy-Miny+1) * (maxx1-minx1+1), 1, c % Gni-Lam_pil_w-Lam_pil_e, Gni-Lam_pil_w-Lam_pil_e, c % (Maxy-Miny+1-pil_s-pil_n)) c enddo * !$omp do do k=1,Nkl call dgemm('N','N',(Maxy-Miny+1-pil_s-pil_n), . Gni-Lam_pil_w-Lam_pil_e, . Gni-Lam_pil_w-Lam_pil_e, . 1._8, fdg1_8(Miny+pil_s,k,1+Lam_pil_w), . (Maxy-Miny+1) * (maxx1-minx1+1),F_Xevec_8, . Gni-Lam_pil_w-Lam_pil_e, . 0._8, fwft_8(Miny+pil_s,k,1+Lam_pil_w), . (maxy-miny+1) * (maxx1-minx1+1)) enddo !$omp enddo !$omp single call rpn_comm_transpose(fwft_8,Miny,Maxy,nx3,(maxx1-minx1+1), % minx2, maxx2,Gni,fdg2_8,2,2) !$omp end single * * cote droit * !$omp do do j = 1, nx3 do o1 = 1, F_pwr do i = minx2, maxx2 do k = minx1, maxx1 sol_8(k,i,o1,j)= ZERO_8 dn3_8(k,i,o1,j)= ZERO_8 enddo enddo enddo enddo !$omp enddo * !$omp do do j = 1+Lam_pil_s, nx3-Lam_pil_n do i = 1+l_pil_w, nx2-l_pil_e do k = 1, nx1 dn3_8(k,i,1,j)= F_opsyp0_8(nx3+j)*fdg2_8(k,i,j) enddo enddo enddo !$omp enddo * * resolution du systeme blok-tridiagonal * * aller !$omp do do o1= 1, F_pwr do i = 1+l_pil_w, nx2-l_pil_e do k = 1, nx1 sol_8(k,i,o1,1+Lam_pil_s)=dn3_8(k,i,o1,1+Lam_pil_s) enddo enddo enddo !$omp enddo * !$omp do do kkii = 1, Ptopo_npeOpenMP ki0 = 1 +l_pil_w + kilon*(kkii-1) kin = min(ki0+kilon-1, maxx2-l_pil_e) pin = min(ki0+kilon-1, nx2-l_pil_e) do j = 2+Lam_pil_s, nx3-Lam_pil_n do o1= 1, F_pwr do o2= 1, F_pwr do i = ki0,kin do k = minx1, maxx1 sol_8(k,i,o1,j)= sol_8(k,i,o1,j) $ + F_a_8(o1,o2,i,j)*sol_8(k,i,o2,j-1) enddo enddo enddo enddo do o1= 1, F_pwr do i = ki0,pin do k = 1, nx1 sol_8(k,i,o1,j)=dn3_8(k,i,o1,j)-sol_8(k,i,o1,j) enddo enddo enddo enddo enddo !$omp enddo * * scale le cote droit pour retour * !$omp do do j= 1, nx3 do o1= 1,F_pwr do i= minx2,maxx2 do k= minx1,maxx1 dn3_8(k,i,o1,j)= ZERO_8 enddo enddo enddo enddo !$omp enddo !$omp do do j= 1+Lam_pil_s, nx3-Lam_pil_n do o2= 1, F_pwr do o1= 1, F_pwr do i = minx2+l_pil_w, maxx2-l_pil_e do k = minx1, maxx1 dn3_8(k,i,o1,j)= dn3_8(k,i,o1,j) $ + F_deltai_8(o1,o2,i,j)*sol_8(k,i,o2,j) enddo enddo enddo enddo enddo !$omp enddo * * retour * !$omp do do j = 1, nx3 if(j.eq.nx3-Lam_pil_n) then do o1= 1, F_pwr do i = 1+l_pil_w, nx2-l_pil_e do k = 1, nx1 sol_8(k,i,o1,j)= dn3_8(k,i,o1,j) enddo enddo enddo else do o1= 1, F_pwr do i = 1+l_pil_w, nx2-l_pil_e do k = 1, nx1 sol_8(k,i,o1,j)= 0.0 enddo enddo enddo endif enddo !$omp enddo * !$omp do do kkii = 1, Ptopo_npeOpenMP ki0 = 1 +l_pil_w + kilon*(kkii-1) kin = min(ki0+kilon-1, maxx2-l_pil_e) pin = min(ki0+kilon-1, nx2-l_pil_e) do j = nx3-1-Lam_pil_n, 1+Lam_pil_s, -1 do o1= 1, F_pwr do o2= 1, F_pwr do i = ki0,kin do k = minx1, maxx1 sol_8(k,i,o1,j)= sol_8(k,i,o1,j) $ + F_c_8(o1,o2,i,j)*sol_8(k,i,o2,j+1) enddo enddo enddo enddo do o1= 1, F_pwr do i = ki0, pin do k = 1, nx1 sol_8(k,i,o1,j)= dn3_8(k,i,o1,j) - sol_8(k,i,o1,j) enddo enddo enddo enddo enddo !$omp enddo * !$omp do do j = 1+Lam_pil_s, nx3-Lam_pil_n do i = 1+l_pil_w, nx2-l_pil_e do k = 1, nx1 fdg2_8(k,i,j)=sol_8(k,i,F_pwr,j) enddo enddo enddo !$omp enddo * !$omp single ! call tmg_stop(11) call rpn_comm_transpose(fwft_8,Miny,Maxy,nx3,(maxx1-minx1+1), % minx2, maxx2,Gni,fdg2_8,-2,2) !$omp end single * * inverse projection ( r = x * w ) * c do k=1,Nkl c call mxma8( F_Xevec_8,1,Gni-Lam_pil_w-Lam_pil_e, c % fwft_8(Miny+pil_s,k,1+Lam_pil_w),(Maxy-Miny+1)*(maxx1-minx1+1), 1, c % fdg1_8(Miny+pil_s,k,1+Lam_pil_w),(Maxy-Miny+1)*(maxx1-minx1+1), 1, c % Gni-Lam_pil_w-Lam_pil_e, Gni-Lam_pil_w-Lam_pil_e, c % (Maxy-Miny+1-pil_s-pil_n) ) c enddo !$omp do do k= 1,Nkl call dgemm('N','T', (maxy-miny+1-pil_s-pil_n) , . gni-Lam_pil_w-Lam_pil_e, . gni-Lam_pil_w-Lam_pil_e, . 1._8, fwft_8(Miny+pil_s,k,1+Lam_pil_w), . (maxy-miny+1) * (maxx1-minx1+1),F_Xevec_8, . gni-Lam_pil_w-Lam_pil_e, . 0._8,fdg1_8(Miny+pil_s,k,1+Lam_pil_w), . (maxy-miny+1) * (maxx1-minx1+1)) enddo !$omp enddo !$omp end parallel if (G_lam) then call rpn_comm_transpose(F_Rhs_8, Minx, Maxx, Gni, (Maxy-Miny+1), % minx1, maxx1, Gnk, fdg1_8, -1,2 ) do k= 1, Gnk do j= 1+pil_s, njl-pil_n do i= 1+pil_w, nil-pil_e F_sol(i,j,k)= sngl(F_Rhs_8(i,j,k)) enddo enddo enddo else call rpn_comm_transpose48 ( F_sol, Minx, Maxx, Gni,1, (Maxy-Miny+1), % (Maxy-Miny+1),minx1, maxx1, Gnk, fdg1_8, -1, % 1.0d0,0.d0) endif c call tmg_stop(99) * * * __________________________________________________________________ * return end