!-------------------------------------- LICENCE BEGIN ------------------------------------ !Environment Canada - Atmospheric Science and Technology License/Disclaimer, ! version 3; Last Modified: May 7, 2008. !This is free but copyrighted software; you can use/redistribute/modify it under the terms !of the Environment Canada - Atmospheric Science and Technology License/Disclaimer !version 3 or (at your option) any later version that should be found at: !http://collaboration.cmc.ec.gc.ca/science/rpn.comm/license.html ! !This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; !without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. !See the above mentioned License/Disclaimer for more details. !You should have received a copy of the License/Disclaimer along with this software; !if not, you can write to: EC-RPN COMM Group, 2121 TransCanada, suite 500, Dorval (Quebec), !CANADA, H9P 1J3; or send e-mail to service.rpn@ec.gc.ca !-------------------------------------- LICENCE END -------------------------------------- ***s/r sol_mxma8_2 - parallel direct solution of horizontal Helmholtz * problem. With mxma8 * #include "model_macros_f.h"![]()
subroutine sol_mxma8_2 ( Sol, Rhs, Xevec, 1 $ Minx, Maxx, Miny, Maxy, njl, $ Minz, Maxz, Nk, Nkl, $ Gni, Gnj, Minij, Maxij, L_nij, $ minx1, maxx1, minx2, maxx2,nx3, $ F_npex1, F_npey1, ai, bi, ci, $ fdg1,fdg2,fdwfft) * implicit none #include "ptopo.cdk"
#include "glb_ld.cdk"
#include "glb_pil.cdk"
* * *author Abdessamad Qaddouri- July 1999 * *revision * v2_00 - Qaddouri A. - initial MPI version * v3_00 - Lee/Qaddouri - for LAM version * v3_10 - Corbeil & Desgagne & Lee - AIXport+Opti+OpenMP * *arguments * Name I/O Description *---------------------------------------------------------------- * Sol O - result of solver * Rhs I - r.h.s. of elliptic equation * Xevec I - eigenvectors * Minx I - minimum index on X for Rhs,Sol * Maxx I - maximum index on X for Rhs,Sol * Miny I - minimum index on Y for Rhs,Sol * Maxy I - maximum index on Y for Rhs,Sol * Njl I - number of points on local PEy for J (ldnh_nj) * Minz I - minimum index on local PEx for K (trp_12smin) * Maxz I - maximum index on local PEx for K (trp_12smax) * Nk I - G_nk-1 points in z direction globally (Schm_nith) * Nkl I - number of points on local PEx for K (trp_12sn) * Gni I - number of points in x direction globally (G_ni) * Gnj I - number of points in y direction globally (G_nj) * Minij I - minimum index on local PEy for I (trp_22min) * Maxij I - maximum index on local PEy for I (trp_22max) * L_nij I - number of points on local PEy for I (trp_22n) * Minx1 I - minimum index on local PEx for K (trp_12smin) * Maxx1 I - maximum index on local PEx for K (trp_12smax) * Minx2 I - minimum index on local PEy for I (trp_22min) * Maxx2 I - maximum index on local PEy for I (trp_22max) * Nx3 I - number of points along J globally (G_nj) * F_npex1 I - number of processors on X * F_npey1 I - number of processors on Y * ai I - sub diagonal of LU factorization * bi I - diagonal of LU factorization * ci I - super diagonal of LU factorization * fdg1 I - work field * fdg2 I - work field * fdwfft I - work field * * integer F_npex1 , F_npey1 integer minx1, maxx1, minx2, maxx2,nx3 Real*8 ai(minx1:maxx1,minx2:maxx2,nx3), $ bi(minx1:maxx1,minx2:maxx2,nx3), $ ci(minx1:maxx1,minx2:maxx2,nx3) integer Minx, Maxx, Miny, Maxy, njl, $ Minz, Maxz, Nk , Nkl , $ Gni , Gnj , Minij, Maxij, L_nij real*8 Rhs(Minx:Maxx,Miny:Maxy,Nk), Sol(Minx:Maxx,Miny:Maxy,Nk) real*8 Xevec(*) real*8 fdwfft(Miny:Maxy,Minz:Maxz,Gni) real*8 fdg1(Miny:Maxy,Minz:Maxz,Gni+F_npex1) real*8 fdg2(Minz:Maxz,Minij:Maxij,Gnj+F_npey1) * integer i,j,k, jr,l_pil_w,l_pil_e integer piece, p0, pn, ptotal, plon real*8 zero, one parameter( zero = 0.0 ) parameter( one = 1.0 ) * C call tmg_start(88,'sol_mxma total') l_pil_w=0 l_pil_e=0 if (l_south) l_pil_w= Lam_pil_w if (l_north) l_pil_e= Lam_pil_e * call rpn_comm_transpose( Rhs, Minx, Maxx, Gni, (Maxy-Miny+1), % Minz, Maxz, Nk, fdg1, 1,2 ) !$omp parallel private(p0,pn,piece, jr) shared(ptotal,plon,bi,ai) !$omp do do i= 1,Gni do k= Minz, nkl do j= njl+1-pil_n,Maxy fdg1(j,k,i)=zero enddo enddo do k= Minz, nkl do j= Miny, pil_s fdg1(j,k,i)=zero enddo enddo do k= Nkl+1,Maxz do j= Miny,Maxy fdwfft(j,k,i)=zero enddo enddo do k= Minz, 0 do j= Miny,Maxy fdwfft(j,k,i)=zero enddo enddo enddo !$omp enddo * projection ( wfft = x transposed * g ) c do k=1,Nkl c call mxma8( xevec, Gni-Lam_pil_w-Lam_pil_e, 1, c % fdg1(1+pil_s,k,1+Lam_pil_w), (Maxy-Miny+1)* (Maxz-Minz+1), 1, c % fdwfft(1+pil_s,k,1+Lam_pil_w),(Maxy-Miny+1)* (Maxz-Minz+1), 1, c % Gni-Lam_pil_w-Lam_pil_e, Gni-Lam_pil_w-Lam_pil_e, c % (Maxy-Miny+1-pil_s-pil_n)) c enddo !$omp do do k=1,Nkl call dgemm('N','N', (Maxy-Miny+1-pil_s-pil_n), . Gni-Lam_pil_w-Lam_pil_e, . Gni-Lam_pil_w-Lam_pil_e, . 1._8, fdg1(1+pil_s,k,1+Lam_pil_w), . (Maxy-Miny+1)* (Maxz-Minz+1),xevec, Gni-Lam_pil_w-Lam_pil_e, . 0._8, fdwfft(1+pil_s,k,1+Lam_pil_w), . (Maxy-Miny+1)* (Maxz-Minz+1)) enddo !$omp enddo !$omp single call rpn_comm_transpose $ ( fdwfft, Miny, Maxy, Gnj, (Maxz-Minz+1), $ Minij, Maxij, Gni, fdg2, 2, 2 ) ! call tmg_start(84,'sol_mxma 2') !$omp end single * ptotal = L_nij-l_pil_e-l_pil_w plon = (ptotal+Ptopo_npeOpenMP)/ Ptopo_npeOpenMP !$omp do do piece=1,Ptopo_npeOpenMP p0 = 1+l_pil_w + plon*(piece-1) pn = min(L_nij-l_pil_e,plon*piece+l_pil_w) j =1+Lam_pil_s c do i=1+l_pil_w,L_nij-l_pil_e do i=p0,pn do k=1,(Maxz-Minz+1) fdg2(k,i,j) = bi(k,i,j)*fdg2(k,i,j) enddo enddo do j =2+Lam_pil_s, Gnj-Lam_pil_n jr = j - 1 c do i=1+l_pil_w,L_nij-l_pil_e do i=p0,pn do k=1,(Maxz-Minz+1) fdg2(k,i,j) = bi(k,i,j)*fdg2(k,i,j) - ai(k,i,j) $ * fdg2(k,i,jr) enddo enddo enddo do j = Gnj-1-Lam_pil_n, 1+Lam_pil_s, -1 jr = j + 1 c do i=1+l_pil_w,L_nij-l_pil_e do i=p0,pn do k=1,(Maxz-Minz+1) fdg2(k,i,j) = fdg2(k,i,j) - ci(k,i,j) * fdg2(k,i,jr) enddo enddo enddo enddo !$omp enddo !$omp single ! call tmg_stop(84) call rpn_comm_transpose $ ( fdwfft, Miny, Maxy, Gnj, (Maxz-Minz+1), $ Minij, Maxij, Gni, fdg2,- 2, 2 ) ! call tmg_start(85,'dgemm2') !$omp end single * inverse projection ( r = x * w ) c do k=1,Nkl c call mxma8( xevec, 1, Gni-Lam_pil_w-Lam_pil_e, c % fdwfft(1+pil_s,k,1+Lam_pil_w), (Maxy-Miny+1)*(Maxz-Minz+1), 1, c % fdg1(1+pil_s,k,1+Lam_pil_w), (Maxy-Miny+1)*(Maxz-Minz+1), 1, c % Gni-Lam_pil_w-Lam_pil_e, Gni-Lam_pil_w-Lam_pil_e, c % (Maxy-Miny+1-pil_s-pil_n)) c enddo !$omp do do k=1,Nkl call dgemm('N','T', (Maxy-Miny+1-pil_s-pil_n), . Gni-Lam_pil_w-Lam_pil_e, . Gni-Lam_pil_w-Lam_pil_e, . 1._8, fdwfft(1+pil_s,k,1+Lam_pil_w), . (Maxy-Miny+1) * (Maxz-Minz+1),xevec, Gni-Lam_pil_w-Lam_pil_e, . 0._8, fdg1(1+pil_s,k,1+Lam_pil_w), . (Maxy-Miny+1) * (Maxz-Minz+1)) enddo !$omp end do !$omp end parallel ! call tmg_stop(85) call rpn_comm_transpose( Sol, Minx, Maxx, Gni, (Maxy-Miny+1), % Minz, Maxz, Nk, fdg1, -1, 2) C call tmg_stop(88) return end