!-------------------------------------- LICENCE BEGIN ------------------------------------ !Environment Canada - Atmospheric Science and Technology License/Disclaimer, ! version 3; Last Modified: May 7, 2008. !This is free but copyrighted software; you can use/redistribute/modify it under the terms !of the Environment Canada - Atmospheric Science and Technology License/Disclaimer !version 3 or (at your option) any later version that should be found at: !http://collaboration.cmc.ec.gc.ca/science/rpn.comm/license.html ! !This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; !without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. !See the above mentioned License/Disclaimer for more details. !You should have received a copy of the License/Disclaimer along with this software; !if not, you can write to: EC-RPN COMM Group, 2121 TransCanada, suite 500, Dorval (Quebec), !CANADA, H9P 1J3; or send e-mail to service.rpn@ec.gc.ca !-------------------------------------- LICENCE END -------------------------------------- ***s/r sol_mxma8_2_ad - ADJ of sol_mxma8_2 * #include "model_macros_f.h"*
subroutine sol_mxma8_2_ad ( Sol, Rhs, Xevec, 1 $ Minx, Maxx, Miny, Maxy, njl, $ Minz, Maxz, Nk, Nkl, $ Gni, Gnj, Minij, Maxij, L_nij, $ minx1, maxx1, minx2, maxx2,nx3, $ F_npex1, F_npey1, ai, bi, ci, $ fdg1,fdg2,fdwfft) * implicit none * integer F_npex1 , F_npey1 integer minx1, maxx1, minx2, maxx2,nx3 * real*8 ai(minx1:maxx1,minx2:maxx2,nx3), $ bi(minx1:maxx1,minx2:maxx2,nx3), $ ci(minx1:maxx1,minx2:maxx2,nx3) * integer Minx, Maxx, Miny, Maxy, njl, $ Minz, Maxz, Nk , Nkl , $ Gni , Gnj , Minij, Maxij, L_nij real*8 Rhs(Minx:Maxx,Miny:Maxy,Nk), Sol(Minx:Maxx,Miny:Maxy,Nk) real*8 Xevec(*) * real*8 fdwfft(Miny:Maxy,Minz:Maxz,Gni) real*8 fdg1(Miny:Maxy,Minz:Maxz,Gni+F_npex1) real*8 fdg2(Minz:Maxz,Minij:Maxij,Gnj+F_npey1) * *author * M.Tanguay * *revision * v2_10 - Tanguay M. - initial MPI version * v3_00 - Tanguay M. - adapt to restructured sol_mxma8_2 * v3_03 - Tanguay M. - Adjoint Lam configuration * v3_11 - Tanguay M. - AIXport+Opti+OpenMP for TLM-ADJ * *object * see id section * *ADJ of *arguments * Name I/O Description *---------------------------------------------------------------- * Sol O - result of solver * Rhs I - r.h.s. of elliptic equation * Xevec I - eigenvectors * Minx I - minimum index on X for Rhs,Sol * Maxx I - maximum index on X for Rhs,Sol * Miny I - minimum index on Y for Rhs,Sol * Maxy I - maximum index on Y for Rhs,Sol * Njl I - number of points on local PEy for J (ldnh_nj) * Minz I - minimum index on local PEx for K (trp_12smin) * Maxz I - maximum index on local PEx for K (trp_12smax) * Nk I - G_nk-1 points in z direction globally (Schm_nith) * Nkl I - number of points on local PEx for K (trp_12sn) * Gni I - number of points in x direction globally (G_ni) * Gnj I - number of points in y direction globally (G_nj) * Minij I - minimum index on local PEy for I (trp_22min) * Maxij I - maximum index on local PEy for I (trp_22max) * L_nij I - number of points on local PEy for I (trp_22n) * Minx1 I - minimum index on local PEx for K (trp_12smin) * Maxx1 I - maximum index on local PEx for K (trp_12smax) * Minx2 I - minimum index on local PEy for I (trp_22min) * Maxx2 I - maximum index on local PEy for I (trp_22max) * Nx3 I - number of points along J globally (G_nj) * F_npex1 I - number of processors on X * F_npey1 I - number of processors on Y * ai I - sub diagonal of LU factorization * bi I - diagonal of LU factorization * ci I - super diagonal of LU factorization * fdg1 I - work field * fdg2 I - work field * fdwfft I - work field *---------------------------------------------------------------- * *implicits #include "ptopo.cdk"
#include "glb_ld.cdk"
#include "glb_pil.cdk"
* integer j, jr, i, k, l_pil_w, l_pil_e integer piece, p0, pn, ptotal, plon real*8 ZERO_8 parameter( ZERO_8 = 0.0 ) * ______________________________________________________ * l_pil_w=0 l_pil_e=0 if (l_south) l_pil_w= Lam_pil_w if (l_north) l_pil_e= Lam_pil_e * call rpn_comm_transpose( Sol, Minx, Maxx, Gni, (Maxy-Miny+1), % Minz, Maxz, Nk, fdg1, 1, 2) * !$omp parallel private(p0,pn,piece, jr) shared(ptotal,plon,bi,ai) * * ADJ of * ------ * inverse projection ( r = x * w ) * C do k=1,Nkl C call mxma8( xevec, Gni-Lam_pil_w-Lam_pil_e, 1, C % fdg1 (1+pil_s,k,1+Lam_pil_w), (Maxy-Miny+1)*(Maxz-Minz+1), 1, C % fdwfft(1+pil_s,k,1+Lam_pil_w), (Maxy-Miny+1)*(Maxz-Minz+1), 1, C % Gni-Lam_pil_w-Lam_pil_e, Gni-Lam_pil_w-Lam_pil_e, C % (Maxy-Miny+1-pil_s-pil_n)) C enddo !$omp do do k=1,Nkl call dgemm('N','N', (Maxy-Miny+1-pil_s-pil_n), . Gni-Lam_pil_w-Lam_pil_e, . Gni-Lam_pil_w-Lam_pil_e, . 1._8, fdg1(1+pil_s,k,1+Lam_pil_w), . (Maxy-Miny+1)* (Maxz-Minz+1),xevec, Gni-Lam_pil_w-Lam_pil_e, . 0._8, fdwfft(1+pil_s,k,1+Lam_pil_w), . (Maxy-Miny+1)* (Maxz-Minz+1)) enddo !$omp end do * !$omp single call rpn_comm_transpose $ ( fdwfft, Miny, Maxy, Gnj, (Maxz-Minz+1), $ Minij, Maxij, Gni, fdg2, 2, 2 ) !$omp end single * ptotal = L_nij-l_pil_e-l_pil_w plon = (ptotal+Ptopo_npeOpenMP)/ Ptopo_npeOpenMP * !$omp do do piece=Ptopo_npeOpenMP,1,-1 p0 = 1+l_pil_w + plon*(piece-1) pn = min(L_nij-l_pil_e,plon*piece+l_pil_w) * do j = 1+Lam_pil_s,Gnj-1-Lam_pil_n jr = j + 1 C do i=L_nij-l_pil_e,1+l_pil_w,-1 do i=pn,p0,-1 do k=(Maxz-Minz+1),1,-1 fdg2(k,i,jr) = - ci(k,i,j) * fdg2(k,i,j) + fdg2(k,i,jr) enddo enddo enddo * do j = Gnj-Lam_pil_n,2+Lam_pil_s,-1 jr = j - 1 C do i=L_nij-l_pil_e,1+l_pil_w,-1 do i=pn,p0,-1 do k=(Maxz-Minz+1),1,-1 fdg2(k,i,jr) = - ai(k,i,j) * fdg2(k,i,j) + fdg2(k,i,jr) fdg2(k,i,j) = bi(k,i,j) * fdg2(k,i,j) enddo enddo enddo * j = 1+Lam_pil_s C do i=L_nij-l_pil_e,1+l_pil_w,-1 do i=pn,p0,-1 do k=(Maxz-Minz+1),1,-1 fdg2(k,i,j) = bi(k,i,j)*fdg2(k,i,j) enddo enddo enddo !$omp enddo * !$omp single call rpn_comm_transpose $ ( fdwfft, Miny, Maxy, Gnj, (Maxz-Minz+1), $ Minij, Maxij, Gni, fdg2, -2, 2 ) !$omp end single * * ADJ of * ------ * projection ( wfft = x transposed * g ) * C do k=1,Nkl C call mxma8( xevec, 1, Gni-Lam_pil_w-Lam_pil_e, C % fdwfft(1+pil_s,k,1+Lam_pil_w),(Maxy-Miny+1)* (Maxz-Minz+1), 1, C % fdg1 (1+pil_s,k,1+Lam_pil_w),(Maxy-Miny+1)* (Maxz-Minz+1), 1, C % Gni-Lam_pil_w-Lam_pil_e, Gni-Lam_pil_w-Lam_pil_e, C % (Maxy-Miny+1-pil_s-pil_n)) C enddo !$omp do do k=1,Nkl call dgemm('N','T', (Maxy-Miny+1-pil_s-pil_n), . Gni-Lam_pil_w-Lam_pil_e, . Gni-Lam_pil_w-Lam_pil_e, . 1._8, fdwfft(1+pil_s,k,1+Lam_pil_w), . (Maxy-Miny+1) * (Maxz-Minz+1),xevec, Gni-Lam_pil_w-Lam_pil_e, . 0._8, fdg1(1+pil_s,k,1+Lam_pil_w), . (Maxy-Miny+1) * (Maxz-Minz+1)) enddo !$omp enddo * !$omp do do i= Gni,1,-1 do k= 0, Minz,-1 do j= Maxy,Miny,-1 fdwfft(j,k,i)=ZERO_8 enddo enddo do k= Maxz,Nkl+1,-1 do j= Maxy,Miny,-1 fdwfft(j,k,i)=ZERO_8 enddo enddo * do k= nkl,Minz,-1 do j= pil_s, Miny, -1 fdg1(j,k,i)=ZERO_8 enddo do j= Maxy,njl+1-pil_n,-1 fdg1(j,k,i)=ZERO_8 enddo enddo enddo !$omp enddo * !$omp end parallel * call rpn_comm_transpose( Rhs, Minx, Maxx, Gni, (Maxy-Miny+1), % Minz, Maxz, Nk, fdg1, -1,2 ) * return end