!-------------------------------------- LICENCE BEGIN ------------------------------------ !Environment Canada - Atmospheric Science and Technology License/Disclaimer, ! version 3; Last Modified: May 7, 2008. !This is free but copyrighted software; you can use/redistribute/modify it under the terms !of the Environment Canada - Atmospheric Science and Technology License/Disclaimer !version 3 or (at your option) any later version that should be found at: !http://collaboration.cmc.ec.gc.ca/science/rpn.comm/license.html ! !This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; !without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. !See the above mentioned License/Disclaimer for more details. !You should have received a copy of the License/Disclaimer along with this software; !if not, you can write to: EC-RPN COMM Group, 2121 TransCanada, suite 500, Dorval (Quebec), !CANADA, H9P 1J3; or send e-mail to service.rpn@ec.gc.ca !-------------------------------------- LICENCE END -------------------------------------- ***s/r hzd_solmxma_ad - ADJ of hzd_solmxma * #include "model_macros_f.h"*
subroutine hzd_solmxma_ad (F_sol, F_Rhs_8, F_Xevec_8 , 13 % F_a_8, F_c_8 ,F_deltai_8, % minx1, maxx1, minx2, maxx2, nx1, nx2, nx3, F_pwr, % Minx,Maxx,Miny,Maxy,Gnk,Gni,nil,njl,nkl, % F_opsxp0_8, F_opsyp0_8,F_cdiff,F_npex,F_npey) * implicit none * integer minx1, maxx1, minx2, maxx2, nx1, nx2, nx3 , $ Minx , Maxx , Miny , Maxy , Gnk, Gni, F_pwr, $ njl , nkl , nil , F_npex, F_npey real*8 F_opsxp0_8(*), F_opsyp0_8(*), $ F_a_8(1:F_pwr,1:F_pwr,minx2:maxx2,nx3), $ F_c_8(1:F_pwr,1:F_pwr,minx2:maxx2,nx3), $ F_deltai_8(1:F_pwr,1:F_pwr,minx2:maxx2,nx3), $ F_Rhs_8,F_Xevec_8(*) real F_cdiff, F_sol(Minx:Maxx,Miny:Maxy,Gnk) * *author * M.Tanguay * *revision * v2_21 - Tanguay M. - initial MPI version * v2_31 - Tanguay M. - adapt to f90 native dynamic memory allocation * v3_00 - Tanguay M. - adapt to restructured hzd_solmxma * v3_11 - Tanguay M. - AIXport+Opti+OpenMP for TLM-ADJ * v3_20 - Tanguay M. - Correction for transpose48 when NPEX.GT.1 * *object * see id section * *ADJ of *arguments * Name I/O Description *---------------------------------------------------------------- * F_sol_8 I/O - pre-r.h.s and result of horizontal diffusion * F_rhs_8 I - r.h.s. of elliptic equation and work vector * F_Xevec_8 I - eigenvectors * F_a_8 I - sub diagonal of LU factorization * F_b_8 I - diagonal of LU factorization * F_c_8 I - super diagonal of LU factorization * F_deltai_8 I - * minx1 I - minimum index on local PEx for K (trp_12dmin) * maxx1 I - maximum index on local PEx for K (trp_12dmax) * minx2 I - minimum index on local PEy for I (trp_22min) * maxx2 I - maximum index on local PEy for I (trp_22max) * Nx1 I - number of points on local PEx for K (trp_12dn) * Nx2 I - number of points on local PEy for I (trp_22n) * Nx3 I - number of points along Y globally (G_nj) * F_pwr I - power (high order) of factorization * Minx I - minimum index on X for Rhs,Sol * Maxx I - maximum index on X for Rhs,Sol * Miny I - minimum index on Y for Rhs,Sol * Maxy I - maximum index on Y for Rhs,Sol * Gnk I - number of points along K globally (G_nk) * Gni I - number of points along I globally (G_ni) * Nil I - number of points on local PEx for I (l_ni) * Njl I - number of points on local PEy for J (l_nj) * Nkl I - number of points on local PEx for K (trp_12dn) * F_opsxp0_8 I - east-west operator for HO diffusion solver:Hzd_xp0_8 * F_opsyp0_8 I - north-south operator for HO diffusion solver:Hzd_yp0_8 * F_cdiff I - Hzd_cdiff * F_npex1 I - number of processors on X * F_npey1 I - number of processors on Y *---------------------------------------------------------------- * *implicits #include "ptopo.cdk"
#include "glb_ld.cdk"
#include "glb_pil.cdk"
* real*8, dimension(Miny:Maxy ,minx1:maxx1,Gni+F_npex) :: fdg1_8 real*8, dimension(Miny:Maxy ,minx1:maxx1,Gni) :: fwft_8 real*8, dimension(minx1:maxx1,minx2:maxx2,nx3+F_npey) :: fdg2_8 real*8, dimension(minx1:maxx1,minx2:maxx2,F_pwr,nx3) :: dn3_8,sol_8 * real*8 ZERO_8, fact_8 parameter( ZERO_8 = 0.0 ) integer o1,o2,i,j,k,m,l_pil_w,l_pil_e integer kkii,kilon,ki0,kin,pi0,pin * __________________________________________________________________ * l_pil_w=0 l_pil_e=0 if (l_south) l_pil_w= Lam_pil_w if (l_north) l_pil_e= Lam_pil_e kilon = ( maxx2-l_pil_e-minx2+l_pil_w+1 + Ptopo_npeOpenMP)/Ptopo_npeOpenMP * * Zero adjoint variables * ---------------------- !$omp parallel * !$omp do do k= 1,Gni+F_npex do j= minx1,maxx1 do i= Miny ,Maxy fdg1_8(i,j,k) = ZERO_8 end do end do end do !$omp enddo * !$omp do do k= 1,Gni do j= minx1,maxx1 do i= Miny ,Maxy fwft_8(i,j,k) = ZERO_8 end do end do end do !$omp enddo * !$omp do do k= 1,nx3+F_npey do j= minx2,maxx2 do i= minx1,maxx1 fdg2_8(i,j,k) = ZERO_8 end do end do end do !$omp enddo * !$omp do do m= 1,nx3 do k= 1,F_pwr do j= minx2,maxx2 do i= minx1,maxx1 dn3_8(i,j,k,m) = ZERO_8 sol_8(i,j,k,m) = ZERO_8 end do end do end do end do !$omp enddo * !$omp end parallel * * ADJ of * Resolution * * ADJ of * resolution du systeme blok-tridiagonal * * ADJ of * retour * * ADJ of * inverse projection ( r = x * w ) * call rpn_comm_transpose48 ( F_sol, Minx, Maxx, Gni,1, (Maxy-Miny+1), % (Maxy-Miny+1),minx1, maxx1, Gnk, fdg1_8, 1, % 1.0d0,0.d0) * !$omp parallel private(kkii,ki0,kin,pi0,pin) shared(kilon) * !$omp do do k=Nkl,1,-1 C call mxma8( F_Xevec_8,Gni-Lam_pil_w-Lam_pil_e,1, C % fdg1_8(Miny+pil_s,k,1+Lam_pil_w),(Maxy-Miny+1)*(maxx1-minx1+1), 1, C % fwft_8(Miny+pil_s,k,1+Lam_pil_w),(Maxy-Miny+1)*(maxx1-minx1+1), 1, C % Gni-Lam_pil_w-Lam_pil_e, Gni-Lam_pil_w-Lam_pil_e, C % (Maxy-Miny+1-pil_s-pil_n) ) call dgemm('N','N',(Maxy-Miny+1-pil_s-pil_n), . Gni-Lam_pil_w-Lam_pil_e, . Gni-Lam_pil_w-Lam_pil_e, . 1._8, fdg1_8(Miny+pil_s,k,1+Lam_pil_w), . (Maxy-Miny+1) * (maxx1-minx1+1),F_Xevec_8, . Gni-Lam_pil_w-Lam_pil_e, . 0._8, fwft_8(Miny+pil_s,k,1+Lam_pil_w), . (maxy-miny+1) * (maxx1-minx1+1)) enddo !$omp enddo * !$omp single call rpn_comm_transpose(fwft_8,Miny,Maxy,nx3,(maxx1-minx1+1), % minx2, maxx2,Gni,fdg2_8, 2,2) !$omp end single * !$omp do do j = 1+Lam_pil_s, nx3-Lam_pil_n do i = 1+l_pil_w, nx2-l_pil_e do k = 1, nx1 sol_8(k,i,F_pwr,j) = fdg2_8(k,i,j) + sol_8(k,i,F_pwr,j) fdg2_8(k,i,j) = ZERO_8 enddo enddo enddo !$omp enddo * * !$omp do do kkii = Ptopo_npeOpenMP,1,-1 ki0 = minx2+l_pil_w + kilon*(kkii-1) kin = min(ki0+kilon-1, maxx2-l_pil_e) pi0 = 1 +l_pil_w + kilon*(kkii-1) pin = min(pi0+kilon-1, nx2-l_pil_e) * do j = 1+Lam_pil_s,nx3-1-Lam_pil_n do o1= F_pwr,1,-1 do i = pin,pi0,-1 do k = nx1,1,-1 dn3_8(k,i,o1,j)= sol_8(k,i,o1,j) + dn3_8(k,i,o1,j) sol_8(k,i,o1,j)= -sol_8(k,i,o1,j) enddo enddo enddo * do o1= F_pwr,1,-1 do o2= F_pwr,1,-1 do i = kin,ki0,-1 do k = maxx1,minx1,-1 sol_8(k,i,o2,j+1)=F_c_8(o1,o2,i,j)*sol_8(k,i,o1,j) + sol_8(k,i,o2,j+1) enddo enddo enddo enddo enddo * enddo !$omp enddo * !$omp do do j = 1, nx3 if(j.eq.nx3-Lam_pil_n) then do o1= 1, F_pwr do i = 1+l_pil_w, nx2-l_pil_e do k = 1, nx1 dn3_8(k,i,o1,j) = sol_8(k,i,o1,j) + dn3_8(k,i,o1,j) sol_8(k,i,o1,j) = ZERO_8 enddo enddo enddo else do o1= 1, F_pwr do i = 1+l_pil_w, nx2-l_pil_e do k = 1, nx1 sol_8(k,i,o1,j)= 0.0 enddo enddo enddo endif enddo !$omp enddo * * ADJ of * scale le cote droit pour retour * !$omp do do j= nx3-Lam_pil_n,1+Lam_pil_s,-1 do o1= F_pwr,1,-1 do o2= F_pwr,1,-1 do i = maxx2-l_pil_e,minx2+l_pil_w,-1 do k = maxx1,minx1,-1 sol_8(k,i,o2,j)= F_deltai_8(o1,o2,i,j)*dn3_8(k,i,o1,j) + sol_8(k,i,o2,j) enddo enddo enddo enddo enddo !$omp enddo * !$omp do do j= nx3,1,-1 do o1= F_pwr,1,-1 do i= maxx2,minx2,-1 do k= maxx1,minx1,-1 C dn3_8(k,i,o1,j)= 0.0 dn3_8(k,i,o1,j)= ZERO_8 enddo enddo enddo enddo !$omp enddo * * ADJ of * aller * !$omp do do kkii = Ptopo_npeOpenMP,1,-1 ki0 = minx2+l_pil_w + kilon*(kkii-1) kin = min(ki0+kilon-1, maxx2-l_pil_e) pi0 = 1 +l_pil_w + kilon*(kkii-1) pin = min(pi0+kilon-1, nx2-l_pil_e) * do j = nx3-Lam_pil_n,2+Lam_pil_s,-1 do o1= F_pwr,1,-1 do i = pin,pi0,-1 do k = nx1,1,-1 dn3_8(k,i,o1,j)= sol_8(k,i,o1,j) + dn3_8(k,i,o1,j) sol_8(k,i,o1,j)= -sol_8(k,i,o1,j) enddo enddo enddo do o1= F_pwr,1,-1 do o2= F_pwr,1,-1 do i = kin,ki0,-1 do k = maxx1,minx1,-1 sol_8(k,i,o2,j-1) = F_a_8(o1,o2,i,j)*sol_8(k,i,o1,j) % + sol_8(k,i,o2,j-1) enddo enddo enddo enddo enddo enddo !$omp enddo * !$omp do do o1= 1, F_pwr do i = 1+l_pil_w, nx2-l_pil_e do k = 1, nx1 dn3_8(k,i,o1,1+Lam_pil_s)= sol_8(k,i,o1,1+Lam_pil_s) + dn3_8(k,i,o1,1+Lam_pil_s) sol_8(k,i,o1,1+Lam_pil_s)= ZERO_8 enddo enddo enddo !$omp enddo * * ADJ of * cote droit * !$omp do do j = nx3-Lam_pil_n,1+Lam_pil_s,-1 do i = nx2-l_pil_e,1+l_pil_w,-1 do k = nx1,1,-1 fdg2_8(k,i,j) = F_opsyp0_8(nx3+j)*dn3_8(k,i,1,j) + fdg2_8(k,i,j) dn3_8(k,i,1,j)= ZERO_8 enddo enddo enddo !$omp enddo * !$omp do do j = 1, nx3 do o1 = 1, F_pwr do i = minx2, maxx2 do k = minx1, maxx1 sol_8(k,i,o1,j)= ZERO_8 dn3_8(k,i,o1,j)= ZERO_8 enddo enddo enddo enddo !$omp enddo * * ADJ of * projection ( wfft = x transposed * g ) * !$omp single call rpn_comm_transpose(fwft_8,Miny,Maxy,nx3,(maxx1-minx1+1), % minx2, maxx2,Gni,fdg2_8,-2,2) !$omp end single * !$omp do do k=Nkl,1,-1 C call mxma8( F_Xevec_8, 1, Gni-Lam_pil_w-Lam_pil_e, C % fwft_8(Miny+pil_s,k,1+Lam_pil_w),(Maxy-Miny+1) * (maxx1-minx1+1), 1, C % fdg1_8(Miny+pil_s,k,1+Lam_pil_w),(Maxy-Miny+1) * (maxx1-minx1+1), 1, C % Gni-Lam_pil_w-Lam_pil_e, Gni-Lam_pil_w-Lam_pil_e, C % (Maxy-Miny+1-pil_s-pil_n)) call dgemm('N','T', (maxy-miny+1-pil_s-pil_n) , . gni-Lam_pil_w-Lam_pil_e, . gni-Lam_pil_w-Lam_pil_e, . 1._8, fwft_8(Miny+pil_s,k,1+Lam_pil_w), . (maxy-miny+1) * (maxx1-minx1+1),F_Xevec_8, . gni-Lam_pil_w-Lam_pil_e, . 0._8,fdg1_8(Miny+pil_s,k,1+Lam_pil_w), . (maxy-miny+1) * (maxx1-minx1+1)) enddo !$omp enddo * !$omp do do i = 1, Gni do k = minx1, maxx1 do j = Miny , Maxy fwft_8(j,k,i)=ZERO_8 enddo do j = njl+1-pil_n, Maxy fdg1_8(j,k,i)= ZERO_8 enddo do j = Miny, 0+pil_s fdg1_8(j,k,i)= ZERO_8 enddo enddo enddo !$omp enddo * !$omp do do i = 1+Lam_pil_w, Gni-Lam_pil_e do k = 1, nkl do j = 1+pil_s, njl-pil_n fdg1_8(j,k,i) = F_opsxp0_8(Gni+i)*fdg1_8(j,k,i) enddo enddo enddo !$omp enddo * !$omp end parallel * fact_8=((-1)**F_pwr)*dble(F_cdiff) call rpn_comm_transpose48 ( F_sol, Minx, Maxx, Gni,1, (Maxy-Miny+1), % (Maxy-Miny+1),minx1, maxx1, Gnk, fdg1_8,-1, C % fact_8,0.d0) % 1.d0,0.d0) * !$omp parallel do do k = 1, Gnk do j = 1+pil_s, njl-pil_n do i = 1+pil_w, nil-pil_e F_sol(i,j,k) = fact_8*F_sol(i,j,k) enddo enddo enddo !$omp end parallel do * * __________________________________________________________________ * return end