!-------------------------------------- LICENCE BEGIN ------------------------------------
!Environment Canada - Atmospheric Science and Technology License/Disclaimer, 
!                     version 3; Last Modified: May 7, 2008.
!This is free but copyrighted software; you can use/redistribute/modify it under the terms 
!of the Environment Canada - Atmospheric Science and Technology License/Disclaimer 
!version 3 or (at your option) any later version that should be found at: 
!http://collaboration.cmc.ec.gc.ca/science/rpn.comm/license.html 
!
!This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 
!without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
!See the above mentioned License/Disclaimer for more details.
!You should have received a copy of the License/Disclaimer along with this software; 
!if not, you can write to: EC-RPN COMM Group, 2121 TransCanada, suite 500, Dorval (Quebec), 
!CANADA, H9P 1J3; or send e-mail to service.rpn@ec.gc.ca
!-------------------------------------- LICENCE END --------------------------------------
***s/r adw_main_3_intlag_ad - ADJ of adw_main_3_intlag_tl when Adw_lag3d_L=.TRUE. 
*
#include "model_macros_f.h"
*

      subroutine adw_main_3_intlag_ad ( F_u, F_v, F_w, F_um, F_vm, F_wm ) 1,24
*
      implicit none
*
      real F_u (*),F_v (*),F_w (*)
      real F_um(*),F_vm(*),F_wm(*)
*
*author
*     monique tanguay
*
*revision
* v2_31 - Tanguay M.        - initial MPI version
* v3_00 - Tanguay M.        - adapt to restructured adw_main 
* v3_00 - Tanguay M.        - restore vectorization in adjoint of semi-Lag.
* v3_02 - Tanguay M.        - restore tracers monotone if V4dg_conf.ne.0
* v3_03 - Tanguay M.        - Adjoint Lam and Nohyd configuration 
* v3_11 - Tanguay M.        - Remove restoration of vectorization in adjoint of semi-Lag
*                           - Introduce key Adw_mono_L
* v3_11 - Lee V.            - OpenMP for ADW_MAIN_3_INT_AD
* v3_20 - Tanguay M.        - Tracers and basic fields in same OPENMP loop 
*                           - Lagrange 3D
* v3_21 - Tanguay / Valin   - Use OPENMP as Salmond (ECMWF) 
*                           - Call adw_main_3_intlag_ad based on Adw_lag3d_L
* v3_22 - Tanguay M.        - Correction reproductibility  
* v3_30 - Tanguay M.        - Validation for LAM version 
*                           - correct calculation for LAM when Glb_pil gt 7
*                           - correction vmmlod/vmmget when OPENMP 
* v3_31 - Tanguay M.        - Introduce time extrapolation
* v3_31 - Tanguay M.        - new scope for operator + adw_cliptraj (LAM)
* v3_31 - Tanguay M.        - Do adjoint of outsiders in advection
*
*language
*     fortran 77
*
*object
*     see id section
*
*ADJ of
*arguments
*______________________________________________________________________
*        |                                                       |     |
* NAME   | DESCRIPTION                                           | I/O |
*--------|-------------------------------------------------------|-----|
*        |                                                       |     |
*        |                                                       |     |
* F_u,F_v| 3 components of upstream positions at t1 at input     |  iw |
* F_w    | used as work field afterward                          |     |
*________|_______________________________________________________|_____|
*
*implicits
#include "glb_ld.cdk"
#include "glb_pil.cdk"
#include "ptopo.cdk"
#include "geomg.cdk"
#include "orh.cdk"
#include "lun.cdk"
#include "schm.cdk"
#include "offc.cdk"
#include "cstv.cdk"
#include "rhsc.cdk"
#include "v4dg.cdk"
#include "adw.cdk"
#include "tr3d.cdk"
#include "vt1.cdk"
#include "vt0.cdk"
#include "adwm.cdk"
#include "rhscm.cdk"
#include "vt1m.cdk"
#include "vt0m.cdk"
*
*modules
      integer  vmmlod, vmmget, vmmuld
      external vmmlod, vmmget, vmmuld
************************************************************************
      integer i0,in,j0,jn,n,nij,nijk,nijkag,i,j,k,dest_ni,nn,ii,pnerr,
     $        pnlkey1(10+Tr3d_ntr),pnlkey2(10+Tr3d_ntr),
     $        pnlkey3(10+Tr3d_ntr),pnlkeys(10+Tr3d_ntr),pnlkey4(10+Tr3d_ntr),
     $        key1(Tr3d_ntr),key0(Tr3d_ntr),key0m(Tr3d_ntr), 
     $        key1_,key0_,key0m_,pnlod,err,dim
*
      real*8 aaa_8
      real*8, parameter :: ZERO_8 = 0.0
*
      logical wind_L, mono_L
*
      integer*8, dimension(:)  ,allocatable :: pointer_1,pointer_3 
*
      real, dimension(:,:)  ,allocatable :: u,um 
      real, dimension(:,:)  ,allocatable :: capx1,capy1,capz1 
      real, dimension(:,:)  ,allocatable :: capx2,capy2,capz2 
      real, dimension(:,:)  ,allocatable :: wrkc_K 
*
      real, dimension(:)    ,allocatable :: F_vm_K 
*
      real, dimension(:,:,:,:)  ,allocatable :: in0 
*
      real fin1 (LDIST_SHAPE,l_nk),finm1(LDIST_SHAPE,l_nk),fin,finm,
     %     work1(LDIST_SHAPE,l_nk),worx1(LDIST_SHAPE,l_nk),work,worx
      pointer (pafin,  fin(LDIST_SHAPE,*)),(pafinm,finm(LDIST_SHAPE,*)),
     %        (pawork,work(LDIST_SHAPE,*)),(paworx,worx(LDIST_SHAPE,*))
*
      real vmm(LDIST_SHAPE,l_nk)
      pointer (pavmm, vmm)
*
      logical residual_L, enough_L
      integer max_field,max_thread,max_serie,bnd_serie,lev,ind,ff,tt,ss,
     %        f0,t0,s0,left_ff,ratio,rr,others
*
      integer, dimension(:,:)  ,allocatable :: field,level 
      integer, dimension(:  )  ,allocatable :: thread 
*
      real wrkb(l_ni*l_nj*l_nk),wrkc(l_ni*l_nj*l_nk)
*
      logical done_L
      data done_L /.false./
      save done_L
*
      real dummy
*
      integer Adw_fro_n_TR,Adw_for_n_TR,Adw_fro_s_TR,Adw_for_s_TR
*
      real F_um_ref(l_ni*l_nj*l_nk),F_vm_ref(l_ni*l_nj*l_nk)
*
*     ______________________________________________________
*
      if (Lun_debug_L) write (Lun_out,1000)
*
*     Initializations 
*     ---------------
      nij    = l_ni   *l_nj
      nijk   = l_ni   *l_nj   *l_nk
      nijkag = Adw_nit*Adw_njt*l_nk
*
      if (.not.G_lam) then
*
          call hpalloc(Adwm_c1m_    ,nijk,      err,1)
          call hpalloc(Adwm_n1m_    ,nijk,      err,1)
          call hpalloc(Adwm_xdd1m_  ,nijk,      err,1)
          call hpalloc(Adwm_xgg1m_  ,nijk,      err,1)
*
          call hpalloc(Adw_n1_      ,nijk,      err,1)
          call hpalloc(Adw_xdd1_    ,nijk,      err,1)
          call hpalloc(Adw_xgg1_    ,nijk,      err,1)
*
          allocate ( F_vm_K(nijk) )
*
*         Zero adjoint variables
*         ---------------------- 
          wrkb = 0.
*
          do n=1,nijk
             Adw_n1  (n) = 0.
             Adw_xdd1(n) = 0.
             Adw_xgg1(n) = 0.
          enddo
*
      endif
*
*     Zero adjoint variables
*     ---------------------- 
      wrkc = 0.
*
      i0=1
      in=l_ni
      j0=1
      jn=l_nj
      if (G_lam) then
          if (l_west)  i0=pil_w
          if (l_east)  in=l_niu - pil_e + 2
          if (l_south) j0=pil_s
          if (l_north) jn=l_njv - pil_n + 2
*
          do i = 1,l_ni*l_nj*l_nk
             F_um_ref(i) = F_um(i)
             F_vm_ref(i) = F_vm(i)
          enddo
*
          call adw_cliptraj (  F_um, F_vm, i0, in, j0, jn, 'IN3_TR')
      endif
*
      if (.not.G_lam) then
*
         call adw_exch_1_tr ( Adwm_n1m,Adwm_xgg1m,Adwm_xdd1m,Adwm_c1m,
     $                        F_um,F_vm,F_wm,F_vm_K)
*
         dim = max(1,Adw_fro_a)
         call hpalloc(Adw_wrka_    ,dim,      err,1)
*
*        Zero adjoint variables
*        ---------------------- 
         do n=1,dim
            Adw_wrka (n) = 0.
         enddo
*
*        TRAJECTORY
*        ----------
         call hpalloc(Adwm_capx2m_   ,dim,      err,1)
         call hpalloc(Adwm_capy2m_   ,dim,      err,1)
         call hpalloc(Adwm_capz2m_   ,dim,      err,1)
*
         call adw_exch_2 ( Adwm_capx2m,Adwm_capy2m,Adwm_capz2m,
     %                     Adwm_n1m,   Adwm_xgg1m, Adwm_xdd1m,
     %                     Adw_fro_n, Adw_fro_s, Adw_fro_a,
     %                     Adw_for_n, Adw_for_s, Adw_for_a, 3)
*
      endif
*
      if (G_lam) then
          nn=0
          dest_ni=l_ni
      else
          nn=999
          dest_ni=G_ni
      endif
*
*     Define keys for fields to interpolate
*     -------------------------------------
      pnlod = 0
*
      if (.not. Schm_hydro_L) then
*
         pnlod = pnlod+1
         pnlkey1(pnlod) = VMM_KEY(rvv)
         pnlkey2(pnlod) = VMM_KEY(rvvm)
         pnlkey3(pnlod) = 0
         pnlkey4(pnlod) = 0
         pnlkeys(pnlod) = 2
*
         pnlod = pnlod+1
         pnlkey1(pnlod) = VMM_KEY(rw)
         pnlkey2(pnlod) = VMM_KEY(rwm)
         pnlkey3(pnlod) = 0
         pnlkey4(pnlod) = 0
         pnlkeys(pnlod) = 2
*
      endif
*
         pnlod = pnlod+1
         pnlkey1(pnlod) = VMM_KEY(rth)
         pnlkey2(pnlod) = VMM_KEY(rthm)
         pnlkey3(pnlod) = 0
         pnlkey4(pnlod) = 0
         pnlkeys(pnlod) = 2

         pnlod = pnlod+1
         pnlkey1(pnlod) = VMM_KEY(rcn)
         pnlkey2(pnlod) = VMM_KEY(rcnm)
         pnlkey3(pnlod) = 0
         pnlkey4(pnlod) = 0
         pnlkeys(pnlod) = 2

         pnlod = pnlod+1
         pnlkey1(pnlod) = VMM_KEY(rvw1)
         pnlkey2(pnlod) = VMM_KEY(rvw1m)
         pnlkey3(pnlod) = VMM_KEY(rvw2)
         pnlkey4(pnlod) = 0 
         pnlkeys(pnlod) = 3

         pnlod = pnlod+1
         pnlkey1(pnlod) = VMM_KEY(ruw1)
         pnlkey2(pnlod) = VMM_KEY(ruw1m)
         pnlkey3(pnlod) = VMM_KEY(ruw2)
         pnlkey4(pnlod) = 0 
         pnlkeys(pnlod) = 3

      if ( Orh_icn .eq. Schm_itcn. or. .not.Orh_crank_L ) then
*
         aaa_8 = ( Offc_a1_8 / Offc_b0_8 )/ Cstv_dt_8
*
         key1_ = VMM_KEY (trt1)
         key0_ = VMM_KEY (trt0)
         key0m_= VMM_KEY (trt0m)
*
         do n=1,Tr3d_ntr
            key1 (n)= key1_  + n
            key0 (n)= key0_  + n
            key0m(n)= key0m_ + n
            pnlod = pnlod+1
            pnlkey1(pnlod) = key0 (n)
            pnlkey2(pnlod) = key0m(n)
            pnlkey3(pnlod) = 0 
            pnlkey4(pnlod) = key1 (n)
            pnlkeys(pnlod) = 1
         end do
*
      endif
*
      max_field = pnlod
*
*     Allocate arrays for fields to interpolate 
*     -----------------------------------------
      allocate ( u (Adw_nit*Adw_njt*l_nk, max_field) )
      allocate ( um(Adw_nit*Adw_njt*l_nk, max_field) )
*
      allocate ( pointer_1(max_field) )
      allocate ( pointer_3(max_field) )
*
      if (G_lam) allocate ( in0(LDIST_SHAPE,l_nk,max_field) )
*
      allocate ( capx1 (nijk,max_field) )
      allocate ( capy1 (nijk,max_field) )
      allocate ( capz1 (nijk,max_field) )
*
      if (.not.G_lam) then
          allocate ( capx2 (dim,max_field) )
          allocate ( capy2 (dim,max_field) )
          allocate ( capz2 (dim,max_field) )
      endif
*
      allocate ( wrkc_K(l_ni*l_nj*l_nk,max_field) )
*
*     Allocate arrays for series of threads 
*     -------------------------------------
      bnd_serie = (max_field * l_nk) / Ptopo_npeOpenMP + 5 
*
      allocate ( field  (Ptopo_npeOpenMP, bnd_serie) )
      allocate ( level  (Ptopo_npeOpenMP, bnd_serie) )
      allocate ( thread (                 bnd_serie) )
*
*     Zero adjoint variables
*     ----------------------
      do ff=1,max_field
*
!$omp parallel
!$omp do
        do ii=1,Adw_nit*Adw_njt*l_nk
           u(ii,ff) = ZERO_8
        enddo
!$omp enddo
!$omp do
        do n=1,nijk
           capx1(n,ff) = ZERO_8
           capy1(n,ff) = ZERO_8
           capz1(n,ff) = ZERO_8
        enddo
!$omp enddo
*
      if (.not.G_lam) then
!$omp do
          do n=1,dim
             capx2(n,ff) = ZERO_8 
             capy2(n,ff) = ZERO_8 
             capz2(n,ff) = ZERO_8 
          enddo
!$omp enddo
      endif
*
!$omp do
        do n=1,l_ni*l_nj*l_nk
           wrkc_K(n,ff) = ZERO_8
        enddo
!$omp enddo
*
!$omp end parallel
*
      enddo
*
*     ------------------------------------- 
*     Calculations on fields to interpolate 
*     BEFORE adjoint interpolation 
*     ------------------------------------- 
      do ff=1,max_field
*
*     Tracers calculation #1
*     ----------------------
      if (pnlkeys(ff).eq.1) then
*
           err = vmmlod(pnlkey1(ff),1)
           pafin = loc(fin1)
           pnerr = vmmget(pnlkey1(ff),pafin,fin)
*
!$omp parallel do 
           do k=1,G_nk
           do j=1,l_nj
           do i=1,l_ni
              fin(i,j,k) = Cstv_tau_8*fin(i,j,k)
           end do
           end do
           end do
!$omp end parallel do 
*
           err = vmmuld(pnlkey1(ff),1)
*
      endif
*
*     Exchange Haloes for TRAJECTORY 
*     ------------------------------
          err = vmmlod(pnlkey2(ff),1)
          pafinm = loc(finm1)
          pnerr = vmmget(pnlkey2(ff),pafinm,finm)
* 
          call rpn_comm_xch_halox (finm, LDIST_DIM, l_ni, l_nj, l_nk,
     %       Adw_halox, Adw_haloy, G_periodx, G_periody,um(1,ff),-Adw_halox+1,
     %       Adw_nic+Adw_halox, -Adw_haloy+1, Adw_njc+Adw_haloy, dest_ni, nn)
*
          err = vmmuld(pnlkey2(ff),1)
*
*     Extend values at the poles for TRAJECTORY 
*     -----------------------------------------
      wind_L = .false.
      if (pnlkeys(ff).eq.3) wind_L = .true.
*
!$omp parallel 
      if (.not.G_lam) then
        if ( l_south ) then
*
           if ( wind_L ) then
              call adw_pol0 (um(1,ff), 0, Adw_nic,Adw_halox,Adw_njc,
     %                       Adw_haloy,l_nk)
           else
              call adw_pols (um(1,ff),Adw_wx_8, 0, Adw_nic,Adw_halox,
     %                       Adw_njc,Adw_haloy,l_nk)
           endif
              call adw_polx (um(1,ff),Adw_xg_8,.true.,Adw_nic,Adw_halox,
     %                       Adw_njc,Adw_haloy,l_nk)
        endif
        if ( l_north ) then
*
           if ( wind_L ) then
              call adw_pol0 (um(1,ff),Adw_njc+1,Adw_nic,Adw_halox,Adw_njc,
     %                       Adw_haloy,l_nk)
           else
              call adw_pols (um(1,ff),Adw_wx_8, Adw_njc+1,Adw_nic,Adw_halox,
     %                       Adw_njc,Adw_haloy,l_nk)
           endif
              call adw_polx (um(1,ff),Adw_xg_8,.false.,Adw_nic,Adw_halox,
     %                       Adw_njc,Adw_haloy,l_nk)
        endif
      endif
!$omp end parallel 
*
      enddo 
*
*     Do HERE adjoint of interpolation for OUTSIDERS 
*     ----------------------------------------------
      do ff = 1, max_field 
*
             if (pnlkeys(ff).eq.3) then
*
                err = vmmlod(pnlkey3(ff),1)
                err = vmmget(pnlkey3(ff),pavmm,vmm)
                pointer_3(ff) = pavmm
*
             else
*
                err = vmmlod(pnlkey1(ff),1)
                err = vmmget(pnlkey1(ff),pavmm,vmm)
                pointer_1(ff) = pavmm
*
             endif
*
             if (pnlkeys(ff).eq.3) then
                 pawork = pointer_3(ff)
             else 
                 pawork = pointer_1(ff)
             endif
*
             mono_L = .false.
             if (pnlkeys(ff).eq.1) mono_L = Adw_mono_L
*
!$omp parallel do 
             do k = l_nk,1,-1 
             do j = jn,j0,-1
             do i = in,i0,-1
                wrkc ( (k-1)*nij+(j-1)*l_ni+i ) = work(i,j,k)
             enddo
             enddo
             enddo
!$omp end parallel do 
*
*            Adjoint of interpolation for outsiders
*            --------------------------------------
             if (.not.G_lam) then
*
             if ( Adw_for_a .gt. 0 )
     %       call adw_exch_3_ad ( wrkc,dummy,wrkb,dummy,dummy,Adwm_c1m,1)
*
             call adw_exch_2_ad ( wrkb,    dummy,dummy,
     %                            Adw_wrka,dummy,dummy,
     %                            Adw_for_n, Adw_for_s, Adw_for_a,
     %                            Adw_fro_n, Adw_fro_s, Adw_fro_a, 1)
*
             if ( Adw_fro_a .gt. 0 ) then
             call adw_tricub_lag3d_ad ( Adw_wrka,
     %                                  u (1,ff),capx2(1,ff),capy2(1,ff),capz2(1,ff),
     %                                  um(1,ff),Adwm_capx2m,Adwm_capy2m,Adwm_capz2m,
     %                                  Adw_fro_a, mono_L,1,Adw_fro_a,1,1,1)
             endif
*
             endif
*
!$omp parallel do 
             do k = l_nk,1,-1
             do j = jn,j0,-1
             do i = in,i0,-1
                wrkc_K ( (k-1)*nij+(j-1)*l_ni+i, ff ) = wrkc ( (k-1)*nij+(j-1)*l_ni+i ) 
             enddo
             enddo
             enddo
!$omp end parallel do 
*
             if (pnlkeys(ff).eq.3) then
                 err = vmmuld(pnlkey3(ff),1)
             else
                 err = vmmuld(pnlkey1(ff),1)
             endif
*
      enddo
*
*     ------------------------------------------
*     Give a field to interpolate to each thread 
*     if all threads could be occupied
*     ------------------------------------------
      ratio = max_field/Ptopo_npeOpenMP
*
      left_ff = 1
*
      if( ratio.ne.0 ) then
*
*         Verify if the threads that won't be treated are enough
*         ------------------------------------------------------
          others = max_field - ratio * Ptopo_npeOpenMP
          enough_L = .true.
          if( others.ne.0.and.2*others.lt.Ptopo_npeOpenMP ) enough_L = .false.
*
          if (Ptopo_myproc.eq.0.and.Lun_out.gt.0.and..not.done_L.and.ratio.ne.0)
     %        write(Lun_out,*) 'CN = ',Orh_icn,': ADW_TRICUB_AD: Enough threads for series',
     %        ' if we allow Interpolations with all threads occupied =',
     %        enough_L
*
      endif
*
      if( ratio.ne.0.and.enough_L ) then
*
      do rr = 1,ratio
*
!$omp parallel do private(mono_L,pnerr,err,i,j,
!$omp$            work1,pawork,wrkc,ff,lev,tt,ind)
!$omp$  shared(l_nk,nij,i0,in,j0,jn,u,um,capx1,capy1,capz1,
!$omp$         pnlkey1,pnlkey3,pnlkeys,field,level,ss,thread,left_ff,
!$omp$         pointer_1,pointer_3)
*
          do ff = left_ff, left_ff + Ptopo_npeOpenMP - 1
*
             mono_L = .false.
             if (pnlkeys(ff).eq.1) mono_L = Adw_mono_L
*
             do k = l_nk,1,-1
             do j = jn,j0,-1
             do i = in,i0,-1
                wrkc ( (k-1)*nij+(j-1)*l_ni+i ) = wrkc_K ( (k-1)*nij+(j-1)*l_ni+i, ff )
             enddo
             enddo
             enddo
*
*            Adjoint of interpolation
*            ------------------------
             call adw_tricub_lag3d_ad ( wrkc,
     %                            u(1,ff), capx1(1,ff), capy1(1,ff), capz1(1,ff),
     %                           um(1,ff),F_um,F_vm,F_wm,
     %                           nij, mono_L,i0,in,j0,jn,l_nk)
*
          enddo
*
!$omp end parallel do
*
         left_ff = left_ff + Ptopo_npeOpenMP 
*
      enddo
*
      else
*
      left_ff = 1
*
      endif
*
      if (Ptopo_myproc.eq.0.and.Lun_out.gt.0.and..not.done_L) 
     %    write(Lun_out,*) 'CN = ',Orh_icn,': ADW_TRICUB_AD: Interpolations done with all threads occupied =',
     %                     left_ff - 1
*
      if (Ptopo_myproc.eq.0.and.Lun_out.gt.0.and..not.done_L) 
     %    write(Lun_out,*) 'CN = ',Orh_icn,': ADW_TRICUB_AD: Interpolations done with series of threads    =',
     %                     max_field - left_ff + 1 
*
      if( left_ff.le.max_field ) then
*
*     --------------------------------------------------------------------
*     Allocate untreated field to interpolate at prescribed vertical level 
*     for each thread in a series   
*     --------------------------------------------------------------------
      ff  = left_ff - 1 
      lev = 1 
*
      residual_L = .false.
*
      do ss = 1,bnd_serie
*
*         Set number of threads in a series 
*         ---------------------------------
          thread(ss) = Ptopo_npeOpenMP
*
          do tt = 1,Ptopo_npeOpenMP
*
             ff = ff + 1
*            
             if(ff.gt.max_field) then
                ff = left_ff 
                if(lev.le.l_nk/2) then
                   lev = lev + l_nk/2 
                   if(lev.gt.l_nk) then
                      thread(ss) = tt-1 
                      max_serie = ss
                      goto 100
                   endif
                else
                   lev = lev - l_nk/2 + 1 
                   if(lev.gt.l_nk/2) then
*
*                  Residual levels  
*                  ---------------
                   if(lev + l_nk/2.le.l_nk) then
*
                      residual_L = .true.
*
                      lev = lev + l_nk/2
*
                         if (Ptopo_myproc.eq.0.and.Lun_out.gt.0.and..not.done_L) 
     %                       write(Lun_out,1001) Orh_icn,l_nk - lev + 1
*
                      s0 = ss 
                      f0 = ff 
                      t0 = tt 
*
                      if(t0.ne.1) then
*
                         thread(s0) = t0-1 
*
                         if (Ptopo_myproc.eq.0.and.Lun_out.gt.0.and..not.done_L) 
     %                       write(Lun_out,1002) Orh_icn,s0 
                         if (Ptopo_myproc.eq.0.and.Lun_out.gt.0.and..not.done_L) 
     %                       write(Lun_out,1003) Orh_icn,thread(s0) 
*
                         s0 = s0 + 1
                         t0 = 1
                      endif
*
  200                 continue            
*
                      field(t0,s0) = f0 
                      level(t0,s0) = lev 
*
                      if (f0.eq.max_field) then 
                          thread(s0) = t0 
                          lev = lev + 1
                          if(lev.gt.l_nk) then  
                             max_serie = s0
                             goto 100
                          endif
                      endif
                      if (t0.eq.Ptopo_npeOpenMP) then
                          thread(s0) = t0 
                          s0 = s0 + 1 
                          t0 = 0 
                      endif
                      f0 = f0 + 1
                      t0 = t0 + 1
                      goto 200
*
                   else
                      thread(ss) = tt-1 
                      max_serie = ss
                      goto 100
                   endif
                   endif
                endif
             endif
*
             field(tt,ss) = ff 
             level(tt,ss) = lev 
*
          enddo
*
      enddo
*
  100 continue
*
      if ( thread(max_serie).eq.0 ) max_serie = max_serie - 1 
*
      if (Ptopo_myproc.eq.0.and.Lun_out.gt.0.and..not.done_L.and..not.residual_L) then 
          write(Lun_out,*) 'CN = ',Orh_icn,': ADW_TRICUB_AD: No Residual levels                             '
          write(Lun_out,*) 'CN = ',Orh_icn,': ADW_TRICUB_AD: Number of series when No Residual levels      =',
     %                     max_serie
      endif
*
*     ---------------------------------------------------
*     Do adjoint interpolation for each series of threads   
*     ---------------------------------------------------
      do ss = 1,max_serie
*
!$omp parallel do private(mono_L,pnerr,err,i,j,
!$omp$            work1,pawork,wrkc,ff,lev,tt,ind)
!$omp$  shared(nij,i0,in,j0,jn,u,um,capx1,capy1,capz1,
!$omp$         pnlkey1,pnlkey3,pnlkeys,field,level,ss,thread,
!$omp$         pointer_1,pointer_3)
*
          do tt = 1,thread(ss)
*
             ff  = field(tt,ss)
             lev = level(tt,ss)
*
             mono_L = .false.
             if (pnlkeys(ff).eq.1) mono_L = Adw_mono_L
*
             ind = (lev-1)*nij+1
*
             do j = jn,j0,-1
             do i = in,i0,-1
                wrkc ( (j-1)*l_ni+i ) = wrkc_K ( (lev-1)*nij+(j-1)*l_ni+i, ff )
             enddo
             enddo
*
*            Adjoint of interpolation
*            ------------------------
             call adw_tricub_lag3d_ad ( wrkc,
     %                           u(1,ff), capx1(ind,ff), capy1(ind,ff), capz1(ind,ff),
     %                           um(1,ff),F_um(ind),F_vm(ind),F_wm(ind),
     %                           nij, mono_L,i0,in,j0,jn,1)
*
          enddo 
*
!$omp end parallel do 
*
      enddo
*
      endif
*
*     ----------------------------------------- 
*     Calculations on fields to be interpolated 
*     AFTER adjoint interpolation 
*     ----------------------------------------- 
      do ff=1,max_field
*
*       Zeroing field 
*       -------------
        pawork = loc(work1)
*
        if (pnlkeys(ff).eq.3) then
            err = vmmlod(pnlkey3(ff),1)
            pnerr = vmmget(pnlkey3(ff),pawork,work)
        else
            err = vmmlod(pnlkey1(ff),1)
            pnerr = vmmget(pnlkey1(ff),pawork,work)
        endif
*
!$omp parallel do 
        do k=1,l_nk
           do j=l_miny,l_maxy
              do i=l_minx,l_maxx
                  work(i,j,k) = ZERO_8
               enddo
           enddo
        enddo
!$omp end parallel do 
*
        if (pnlkeys(ff).eq.3) then
            err = vmmuld(pnlkey3(ff),1)
        else
            err = vmmuld(pnlkey1(ff),1)
        endif
*
*       Extend values at the poles 
*       --------------------------
        wind_L = .false.
        if (pnlkeys(ff).eq.3) wind_L = .true.
*
!$omp parallel 
        if (.not.G_lam) then
          if ( l_north ) then
                call adw_polx_ad (u(1,ff),Adw_xg_8,.false.,Adw_nic,Adw_halox,
     %                            Adw_njc,Adw_haloy,l_nk)
*
              if ( wind_L ) then
                call adw_pol0_ad (u(1,ff),Adw_njc+1,Adw_nic,Adw_halox,Adw_njc,
     %                            Adw_haloy,l_nk)
              else
                call adw_pols_ad (u(1,ff),Adw_wx_8, Adw_njc+1,Adw_nic,Adw_halox,
     %                            Adw_njc,Adw_haloy,l_nk)
              endif
*
          endif
          if ( l_south ) then
                call adw_polx_ad (u(1,ff),Adw_xg_8,.true.,Adw_nic,Adw_halox,
     %                            Adw_njc,Adw_haloy,l_nk)
*
              if ( wind_L ) then
                call adw_pol0_ad (u(1,ff), 0, Adw_nic,Adw_halox,Adw_njc,
     %                            Adw_haloy,l_nk)
              else
                call adw_pols_ad (u(1,ff),Adw_wx_8, 0, Adw_nic,Adw_halox,
     %                            Adw_njc,Adw_haloy,l_nk)
              endif
          endif
        endif
!$omp end parallel 
*
*       Exchange Haloes 
*       ---------------
        err = vmmlod(pnlkey1(ff),1)
        pafin = loc(fin1)
        pnerr = vmmget(pnlkey1(ff),pafin,fin)
*
        if (G_lam) then
!$omp parallel do 
            do k=1,l_nk
            do j=l_miny,l_maxy
            do i=l_minx,l_maxx
            in0(i,j,k,ff) = fin(i,j,k)
            end do
            end do
            end do
!$omp end parallel do 
        endif
*
        call rpn_comm_adj_halox (fin, LDIST_DIM, l_ni, l_nj, l_nk,
     %  Adw_halox, Adw_haloy, G_periodx, G_periody, u(1,ff),       -Adw_halox+1,
     %  Adw_nic+Adw_halox, -Adw_haloy+1, Adw_njc+Adw_haloy, dest_ni, nn)
*
        err = vmmuld(pnlkey1(ff),1)
*
*       Tracers calculation #2
*       ----------------------
        if (pnlkeys(ff).eq.1) then
*
            err = vmmlod(pnlkey1(ff),1)
            err = vmmlod(pnlkey4(ff),1)
*
            pafin  = loc(fin1)
            paworx = loc(worx1)
*
            pnerr = vmmget(pnlkey1(ff),pafin,fin)
            pnerr = vmmget(pnlkey4(ff),paworx,worx)
*
!$omp parallel do 
            do k=1,G_nk
            do j=1,l_nj
            do i=1,l_ni
               worx(i,j,k) = - aaa_8*fin(i,j,k) + worx(i,j,k)
               fin (i,j,k) = ZERO_8
            end do
            end do
            end do
!$omp end parallel do 
*
            err = vmmuld(pnlkey1(ff),1)
            err = vmmuld(pnlkey4(ff),1)
*
        endif
*
         if (G_lam) then
             pnerr = vmmget(pnlkey1(ff),pafin,fin)
!$omp parallel do
             do k=1,l_nk
             do j=l_miny,l_maxy
             do i=l_minx,l_maxx
             fin(i,j,k)    = in0(i,j,k,ff) + fin(i,j,k)
C            in0(i,j,k,ff) = ZERO_8
             end do
             end do
             end do
!$omp end parallel do
             err = vmmuld(pnlkey1(ff),1)
         endif
*
      enddo
*
************************************************************************
*
*     -----------------------------------
*     ADJ of Keep positions in CAP fields
*     -----------------------------------
      do ff=1,max_field
*
!$omp parallel do private(n)
      do k=1,l_nk
      do j=j0,jn
      do i=i0,in
         n = (k-1)*nij + ((j-1)*l_ni) + i
*
         F_w(n) = capz1(n,ff) + F_w(n)
         F_v(n) = capy1(n,ff) + F_v(n)
         F_u(n) = capx1(n,ff) + F_u(n)
*
         capz1(n,ff) = ZERO_8
         capy1(n,ff) = ZERO_8
         capx1(n,ff) = ZERO_8
      end do
      end do
      end do
!$omp end parallel do
*
      end do
*
      if (.not.G_lam) then
*
         do ff=max_field,1,-1
*
         call adw_exch_2_ad ( capx2(1,ff),capy2(1,ff),capz2(1,ff),
     %                        Adw_n1,     Adw_xgg1,   Adw_xdd1,
     %                        Adw_fro_n,  Adw_fro_s,  Adw_fro_a,
     %                        Adw_for_n,  Adw_for_s,  Adw_for_a, 3)
*
         end do
*
      endif
*
      if (.not.G_lam) then
*
         Adw_fro_n_TR = Adw_fro_n
         Adw_for_n_TR = Adw_for_n
         Adw_fro_s_TR = Adw_fro_s
         Adw_for_s_TR = Adw_for_s
*
         call adw_exch_1_tr ( dummy,dummy,dummy,Adwm_c1m,
     $                        F_um,F_vm,F_wm,F_vm_K)
*
         Adw_fro_n = Adw_fro_n_TR
         Adw_for_n = Adw_for_n_TR
         Adw_fro_s = Adw_fro_s_TR
         Adw_for_s = Adw_for_s_TR
*
         call adw_exch_1_ad ( Adw_n1,Adw_xgg1,Adw_xdd1,
     $                        F_u,F_v,F_w,Adwm_c1m,F_vm_K)
*
      endif
*
      if (G_lam) call adw_cliptraj_ad (  F_u, F_v, F_um_ref, F_vm_ref, i0, in, j0, jn, 'IN3_AD')
*
***********************************************************************
* Deallocate
***********************************************************************
      if (.not.G_lam) then
          call hpdeallc(Adwm_c1m_    ,err,1)
          call hpdeallc(Adwm_n1m_    ,err,1)
          call hpdeallc(Adwm_xdd1m_  ,err,1)
          call hpdeallc(Adwm_xgg1m_  ,err,1)
*
          call hpdeallc(Adw_n1_      ,err,1)
          call hpdeallc(Adw_xdd1_    ,err,1)
          call hpdeallc(Adw_xgg1_    ,err,1)
*
          deallocate ( F_vm_K)
*
          deallocate ( capx2,capy2,capz2  )
          call hpdeallc(Adw_wrka_    ,err,1)
          call hpdeallc(Adwm_capx2m_ ,err,1)
          call hpdeallc(Adwm_capy2m_ ,err,1)
          call hpdeallc(Adwm_capz2m_ ,err,1)
*
      endif
*
      deallocate ( wrkc_K,u,um  )
      deallocate ( capx1,capy1,capz1   )
      deallocate ( field,level,thread  )
      deallocate ( pointer_1,pointer_3 )
*
      if (G_lam) deallocate ( in0 )
*
      if (Orh_icn.eq.1) done_L = .true.
*
 1000 format(3X,'ADJ of ADVECT THE RIGHT-HAND-SIDES: (S/R ADW_MAIN_3_INTLAG_AD)')
*
 1001 format(' CN = ',i2,' : ADW_TRICUB_AD: Number of Residual Levels                     =',i2)
 1002 format(' CN = ',i2,' : ADW_TRICUB_AD: Number of series       before Residual levels =',i2)
 1003 format(' CN = ',i2,' : ADW_TRICUB_AD: Threads on last series before Residual levels =',i2)
*
      return
      end