!--------------------------------------- LICENCE BEGIN -----------------------------------
!Environment Canada - Atmospheric Science and Technology License/Disclaimer,
!                     version 3; Last Modified: May 7, 2008.
!This is free but copyrighted software; you can use/redistribute/modify it under the terms
!of the Environment Canada - Atmospheric Science and Technology License/Disclaimer
!version 3 or (at your option) any later version that should be found at:
!http://collaboration.cmc.ec.gc.ca/science/rpn.comm/license.html
!
!This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
!without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
!See the above mentioned License/Disclaimer for more details.
!You should have received a copy of the License/Disclaimer along with this software;
!if not, you can write to: EC-RPN COMM Group, 2121 TransCanada, suite 500, Dorval (Quebec),
!CANADA, H9P 1J3; or send e-mail to service.rpn@ec.gc.ca
!-------------------------------------- LICENCE END --------------------------------------

!--------------------------------------------------------------------------
! > MODULE LamAnalysisGrid (prefix="lag")
!
! Purpose: Performs horizontal grid-point variable transforms 
!          for the limited-area computational analysis grids (extended and non-extended)
!
! - Subroutines
!    lag_SetupFromHCO    (public)
!    lag_PsiChiToUV      (public)
!    lag_PsiChiToUVAdj   (public)
!    lag_UVToVortDiv     (public)
!    lag_mach            (public)
!
! - Public variables
!
! - Dependencies
!   horizontalGrid_mod 
!--------------------------------------------------------------------------

module LamAnalysisGrid_mod 4,4
  use earthconstants_mod, only : RA
  use MathPhysConstants_mod, only: MPC_DEGREES_PER_RADIAN_R8, MPC_PI_R8
  use HorizontalCoord_mod
  use mpivar_mod
  implicit none
  save
  private

  ! public procedures
  public :: lag_SetupFromHCO, lag_mach
  public :: lag_PsiChiToUV, lag_PsiChiToUVAdj, lag_UVToVortDiv

  ! Definition of some parameters characterizing the geometry of
  ! the Limited-Area (LA) analysis grid and associated metric factors
  type :: struct_glmf
     real(8), allocatable :: rlat   (:) ! Latitudes of Scalar gridpoints
     real(8), allocatable :: rlon   (:) ! Longitudes of Scalar gridpoints
     real(8)              :: rdlon      ! Latitude difference of gridpoints
     real(8)              :: rdlat      ! Longitude differences of gridpoints
     real(8), allocatable :: cos2 (:)   ! Grid metric for Psi-Chi to U-V
     real(8), allocatable :: cos2h(:)   ! Grid metric for Psi-Chi to U-V
     real(8), allocatable :: cos2vd (:) ! Grid metric for U-V to Vort-Div
     real(8), allocatable :: cos2hvd(:) ! Grid metric for U-V to Vort-Div
     real(8), allocatable :: idmuh(:)   ! Grid metric for U-V to Vort-Div
     real(8), allocatable :: idmu (:)   ! Grid metric for U-V to Vort-Div
     real(8)              :: dx         ! Grid-point spacing (uniform)
     real(8), allocatable :: conphy (:) ! to go from Wind Images to true winds
     real(8), allocatable :: conima (:) ! to go from true winds to Wind Images
  end type struct_glmf

  type(struct_hco), pointer :: hco_core
  type(struct_hco), pointer :: hco_ext
  type(struct_glmf):: glmf

  integer :: ni_ext, nj_ext   ! With    Extension for Bi-Fourrier
  integer :: ni_core, nj_core ! Without Extension for Bi-Fourrier
  integer :: ext_i , ext_j    ! Extension gridpoints

  integer :: ni_ext_per, nj_ext_per

  integer :: istart, iend, jstart, jend

  integer :: LonPerPE, myLonBeg, myLonEnd
  integer :: LatPerPE, myLatBeg, myLatEnd

  logical :: initialized = .false.

  contains

!--------------------------------------------------------------------------
! lag_SetupFromHCO
!--------------------------------------------------------------------------

  subroutine lag_SetupFromHCO(ExtendedAnalysisGridName,AnalysisGridName) 3,19
    implicit none

    character(len=*), intent(in) :: ExtendedAnalysisGridName
    character(len=*), intent(in) :: AnalysisGridName

    real(8), allocatable :: rlath  (:) ! Latitudes of half grid-points of gridpoints in lat-direction
    real(8), allocatable :: rrcos  (:) ! 1.0/Cos(Latitudes of gridpoints)
    real(8), allocatable :: rrcosh (:) ! 1.0/Cos(Half-Latitudes of gridpoints)
    real(8), allocatable :: rdmu   (:) ! Differences of mu=sin(lat)
    real(8), allocatable :: rdmuh  (:) ! Differences of muh=sin(lath)
    real(8), allocatable :: r1mmu2 (:) ! (1.-mu**2)
    real(8), allocatable :: r1mmu2h(:) ! (1.-muh**2)

    real(8) :: lon_test, lat_test
    real(8) :: dlon_test, dlat_test

    integer :: i, j, njper

    write(*,*)
    write(*,*) 'lag_SetupFromHCO: Starting...'

    initialized = .true.

    !
    !- 1.  Get the appropriate hco structures
    !
    hco_core => hco_Get(AnalysisGridName)
    hco_ext => hco_Get(ExtendedAnalysisGridName)

    if ( (.not. hco_core%initialized) .or.  & 
         (.not. hco_ext%initialized) ) then
      write(*,*)
      write(*,*) 'lag_SetupFromHCO: At least one hco structure was not initilzed'
      write(*,*) 'hco_core = ', hco_core%initialized
      write(*,*) 'hco_ext = ', hco_ext%initialized
      call abort3d('lag_SetupFromHCO: abort')
    end if

    if ( (hco_core%global) .or.  & 
         (hco_ext%global) ) then
      write(*,*)
      write(*,*) 'lag_SetupFromHCO: At least one hco structure is from a global grid'
      write(*,*) 'hco_core = ', hco_core%global
      write(*,*) 'hco_ext = ', hco_ext%global
      call abort3d('lag_SetupFromHCO: abort')
    end if

    !
    !- 2.  Dimension settings and Memory allocation
    !

    !- 2.1 Dimensions
    ni_core = hco_core%ni
    nj_core = hco_core%nj

    ni_ext = hco_ext%ni
    nj_ext = hco_ext%nj

    ext_i  = ni_ext - ni_core
    ext_j  = nj_ext - nj_core

    if ( (ext_i < 10) .or. (ext_j < 10) ) then
      write(*,*)
      write(*,*) 'lag_SetupFromHCO: Domain extension is less than 10 gridpoints'
      write(*,*) ' ext_i = ', ext_i,' ext_j = ', ext_j
      call abort3d('lag_SetupFromHCO: abort')
    end if

    ni_ext_per = ni_ext + 1  ! Fields will be periodic (i = 1 repeated) at ni_ext+1
    nj_ext_per = nj_ext + 1  ! Fields will be periodic (j = 1 repeated) at nj_ext+1

    !- 2.2 First Make sure we have a uniform grid in x and y direction

    !- 2.2.1 Analysis core grid
    dlon_test = hco_core%lon(2) - hco_core%lon(1)
    do i = 1, ni_core
      lon_test = hco_core%lon(1) + dlon_test * real((i-1),8)
      if ( lon_test - hco_core%lon(i) > dlon_test/100.0d0 ) then
        call abort3d('lag_SetupFromHCO: Core grid spacing is not uniform in x-direction')
      end if
    end do
    
    dlat_test = hco_core%lat(2) - hco_core%lat(1)
    do j = 1, nj_core
      lat_test = hco_core%lat(1) + dlat_test * real((j-1),8)
      if ( lat_test - hco_core%lat(j) > dlat_test/100.0d0 ) then
        call abort3d('lag_SetupFromHCO: Core grid spacing is not uniform in y-direction')
      end if
    end do

    !- 2.2.2 Extended Analysis grid
    dlon_test = hco_ext%lon(2) - hco_ext%lon(1)
    do i = 1, ni_ext
      lon_test = hco_ext%lon(1) + dlon_test * real((i-1),8)
      if ( lon_test - hco_ext%lon(i) > dlon_test/100.0d0 ) then
        call abort3d('lag_SetupFromHCO: Extended grid spacing is not uniform in x-direction')
      end if
    end do

    dlat_test = hco_ext%lat(2) - hco_ext%lat(1)
    do j = 1, nj_ext
      lat_test = hco_ext%lat(1) + dlat_test * real((j-1),8)
      if ( lat_test - hco_ext%lat(j) > dlat_test/100.0d0 ) then
        call abort3d('lag_SetupFromHCO: Extended grid spacing is not uniform in y-direction')
      end if
    end do

    !- 2.3 Dimensions for variables needed to be symmetric
    istart = -4          ! 5 gridpoints padding in West direction
    iend   = ni_ext + 4  ! 4 gridpoints padding in East direction
    jstart = -4          ! 5 gridpoints padding in South direction
    jend   = nj_ext + 4  ! 4 gridpoints padding in North direction

    !- 2.4 Allocations
    allocate(glmf%rlon(istart:iend))
    allocate(glmf%rlat(jstart:jend))
    allocate(rlath    (jstart:jend))
    allocate(rrcos    (jstart:jend))
    allocate(rrcosh   (jstart:jend))
    allocate(rdmu     (jstart:jend))
    allocate(rdmuh    (jstart:jend))
    allocate(r1mmu2   (jstart:jend))
    allocate(r1mmu2h  (jstart:jend))

    !
    !- 3.  Set Lat-Lon of the computational grid
    !

    !  3.1 Set (constant) Grid spacing
    glmf%rdlon = (hco_ext%lon(2) - hco_ext%lon(1))
    glmf%rdlat = (hco_ext%lat(2) - hco_ext%lat(1))

    !- 3.2 Lat-Lon

    !  3.2.1 Extract the lat-lon from the core grid
    glmf%rlon(1:ni_ext) = hco_ext%lon(1:ni_ext)
    glmf%rlat(1:nj_ext) = hco_ext%lat(1:nj_ext)

    !  3.2.2 Extend to the full computational extended grid

    ! West
    do i = istart, 0
      glmf%rlon(i) = glmf%rlon(1) + (i-1) * glmf%rdlon
    end do
    ! East
    do i = ni_ext+1, iend
      glmf%rlon(i) = glmf%rlon(ni_ext) + (i-ni_ext) * glmf%rdlon
    end do
    ! North
    do j = nj_ext+1, jend
      glmf%rlat(j) = glmf%rlat(nj_ext) + (j-nj_ext) * glmf%rdlat
    end do
    ! South
    do j = jstart, 0
      glmf%rlat(j) = glmf%rlat(1) + (j-1) * glmf%rdlat
    end do

    !- 3.2 Half Lat-Lon
    do j = jstart+1, jend-1
      rlath(j) = ( glmf%rlat(j) + glmf%rlat(j+1) ) / 2.0d0
    end do

    !
    !- 4. Set Metric Factors
    !

    !- 4.1  Compute local factors
    do j = jstart+1, jend-2
      rrcos  (j) = 1.0d0 / cos(glmf%rlat (j))
      rrcosh (j) = 1.0d0 / cos(rlath(j))
      rdmu   (j) = sin(glmf%rlat (j+1)) - sin(glmf%rlat (j))
      rdmuh  (j) = sin(rlath(j+1)) - sin(rlath(j))
      r1mmu2 (j) = (cos(glmf%rlat (j)))**2
      r1mmu2h(j) = (cos(rlath(j)))**2
    end do

    !- 4.2  Bi-periodize and symmetrize Metric coefficients
    call lag_mach(rdmu   (1:nj_ext), & ! INOUT
                   1, nj_ext,1)        ! IN
    call lag_mach(rdmuh  (1:nj_ext), & ! INOUT
                   1, nj_ext,1)        ! IN 
    call lag_mach(r1mmu2 (1:nj_ext), & ! INOUT
                   1, nj_ext,1)        ! IN
    call lag_mach(r1mmu2h(1:nj_ext), & ! INOUT
                   1, nj_ext,1)        ! IN

    call symmetrize_coef(rdmu   ) ! INOUT
    call symmetrize_coef(rdmuh  ) ! INOUT
    call symmetrize_coef(r1mmu2 ) ! INOUT
    call symmetrize_coef(r1mmu2h) ! INOUT

    !- 4.3 Compute global factors
    glmf%dx  = 1.d0 / (RA * glmf%rdlon)

    allocate(glmf%cos2   ( 0:nj_ext+1))
    allocate(glmf%cos2h  ( 0:nj_ext+1))
    do j = 0, nj_ext+1
      glmf%cos2 (j) = r1mmu2 (j) / (RA * rdmuh(j-1))
      glmf%cos2h(j) = r1mmu2h(j) / (RA * rdmu (j  ))
    end do

    allocate(glmf%idmuh  (0:nj_ext-1))
    allocate(glmf%idmu   (1:nj_ext  ))
    allocate(glmf%cos2vd (1:nj_ext  ))
    allocate(glmf%cos2hvd(1:nj_ext  ))

    do j = 1, nj_ext
      glmf%idmuh  (j-1) = 1.d0 / rdmuh(j-1)
      glmf%idmu   (j)   = 1.d0 / rdmu   (j)
      glmf%cos2vd (j)   = 1.d0 / r1mmu2 (j)
      glmf%cos2hvd(j)   = 1.d0 / r1mmu2h(j)
    end do

    !
    ! Conversion of wind images to physical winds and vice-versa
    ! N.B.: Those are geometrical factors of the COMPUTATIONAL grid 
    !        ==> only computational latitude variation...
    !
    allocate(glmf%conphy(nj_ext))
    allocate(glmf%conima(nj_ext))

    do j = 1, nj_ext
      glmf%conphy(j) = rrcos(j)                  ! to go from Wind Images to true winds
      glmf%conima(j) = 1.0d0 / glmf%conphy(j)  ! to go from true winds to Wind Images
    end do

    !
    !- 5. MPI partitionning
    !
    call mpivar_setup_lonbands(ni_ext,                      & ! IN
                               lonPerPE, myLonBeg, myLonEnd ) ! OUT

    call mpivar_setup_latbands(nj_ext,                      & ! IN
                               latPerPE, myLatBeg, myLatEnd ) ! OUT

    !
    !- 6.  Ending
    !
    deallocate(rlath  )
    deallocate(rrcos  )
    deallocate(rrcosh )
    deallocate(rdmu   )
    deallocate(rdmuh  )
    deallocate(r1mmu2 )
    deallocate(r1mmu2h)

    write(*,*)
    write(*,*) 'lag_SetupFromHCO: Done!'

  end subroutine lag_SetupFromHCO

!--------------------------------------------------------------------------
! symmetrize_coef
!--------------------------------------------------------------------------

  subroutine symmetrize_coef(coef_inout) 4
    implicit none
    !
    !**s/r symmetrize_coef  - Extend symmetrically Metric coefficients.
    !
    !Author  : Luc Fillion - MSC/CAN - 23 Apr 05.

    real(8), intent(inout) :: coef_inout(jstart:jend)

    integer j
    
    do j = jstart, 0
      coef_inout(j) = coef_inout(nj_ext+j)
    end do

    do j = nj_ext+1, jend
      coef_inout(j) = coef_inout(j-nj_ext) 
    end do

  end subroutine symmetrize_coef

!--------------------------------------------------------------------------
! lag_PsiChiToUV
!--------------------------------------------------------------------------

  subroutine lag_PsiChiToUV(psi, chi, uphy, vphy, nk) 1,4
    implicit none

    integer,          intent(in)  :: nk

    real(8),          intent(in)  :: psi(myLonBeg:myLonEnd,myLatBeg:myLatEnd,1:nk)
    real(8),          intent(in)  :: chi(myLonBeg:myLonEnd,myLatBeg:myLatEnd,1:nk)

    real(8),          intent(out) :: uphy(myLonBeg:myLonEnd,myLatBeg:myLatEnd,1:nk)
    real(8),          intent(out) :: vphy(myLonBeg:myLonEnd,myLatBeg:myLatEnd,1:nk)

    real(8), allocatable :: psi_ext(:,:,:)
    real(8), allocatable :: chi_ext(:,:,:)
    real(8), allocatable :: uimg(:,:,:)
    real(8), allocatable :: vimg(:,:,:)
    real(8), allocatable :: uimgs(:,:,:)
    real(8), allocatable :: vimgs(:,:,:)

    integer :: i,j,k

    if ( .not. initialized ) then
      call abort3d('lag_PsiChiToUV: LamAnalysisGrid not initialized')
    endif

    

    allocate(psi_ext( (myLonBeg-1):(myLonEnd+1), (myLatBeg-1):(myLatEnd+1), nk))
    allocate(chi_ext( (myLonBeg-1):(myLonEnd+1), (myLatBeg-1):(myLatEnd+1), nk))
    allocate(uimg   ( myLonBeg:myLonEnd        , myLatBeg:myLatEnd        , nk))
    allocate(vimg   ( myLonBeg:myLonEnd        , myLatBeg:myLatEnd        , nk))
    allocate(uimgs  ( (myLonBeg-1):myLonEnd    , myLatBeg:(myLatEnd+1)    , nk))
    allocate(vimgs  ( myLonBeg:(myLonEnd+1)    , (myLatBeg-1):myLatEnd    , nk))

    !
    !- 1.  Symmetrize
    !
    call symmetrize( psi_ext,                                        & ! OUT
                     psi, myLonBeg, myLonEnd, myLatBeg, myLatEnd, nk ) ! IN
    call symmetrize( chi_ext,                                        & ! OUT
                     chi, myLonBeg, myLonEnd, myLatBeg, myLatEnd, nk ) ! IN

!    psi_ext = 0.d0
!    chi_ext = 0.d0
!    psi_ext(1:ni_ext,myLatBeg:myLatEnd,:) = psi(1:ni_ext,myLatBeg:myLatEnd,:)
!    chi_ext(1:ni_ext,myLatBeg:myLatEnd,:) = chi(1:ni_ext,myLatBeg:myLatEnd,:)

    !
    !- 2.  Compute Wind on staggered grid
    !

!$OMP PARALLEL
!$OMP DO PRIVATE (k,j,i)
    do k = 1, nk
      !- 2.1 u-wind component
       do j = myLatBeg, myLatEnd+1
        do i = myLonBeg-1, myLonEnd
           uimgs(i,j,k) =   glmf%dx      * ( chi_ext(i+1,j,k) - chi_ext(i,j,k)   ) &
                          - glmf%cos2(j) * ( psi_ext(i,j,k)   - psi_ext(i,j-1,k) )
        end do
      end do
      !- 2.2 v-wind component
      do j = myLatBeg-1, myLatEnd
        do i = myLonBeg, myLonEnd+1
           vimgs(i,j,k) =   glmf%cos2h(j) * ( chi_ext(i,j+1,k) - chi_ext(i,j,k)   ) &
                          + glmf%dx       * ( psi_ext(i,j,k)   - psi_ext(i-1,j,k) )
        end do
      end do
    end do
!$OMP END DO
!$OMP END PARALLEL

    !
    !- 3.  Move to collocated (scalar) grid
    !
    call uvStagToColloc( uimgs, vimgs,                              & ! IN
                         uimg , vimg ,                              & ! OUT
                         myLonBeg, myLonEnd, myLatBeg, myLatEnd, nk ) ! IN

    !
    !- 4.  Convert Wind images to Physical (true) winds
    !
    do j = myLatBeg, myLatEnd
      uphy(:,j,:) =  glmf%conphy(j) * uimg(:,j,:)
      vphy(:,j,:) =  glmf%conphy(j) * vimg(:,j,:)
    end do

    deallocate(psi_ext)
    deallocate(chi_ext)
    deallocate(uimg)
    deallocate(vimg)
    deallocate(uimgs)
    deallocate(vimgs)

  end subroutine lag_PsiChiToUV

!--------------------------------------------------------------------------
! lag_PsiChiToUVAdj
!--------------------------------------------------------------------------

  subroutine lag_PsiChiToUVAdj(psi, chi, uphy, vphy, nk) 1,4
    implicit none

    integer,          intent(in)  :: nk

    real(8),          intent(out) :: psi(myLonBeg:myLonEnd,myLatBeg:myLatEnd,1:nk)
    real(8),          intent(out) :: chi(myLonBeg:myLonEnd,myLatBeg:myLatEnd,1:nk)
    
    real(8),          intent(in)  :: uphy(myLonBeg:myLonEnd,myLatBeg:myLatEnd,1:nk)
    real(8),          intent(in)  :: vphy(myLonBeg:myLonEnd,myLatBeg:myLatEnd,1:nk)

    real(8), allocatable :: psi_ext(:,:,:)
    real(8), allocatable :: chi_ext(:,:,:)
    real(8), allocatable :: uimg(:,:,:)
    real(8), allocatable :: vimg(:,:,:)
    real(8), allocatable :: uimgs(:,:,:)
    real(8), allocatable :: vimgs(:,:,:)

    integer :: i,j,k

    if ( .not. initialized ) then
      call abort3d('lag_PsiChiToUV: LamAnalysisGrid not initialized')
    endif

    allocate(psi_ext( (myLonBeg-1):(myLonEnd+1), (myLatBeg-1):(myLatEnd+1), nk))
    allocate(chi_ext( (myLonBeg-1):(myLonEnd+1), (myLatBeg-1):(myLatEnd+1), nk))
    allocate(uimg   ( myLonBeg:myLonEnd        , myLatBeg:myLatEnd        , nk))
    allocate(vimg   ( myLonBeg:myLonEnd        , myLatBeg:myLatEnd        , nk))
    allocate(uimgs  ( (myLonBeg-1):myLonEnd    , myLatBeg:(myLatEnd+1)    , nk))
    allocate(vimgs  ( myLonBeg:(myLonEnd+1)    , (myLatBeg-1):myLatEnd    , nk))

    !
    !- 4.  Convert Physical (true) winds to Wind images
    !
    do j = myLatBeg, myLatEnd
      uimg(:,j,:) =  glmf%conphy(j) * uphy(:,j,:)
      vimg(:,j,:) =  glmf%conphy(j) * vphy(:,j,:)
    end do

    !
    !- 3.  Move to stagerred grid
    !
    uimgs(:,:,:) = 0.d0
    vimgs(:,:,:) = 0.d0

    call uvStagToCollocAdj( uimgs, vimgs,                              & ! OUT
                            uimg , vimg ,                              & ! IN
                            myLonBeg, myLonEnd, myLatBeg, myLatEnd, nk ) ! IN

    !
    !- 2.  Compute Psi-Chi
    !
    chi_ext(:,:,:) = 0.d0
    psi_ext(:,:,:) = 0.d0

!$OMP PARALLEL
!$OMP DO PRIVATE (k,j,i)
    do k = 1, nk
      !- 2.2 from v-wind component
      do j = myLatEnd, myLatBeg-1, -1
        do i = myLonEnd+1, myLonBeg, -1
          chi_ext(i  ,j+1,k) = chi_ext(i  ,j+1,k) + vimgs(i,j,k) * glmf%cos2h(j)
          chi_ext(i  ,j  ,k) = chi_ext(i  ,j  ,k) - vimgs(i,j,k) * glmf%cos2h(j)
          psi_ext(i  ,j  ,k) = psi_ext(i  ,j  ,k) + vimgs(i,j,k) * glmf%dx
          psi_ext(i-1,j  ,k) = psi_ext(i-1,j  ,k) - vimgs(i,j,k) * glmf%dx
        end do
      end do
      !- 2.1 from u-wind component
      do j = myLatEnd+1, myLatBeg, -1
        do i = myLonEnd, myLonBeg-1, -1
          chi_ext(i+1,j  ,k) = chi_ext(i+1,j  ,k) + uimgs(i,j,k) * glmf%dx
          chi_ext(i  ,j  ,k) = chi_ext(i  ,j  ,k) - uimgs(i,j,k) * glmf%dx
          psi_ext(i  ,j  ,k) = psi_ext(i  ,j  ,k) - uimgs(i,j,k) * glmf%cos2(j)
          psi_ext(i  ,j-1,k) = psi_ext(i  ,j-1,k) + uimgs(i,j,k) * glmf%cos2(j)
        end do
     end do
    end do
!$OMP END DO
!$OMP END PARALLEL

    !
    !- 1.  De-Symmetrize
    !
    chi(:,:,:) = 0.d0
    psi(:,:,:) = 0.d0

    call symmetrizeAdj( psi_ext,                                   & ! IN
                        psi,                                       & ! OUT
                        myLonBeg, myLonEnd, myLatBeg, myLatEnd, nk ) ! IN
    call symmetrizeAdj( chi_ext,                                   & ! IN
                        chi,                                       & ! OUT
                        myLonBeg, myLonEnd, myLatBeg, myLatEnd, nk ) ! IN

!    psi(1:ni_ext,myLatBeg:myLatEnd,:) = psi_ext(1:ni_ext,myLatBeg:myLatEnd,:)
!    chi(1:ni_ext,myLatBeg:myLatEnd,:) = chi_ext(1:ni_ext,myLatBeg:myLatEnd,:)

    deallocate(psi_ext)
    deallocate(chi_ext)
    deallocate(uimg)
    deallocate(vimg)
    deallocate(uimgs)
    deallocate(vimgs)

  end subroutine lag_PsiChiToUVAdj

!--------------------------------------------------------------------------
! uvStagToColloc
!--------------------------------------------------------------------------

  subroutine uvStagToColloc(uStag, vStag, uColloc, vColloc, iBeg, iEnd, jBeg, jEnd , nk) 1
    implicit none

    integer,          intent(in)  :: iBeg, iEnd, jBeg, JEnd, nk
    real(8),          intent(out) :: uColloc(iBeg:iEnd  ,jBeg  :jEnd  ,1:nk)
    real(8),          intent(out) :: vColloc(iBeg:iEnd  ,jBeg  :jEnd  ,1:nk)
    real(8),          intent(in)  :: uStag  (iBeg-1:iEnd,jBeg  :jEnd+1,1:nk)
    real(8),          intent(in)  :: vStag  (iBeg:iEnd+1,jBeg-1:jEnd  ,1:nk)

    integer :: i,j,k

!$OMP PARALLEL
!$OMP DO PRIVATE (k,j,i)
    do k = 1, nk
      do j = jBeg, jEnd
        do i = iBeg, iEnd
          uColloc(i,j,k) = ( uStag(i-1,j  ,k) + uStag(i,j,k) ) / 2.0d0
          vColloc(i,j,k) = ( vStag(i  ,j-1,k) + vStag(i,j,k) ) / 2.0d0
        end do
      end do
    end do
!$OMP END DO
!$OMP END PARALLEL

  end subroutine uvStagToColloc

!--------------------------------------------------------------------------
! uvStagToCollocAdj
!--------------------------------------------------------------------------

  subroutine uvStagToCollocAdj(uStag, vStag, uColloc, vColloc, iBeg, iEnd, jBeg, jEnd , nk) 1
    implicit none

    integer,          intent(in)  :: iBeg, iEnd, jBeg, jEnd, nk
    real(8),          intent(in)  :: uColloc(iBeg:iEnd  ,jBeg  :jEnd  ,1:nk)
    real(8),          intent(in)  :: vColloc(iBeg:iEnd  ,jBeg  :jEnd  ,1:nk)
    real(8),          intent(out) :: uStag  (iBeg-1:iEnd,jBeg  :jEnd+1,1:nk)
    real(8),          intent(out) :: vStag  (iBeg:iEnd+1,jBeg-1:jEnd  ,1:nk)

    integer :: i, j, k

!$OMP PARALLEL
!$OMP DO PRIVATE (k,j,i)
    do k = 1, nk
      do j = jEnd, jBeg, -1
        do i = iEnd, iBeg, -1
          uStag(i-1,j  ,k) = uStag(i-1,j  ,k) + uColloc(i,j,k) / 2.0d0
          uStag(i  ,j  ,k) = uStag(i  ,j  ,k) + uColloc(i,j,k) / 2.0d0
          vStag(i  ,j-1,k) = vStag(i  ,j-1,k) + vColloc(i,j,k) / 2.0d0
          vStag(i  ,j  ,k) = vStag(i  ,j  ,k) + vColloc(i,j,k) / 2.0d0
        end do
      end do
    end do
!$OMP END DO
!$OMP END PARALLEL

  end subroutine uvStagToCollocAdj

!--------------------------------------------------------------------------
! Symmetrize
!--------------------------------------------------------------------------

  subroutine symmetrize(field_out, field_in, iBeg, iEnd, jBeg, jEnd, nk) 2
    implicit none
    !
    ! Extend symmetrically outside 1 grid point all around 
    ! LAM-boundary ready for finite differences
    !
    integer, intent(in) :: iBeg, iEnd, jBeg, jEnd, nk
    real(8), intent(out):: field_out(iBeg-1:iEnd+1,jBeg-1:jEnd+1, nk)
    real(8), intent(in) :: field_in(iBeg:iEnd,jBeg:jEnd,nk)

    real, allocatable :: field_out4(:,:,:)

    integer :: i,j,k,ni,nj

    ni = iEnd-iBeg+1
    nj = jEnd-jBeg+1

    field_out(:,:,:) = 0.d0
    
    field_out(iBeg:iEnd,jBeg:jEnd,1:nk) = field_in(iBeg:iEnd,jBeg:jEnd,1:nk)

    allocate(field_out4(0:ni+1,0:nj+1, nk))
    field_out4(0:ni+1,0:nj+1,:) = real(field_out(iBeg-1:iEnd+1,jBeg-1:jEnd+1,:),4)

    call RPN_COMM_xch_halo (field_out4,             & ! INOUT
                            0,ni+1,0,nj+1,ni,nj,nk, & ! IN
                            1,1,.true.,.true.,ni,0)   ! IN

    field_out(iBeg-1:iEnd+1,jBeg-1:jEnd+1,:) = real(field_out4(0:ni+1,0:nj+1,:),8)
    deallocate(field_out4)

  end subroutine symmetrize

!--------------------------------------------------------------------------
! SymmetrizeAdj
!--------------------------------------------------------------------------

  subroutine symmetrizeAdj(field_in, field_out, iBeg, iEnd, jBeg, jEnd, nk) 2
    implicit none
    !
    ! Adjoint of sub. symmetrize.
    !
    integer, intent(in)    :: iBeg, iEnd, jBeg, jEnd, nk
    real(8), intent(inout) :: field_in(iBeg-1:iEnd+1,jBeg-1:jEnd+1, nk)
    real(8), intent(out)   :: field_out(iBeg:iEnd,jBeg:jEnd,nk)

    real, allocatable :: field_in4(:,:,:)

    integer :: i,j,k,ni,nj

    ni = iEnd-iBeg+1
    nj = jEnd-jBeg+1

    allocate(field_in4(0:ni+1,0:nj+1, nk))
    field_in4(0:ni+1,0:nj+1,:) = real(field_in(iBeg-1:iEnd+1,jBeg-1:jEnd+1,:),4)

    call RPN_COMM_adj_halo (field_in4,               & ! INOUT
                            0,ni+1,0,nj+1,ni,nj,nk,  & ! IN
                            1,1,.true.,.true.,ni,0)    ! IN

    field_in(iBeg-1:iEnd+1,jBeg-1:jEnd+1,:) = real(field_in4(0:ni+1,0:nj+1,:),8)
    deallocate(field_in4)

!$OMP PARALLEL
!$OMP DO PRIVATE (k,j,i)
    do k = 1, nk
       do j = jBeg, jEnd
          do i = iBeg, iEnd
             field_out(i,j,k) = field_out(i,j,k) + field_in(i,j,k)
          end do
       end do
    end do
!$OMP END DO
!$OMP END PARALLEL

  end subroutine symmetrizeAdj
  
!--------------------------------------------------------------------------
! lag_Mach
!--------------------------------------------------------------------------

  subroutine lag_mach(gd,ni,nj,nk) 7,2
    implicit none

    ! Arguments:
    !      ni    ! Maximum I-dimension where the input array is assumed to carry information.
    !               Will be used as I-limit where backward derivatives will be evaluated
    !      nj    ! Maximum J-dimension where the input array is assumed to carry information.
    !               Will be used as J-limit where backward derivatives will be evaluated
    !
    integer,          intent(in)    :: ni, nj, nk
    real(8),          intent(inout) :: gd(ni,nj,nk)

    integer :: istart, jstart
    integer :: i,j,k

    real(8) :: con, xp, yp, a0, a1, b1, b2
    real(8) :: deriv_istart, deriv_jstart, deriv_i0, deriv_j0, del

    if ( .not. initialized ) then
      call abort3d('lag_Mach2: LamAnalysisGrid not initialized')
    endif

    if ( (ni /= 1 .and. ni /= ni_ext) .or. &
         (nj /= 1 .and. nj /= nj_ext) ) then
      call abort3d('lag_Mach2 : Invalid Dimensions')
    end if

!$OMP PARALLEL
!$OMP DO PRIVATE (k,j,i,istart,jstart,con,a0,a1,b1,b2,del,deriv_istart,deriv_i0,deriv_jstart,deriv_j0,xp,yp)
    do k = 1, nk
    !
    !- 1.  Periodicized in x-direction from ni_core to ni
    !    
    if ( ni > 1 ) then
      istart = ni_core - 2  ! I-limit where backward derivatives will be evaluated
      con = 1.d0 / ( glmf%rdlon * MPC_DEGREES_PER_RADIAN_R8 * 111.d+3)
      do j = 1, nj
        a0           = 0.5d0 * ( gd(istart,j,k)  + gd(1,j,k)       )
        a1           = 0.5d0 * ( gd(istart,j,k)  - gd(1,j,k)       )
        deriv_istart = con   * ( gd(istart,j,k)  - gd(istart-1,j,k))
        deriv_i0     = con   * ( gd(2,j,k)       - gd(1,j,k)       )
        b1           = 0.5d0 * ( deriv_istart  - deriv_i0       )
        b2           = 0.25d0* ( deriv_istart  + deriv_i0       )
        del          = real(ni_ext_per-istart,8)
        do i = istart, ni
          xp = MPC_PI_R8 * real(i-istart,8) / del
          gd(i,j,k) = a0 + a1*cos(xp) + b1*sin(xp) + b2*sin(2.d0*xp)
        end do
      end do
    end if
    !
    !- 2.  Periodicized in y-direction from nj_core to nj
    !
    if ( nj > 1 ) then
      jstart = nj_core - 2
      con = 1.d0 / (glmf%rdlat * MPC_DEGREES_PER_RADIAN_R8 * 111.d+3)
      do i = 1, ni
        a0           = 0.5d0 * ( gd(i,jstart,k) + gd(i,1,k)       )
        a1           = 0.5d0 * ( gd(i,jstart,k) - gd(i,1,k)       )
        deriv_jstart = con   * ( gd(i,jstart,k) - gd(i,jstart-1,k))
        deriv_j0     = con   * ( gd(i,2,k)      - gd(i,1,k)       )
        b1           = 0.5d0 * ( deriv_jstart - deriv_j0     )
        b2           = 0.25d0* ( deriv_jstart + deriv_j0     )
        del          = real(nj_ext_per-jstart,8)
        do j = jstart , nj
          yp = MPC_PI_R8 * real(j-jstart,8) / del
          gd(i,j,k) = a0 + a1*cos(yp) + b1*sin(yp) + b2*sin(2.d0*yp)
        end do
      end do
    end if

    end do
!$OMP END DO
!$OMP END PARALLEL

  end subroutine lag_mach

!--------------------------------------------------------------------------
! lag_UVToVortDiv
!--------------------------------------------------------------------------

  subroutine lag_UVToVortDiv(Vorticity, Divergence, uphy, vphy, nk) 1,5
    implicit none

    integer,          intent(in)  :: nk

    real(8),          intent(out)  :: Vorticity (1:ni_ext,1:nj_ext,1:nk)
    real(8),          intent(out)  :: Divergence(1:ni_ext,1:nj_ext,1:nk)

    real(8),          intent(in) :: uphy(1:ni_ext,1:nj_ext,1:nk)
    real(8),          intent(in) :: vphy(1:ni_ext,1:nj_ext,1:nk)

    real(8), allocatable :: uimg(:,:,:)
    real(8), allocatable :: vimg(:,:,:)
    real(8), allocatable :: uimg_sym(:,:,:)
    real(8), allocatable :: vimg_sym(:,:,:)

    integer :: i,j,k

    if ( .not. initialized ) then
      call abort3d('lag_UVToVortDiv: LamAnalysisGrid not initialized')
    endif

    allocate(uimg_sym(-1:ni_ext+2,-1:nj_ext+2,nk))
    allocate(vimg_sym(-1:ni_ext+2,-1:nj_ext+2,nk))

    allocate(uimg ( 1:ni_ext,1:nj_ext,nk))
    allocate(vimg ( 1:ni_ext,1:nj_ext,nk))

    !
    !- 2.  Convert Physical (true) winds to Wind images
    !
    do j = 1, nj_ext
      uimg(:,j,:) =  glmf%conima(j) * uphy(:,j,:)
      vimg(:,j,:) =  glmf%conima(j) * vphy(:,j,:)
    end do

    !- Redo bi-periodization
    call lag_mach(uimg,             & ! INOUT
                  ni_ext, nj_ext,nk)  ! IN
    call lag_mach(vimg,             & ! INOUT
                  ni_ext, nj_ext,nk)  ! IN

    !
    !- 3.  Symmetrize
    !
    call symmetrize_nompi( uimg_sym,            & ! OUT
                     uimg, ni_ext, nj_ext, nk )   ! IN
    call symmetrize_nompi( vimg_sym,            & ! OUT
                     vimg, ni_ext, nj_ext, nk )   ! IN
    deallocate(uimg,vimg)

    !
    !- 4.  Compute Vorticity and Divergence
    !

!$OMP PARALLEL
!$OMP DO PRIVATE (k,j,i)
     do k = 1, nk
       do j = 1, nj_ext
         do i = 1, ni_ext
           vorticity (i,j,k) = &
                glmf%cos2hvd(j)  *  glmf%dx * (vimg_sym(i+1,j,k)-vimg_sym(i,j,k)) - &
                (1.d0 / RA)  *   glmf%idmu(j) * (uimg_sym(i,j+1,k)-uimg_sym(i,j,k))

           divergence(i,j,k) = &
                glmf%cos2vd(j)   *  glmf%dx * (uimg_sym(i,j,k)-uimg_sym(i-1,j,k)) + &
                (1.d0 / RA) * glmf%idmuh(j-1) * (vimg_sym(i,j,k)-vimg_sym(i,j-1,k))
         end do
       end do
     end do
!$OMP END DO
!$OMP END PARALLEL

    deallocate(uimg_sym)
    deallocate(vimg_sym)

  end subroutine lag_UVToVortDiv

!--------------------------------------------------------------------------
! Symmetrize_nompi
!--------------------------------------------------------------------------

  subroutine symmetrize_nompi(field_out, field_in, ni, nj, nk) 2
    implicit none
    !
    ! Extend symmetrically outside 2 grid point all around 
    ! LAM-boundary ready for finite differences up to 2nd order.
    ! N.B.: The fields are periodic on ni+1 points for instance;
    ! but the LAM-fields only contain "ni" points without explicit periodicity.
    !
    ! Author  : Luc Fillion - MSC/CAN - 12 Oct 04.
    !

    integer, intent(in) :: ni, nj, nk
    real(8), intent(out):: field_out(-1:ni+2,-1:nj+2, nk)
    real(8), intent(in) :: field_in(ni,nj,nk)

    integer :: i,j,k

    field_out(:,:,:) = 0.d0
    
    field_out(1:ni,1:nj,1:nk) = field_in(1:ni,1:nj,1:nk)

    !
    !- Symmetrize
    !
!$OMP PARALLEL
!$OMP DO PRIVATE (k,j,i)
    do k = 1, nk
      ! South Boundary
      do  i = 1, ni
        field_out(i,0,k)    = field_out(i,nj,k)
        field_out(i,-1,k)   = field_out(i,nj-1,k)
      end do
      ! West Boundary
      do j = -1, nj
        field_out(0,j,k)    = field_out(ni,j,k)
        field_out(-1,j,k)   = field_out(ni-1,j,k)
      end do
      ! East Boundary
      do j = -1, nj
        field_out(ni+1,j,k) = field_out(1,j,k)
        field_out(ni+2,j,k) = field_out(2,j,k)
      end do
      ! North Boundary
      do  i = -1, ni+2
        field_out(i,nj+1,k) = field_out(i,1,k)
        field_out(i,nj+2,k) = field_out(i,2,k)
      end do
    end do
!$OMP END DO
!$OMP END PARALLEL

  end subroutine symmetrize_nompi

end module LamAnalysisGrid_mod