!--------------------------------------- LICENCE BEGIN -----------------------------------
!Environment Canada - Atmospheric Science and Technology License/Disclaimer,
!                     version 3; Last Modified: May 7, 2008.
!This is free but copyrighted software; you can use/redistribute/modify it under the terms
!of the Environment Canada - Atmospheric Science and Technology License/Disclaimer
!version 3 or (at your option) any later version that should be found at:
!http://collaboration.cmc.ec.gc.ca/science/rpn.comm/license.html
!
!This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
!without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
!See the above mentioned License/Disclaimer for more details.
!You should have received a copy of the License/Disclaimer along with this software;
!if not, you can write to: EC-RPN COMM Group, 2121 TransCanada, suite 500, Dorval (Quebec),
!CANADA, H9P 1J3; or send e-mail to service.rpn@ec.gc.ca
!-------------------------------------- LICENCE END --------------------------------------

!--------------------------------------------------------------------------
! MODULE globalSpectralTransform (prefix = "gst")
!
! Purpose: Perform global spectral transform. Uses the fft module for
!          the FFT part of the transform.
!
! Subroutines:
!    gst_setup (public)
!    gst_speree    (public)
!    gst_reespe    (public)
!    gst_spgd      (public)
!    gst_spgda     (public)
!    gst_gdsp      (public)
!    spereepar
!    reespepar
!    spgdpar
!    spgdapar
!    gdsppar
!    legdir3
!    leginv3
!    allocate_comleg
!    suleg
!    gauss8
!    ordleg8
!    sualp
!    getalp
!    allp
!    allp2
!
! Public variables:
!    gst_rmu
!    gst_rwt
!    gst_nind
!    gst_rlati
!    gst_r1qm2
!    gst_rsqm2
!
! Dependencies:
!    fft
!--------------------------------------------------------------------------

MODULE globalSpectralTransform 3,4
  use mpivar_mod
  use fft
  use MathPhysConstants_mod
  use earthConstants_mod
  implicit none
  save
  private

  ! public subroutines
  public :: gst_speree, gst_speree4, gst_speree5, gst_reespe, gst_reespe4, gst_reespe5, &
            gst_setup, gst_spgd, gst_spgd4, gst_spgda, gst_spgda4, gst_gdsp, gst_zlegpol, &
            gst_setID, gst_setDefaultID, gst_setToDefaultID, &
            gst_ilaList_mpilocal, gst_ilaList_mpiglobal, gst_nList_mpilocal
  ! public functions
  public :: gst_getRmu, gst_getRwt, gst_getnind, gst_getrlati, gst_getr1qm2, gst_getrsqm2, &
            gst_getrnnp1, gst_getr1snp1


  type  :: T_gst
    real(8),allocatable   :: rmu(:)
    real(8),allocatable   :: rwt(:)
    real(8),allocatable   :: rsqm2(:)
    real(8),allocatable   :: r1qm2(:)
    real(8),allocatable   :: rlati(:)
    integer,allocatable   :: nind(:)
    integer,allocatable   :: nindrh(:)
    real(8),allocatable   :: dalp(:,:)
    real(8),allocatable   :: dealp(:,:)
    real(8),allocatable   :: rwocs(:)
    real(8),allocatable   :: r1mu2(:)
    real(8),allocatable   :: rcolat(:)
    real(8),allocatable   :: r1mui(:)
    real(8),allocatable   :: r1mua(:)
    integer,allocatable   :: nclm(:)
    real(8),allocatable   :: r1snp1(:)
    real(8),allocatable   :: rnnp1(:)
    integer               :: ntrunc
    integer               :: nla
    integer               :: nlarh
    integer               :: njlath
    integer               :: ni
    integer               :: nj
    integer               :: nk
    integer               :: mpiMode
    integer               :: myLatBeg,myLatEnd,latPerPE
    integer,allocatable   :: allLatBeg(:),allLatEnd(:)
    integer               :: myLonBeg,myLonEnd,lonPerPE
    integer,allocatable   :: allLonBeg(:),allLonEnd(:)
    integer               :: myLatHalfBeg,myLatHalfEnd
    integer               :: mymBeg,mymEnd,mymSkip,mymCount,maxmCount
    integer               :: mynBeg,mynEnd,mynSkip,mynCount
    integer               :: myNla,maxMyNla
    integer,allocatable   :: allNla(:)
    integer,allocatable   :: mymIndex(:)
    integer,pointer       :: ilaList(:),allIlaList(:,:)
    integer,allocatable   :: allmBeg(:),allmEnd(:),allmSkip(:)
    integer,allocatable   :: allnBeg(:),allnEnd(:),allnSkip(:)
    integer               :: myLevBeg,myLevEnd,myLevCount,maxLevCount
    integer,allocatable   :: allLevBeg(:),allLevEnd(:)
    integer               :: fftID
  end type T_gst

  integer,parameter :: nMaxGst = 10
  integer      :: gstIdDefault = -1
  integer      :: nGstAlreadyAllocated = 0
  integer      :: gstID = 0
  type(T_gst)  :: gst(nmaxgst)

  integer :: nlatbd = 8

CONTAINS


  SUBROUTINE GST_setID(gstID_in) 15
    implicit none

    integer :: gstID_in
  
    gstID = gstID_in

  END SUBROUTINE GST_setID



  SUBROUTINE GST_setDefaultID(gstID_in) 1
    implicit none

    integer :: gstID_in
  
    gstIDDefault = gstID_in

  END SUBROUTINE GST_setDefaultID



  SUBROUTINE GST_setToDefaultID
    implicit none

    gstID = gstIdDefault

  END SUBROUTINE GST_setToDefaultID



  real(8) FUNCTION GST_getRmu(latIndex,gstID_in) 9
    implicit none

    integer :: latIndex,gstID_l
    integer,optional :: gstID_in

    if(present(gstID_in)) then
      gstID_l = gstID_in
    else
      gstID_l = gstIdDefault
    endif

    gst_getRmu = gst(gstID_l)%rmu(latIndex)

  END FUNCTION GST_getRmu



  real(8) FUNCTION GST_getRnnp1(latIndex,gstID_in) 10
    implicit none

    integer :: latIndex,gstID_l
    integer,optional :: gstID_in

    if(present(gstID_in)) then
      gstID_l = gstID_in
    else
      gstID_l = gstIdDefault
    endif

    gst_getRnnp1 = gst(gstID_l)%rnnp1(latIndex)

  END FUNCTION GST_getRnnp1



  real(8) FUNCTION GST_getR1snp1(latIndex,gstID_in) 4
    implicit none

    integer :: latIndex,gstID_l
    integer,optional :: gstID_in

    if(present(gstID_in)) then
      gstID_l = gstID_in
    else
      gstID_l = gstIdDefault
    endif

    gst_getR1snp1 = gst(gstID_l)%r1snp1(latIndex)

  END FUNCTION GST_getR1snp1



  real(8) FUNCTION GST_getRwt(latIndex,gstID_in) 2
    implicit none

    integer :: latIndex,gstID_l
    integer,optional :: gstID_in
  
    if(present(gstID_in)) then
      gstID_l = gstID_in
    else
      gstID_l = gstIdDefault
    endif

    gst_getRwt = gst(gstID_l)%rwt(latIndex)

  END FUNCTION GST_getRwt



  integer FUNCTION GST_getNind(latIndex,gstID_in) 20
    implicit none

    integer :: latIndex,gstID_l
    integer,optional :: gstID_in
  
    if(present(gstID_in)) then
      gstID_l = gstID_in
    else
      gstID_l = gstIdDefault
    endif

    gst_getNind = gst(gstID_l)%nind(latIndex)

  END FUNCTION GST_getNind



  real(8) FUNCTION GST_getRlati(latIndex,gstID_in) 2
    implicit none

    integer :: latIndex,gstID_l
    integer,optional :: gstID_in
 
    if(present(gstID_in)) then
      gstID_l = gstID_in
    else
      gstID_l = gstIdDefault
    endif

    gst_getRlati = gst(gstID_l)%rlati(latIndex)

  END FUNCTION GST_getRlati



  real(8) FUNCTION GST_getR1qm2(latIndex,gstID_in)
    implicit none

    integer :: latIndex,gstID_l
    integer,optional :: gstID_in
  
    if(present(gstID_in)) then
      gstID_l = gstID_in
    else
      gstID_l = gstIdDefault
    endif

    gst_getR1qm2 = gst(gstID_l)%r1qm2(latIndex)

  END FUNCTION GST_getR1qm2



  real(8) FUNCTION GST_getRsqm2(latIndex,gstID_in) 2
    implicit none

    integer :: latIndex,gstID_l
    integer,optional :: gstID_in

    if(present(gstID_in)) then
      gstID_l = gstID_in
    else
      gstID_l = gstIdDefault
    endif

    gst_getRsqm2 = gst(gstID_l)%rsqm2(latIndex)

  END FUNCTION GST_getRsqm2



  subroutine GST_ilaList_mpiglobal(ilaList,myNla,maxMyNla,gstID_in,mymBeg,mymEnd,mymSkip,mynBeg,mynEnd,mynSkip) 3,1
    ! produce an array to convert an mpilocal "ila" into an mpiglobal "ila"
    implicit none
    integer, pointer :: ilaList(:)
    integer          :: myNla, maxMyNla
    integer          :: gstID_in, mymBeg, mymEnd, mymSkip, mynBeg, mynEnd, mynSkip
    integer          :: jm, jn, ierr

    ! compute mpilocal value of nla
    myNla = 0
    do jm = mymBeg, mymEnd, mymSkip
      do jn = mynBeg, mynEnd, mynSkip
        if(jm.le.jn) then
          myNla = myNla + 1
        endif
      enddo
    enddo

    ! determine maximum value of myNla over all processors (used for dimensioning)
    call rpn_comm_allreduce(myNla,maxMyNla,1,"MPI_INTEGER","MPI_MAX","GRID",ierr)

    allocate(ilaList(maxMyNla))
    ilaList(:) = 0

    myNla = 0
    do jm = mymBeg, mymEnd, mymSkip
      do jn = mynBeg, mynEnd, mynSkip
        if(jm.le.jn) then
          myNla = myNla + 1
          ilaList(myNla) = gst_getNind(jm,gstID_in) + jn - jm
        endif
      enddo
    enddo

  end SUBROUTINE GST_ilaList_mpiglobal



  subroutine GST_ilaList_mpilocal(ilaList,gstID_in,mymBeg,mymEnd,mymSkip,mynBeg,mynEnd,mynSkip) 2,1
    ! produce an array to convert an mpiglobal "ila" into an mpilocal "ila"
    implicit none
    integer, pointer :: ilaList(:)
    integer          :: gstID_in, mymBeg, mymEnd, mymSkip, mynBeg, mynEnd, mynSkip
    integer          :: jm, jn, ierr, myNla

    ! assume mpiglobal value of nla already set in gst structure
    allocate(ilaList(gst(gstID_in)%nla))
    ilaList(:) = 0

    myNla = 0
    do jm = mymBeg, mymEnd, mymSkip
      do jn = mynBeg, mynEnd, mynSkip
        if(jm.le.jn) then
          myNla = myNla + 1
          ilaList(gst_getNind(jm,gstID_in) + jn - jm) = myNla
        endif
      enddo
    enddo

  end SUBROUTINE GST_ilaList_mpilocal



  subroutine GST_nList_mpilocal(nList,gstID_in,mymBeg,mymEnd,mymSkip,mynBeg,mynEnd,mynSkip)
    ! produce an array to get value of n corresponding with mpilocal "ila"
    implicit none
    integer, pointer :: nList(:)
    integer          :: gstID_in, mymBeg, mymEnd, mymSkip, mynBeg, mynEnd, mynSkip
    integer          :: jm, jn, ierr, myNla

    ! compute mpilocal value of nla
    myNla = 0
    do jm = mymBeg, mymEnd, mymSkip
      do jn = mynBeg, mynEnd, mynSkip
        if(jm.le.jn) then
          myNla = myNla + 1
        endif
      enddo
    enddo

    allocate(nList(myNla))
    nList(:) = 0

    myNla = 0
    do jm = mymBeg, mymEnd, mymSkip
      do jn = mynBeg, mynEnd, mynSkip
        if(jm.le.jn) then
          myNla = myNla + 1
          nList(myNla) = jn
        endif
      enddo
    enddo

  end SUBROUTINE GST_nList_mpilocal



  integer FUNCTION GST_SETUP(NI_IN,NJ_IN,NTRUNC_IN,MPIMODE_IN,MAXLEVELS_IN) 4,14
    implicit none
    integer  :: ni_in,nj_in,ntrunc_in,mpiMode_in
    integer,optional :: maxlevels_in
    integer  :: jn,jm,ila,ierr
    integer  :: latPerPE,myLatBeg,myLatEnd,myLatHalfBeg,myLatHalfEnd
    integer  :: lonPerPE,myLonBeg,myLonEnd
    integer  :: myLevBeg,myLevEnd,myLevCount
    integer  :: mymBeg,mymEnd,mymSkip,mymCount
    integer  :: mynBeg,mynEnd,mynSkip,mynCount
    real(8)  :: znnp1,z1snp1

    if(nGstAlreadyAllocated.eq.nMaxGst) then
      if(mpi_myid.eq.0) write(*,*) 'gst_setup: The maxmimum number of spectral transform have already been allocated! ',nMaxGst
      call exit(1)
    endif

    nGstAlreadyAllocated = nGstAlreadyAllocated+1
    gstID = nGstAlreadyAllocated
    call gst_setDefaultID(gstID)
    if(mpi_myid.eq.0) write(*,*) 'gst_setup: Now setting up spectral transform #',gstID

    gst(gstID)%ni = ni_in
    gst(gstID)%nj = nj_in
    gst(gstID)%njlath = (gst(gstID)%nj + 1)/2

    gst(gstID)%ntrunc = ntrunc_in
    gst(gstID)%nla = (gst(gstID)%ntrunc + 1)*(gst(gstID)%ntrunc +2)/2
    gst(gstID)%nlarh = (gst(gstID)%ntrunc+1)*(gst(gstID)%ntrunc+1)
    if(mpi_myid.eq.0) write(*,*) 'gst_setup: ntrunc=',gst(gstID)%ntrunc
    gst(gstID)%mpiMode = mpiMode_in
    write(*,*) 'gst_setup: mpiMode=',mpiMode_in

    if(gst(gstID)%mpiMode.ne.0) then
      call mpivar_setup_latbands(gst(gstID)%nj,latPerPE,myLatBeg,myLatEnd,myLatHalfBeg,myLatHalfEnd)
      call mpivar_setup_lonbands(gst(gstID)%ni,lonPerPE,myLonBeg,myLonEnd)
    endif
    gst(gstID)%nk = 0
    if(present(maxlevels_in)) then
      gst(gstID)%nk = maxlevels_in
      if(gst(gstID)%mpiMode.gt.3) then
        ! 2D MPI decomposition: split levels across npex
        call mpivar_setup_levels_npex(maxlevels_in,myLevBeg,myLevEnd,myLevCount)
      else
        ! 1D MPI decomposition: split levels across npey
        call mpivar_setup_levels_npey(maxlevels_in,myLevBeg,myLevEnd,myLevCount)
      endif
      write(*,*) 'gst_setup: myLevBeg,End,Count=',myLevBeg,myLevEnd,myLevCount
    else
      ! set a dummy range of levels handled by this processor
      gst(gstID)%myLevBeg = -1
      gst(gstID)%myLevEnd = -1
      gst(gstID)%myLevCount = 0
    endif
    if(gst(gstID)%mpiMode.eq.0) then
      !!! No distribution of data over mpi tasks in either space
      ! range of lons handled by this processor
      gst(gstID)%myLonBeg = 1
      gst(gstID)%myLonEnd = gst(gstID)%ni
      gst(gstID)%lonPerPE = gst(gstID)%ni
      ! range of lats handled by this processor
      gst(gstID)%myLatBeg = 1
      gst(gstID)%myLatEnd = gst(gstID)%nj
      gst(gstID)%myLatHalfBeg = 1
      gst(gstID)%myLatHalfEnd = gst(gstID)%njlath
      gst(gstID)%latPerPE = gst(gstID)%nj
      ! range of n handled by this processor
      gst(gstID)%mynBeg = 0
      gst(gstID)%mynEnd = gst(gstID)%ntrunc
      gst(gstID)%mynSkip = 1
      gst(gstID)%mynCount = gst(gstID)%ntrunc+1
      ! range of m handled by this processor
      gst(gstID)%mymBeg = 0
      gst(gstID)%mymEnd = gst(gstID)%ntrunc
      gst(gstID)%mymSkip = 1
      gst(gstID)%mymCount = gst(gstID)%ntrunc+1
      gst(gstID)%maxmCount = gst(gstID)%mymCount
    elseif(gst(gstID)%mpiMode.eq.1) then
      !!! Distribution of latitude bands (gridpoint space) and no distribution (spectral space)
      ! range of lons handled by this processor
      gst(gstID)%myLonBeg = 1
      gst(gstID)%myLonEnd = gst(gstID)%ni
      gst(gstID)%lonPerPE = gst(gstID)%ni
      ! range of lats handled by this processor
      gst(gstID)%myLatBeg = myLatBeg
      gst(gstID)%myLatEnd = myLatEnd
      gst(gstID)%myLatHalfBeg = myLatHalfBeg
      gst(gstID)%myLatHalfEnd = myLatHalfEnd  
      gst(gstID)%latPerPE = latPerPE
      ! range of n handled by this processor
      gst(gstID)%mynBeg = 0
      gst(gstID)%mynEnd = gst(gstID)%ntrunc
      gst(gstID)%mynSkip = 1
      gst(gstID)%mynCount = gst(gstID)%ntrunc+1
      ! range of m handled by this processor
      gst(gstID)%mymBeg = 0
      gst(gstID)%mymEnd = gst(gstID)%ntrunc
      gst(gstID)%mymSkip = 1
      gst(gstID)%mymCount = gst(gstID)%ntrunc+1
      gst(gstID)%maxmCount = gst(gstID)%mymCount
    elseif(gst(gstID)%mpiMode.eq.2) then
      !!! Distribution of lat bands (gridpoint space) and wavenumber m (spectral space)
      ! range of lons handled by this processor
      gst(gstID)%myLonBeg = 1
      gst(gstID)%myLonEnd = gst(gstID)%ni
      gst(gstID)%lonPerPE = gst(gstID)%ni
      ! range of lats handled by this processor
      gst(gstID)%myLatBeg = myLatBeg
      gst(gstID)%myLatEnd = myLatEnd
      gst(gstID)%myLatHalfBeg = myLatHalfBeg
      gst(gstID)%myLatHalfEnd = myLatHalfEnd
      gst(gstID)%latPerPE = latPerPE 
      ! range of n handled by this processor
      gst(gstID)%mynBeg = 0
      gst(gstID)%mynEnd = gst(gstID)%ntrunc
      gst(gstID)%mynSkip = 1
      gst(gstID)%mynCount = gst(gstID)%ntrunc+1
      ! range of m handled by this processor
      call mpivar_setup_m(gst(gstID)%ntrunc,mymBeg,mymEnd,mymSkip,mymCount)
      gst(gstID)%mymBeg = mymBeg
      gst(gstID)%mymEnd = mymEnd
      gst(gstID)%mymSkip = mymSkip
      gst(gstID)%mymCount = mymCount
      call rpn_comm_allreduce(gst(gstID)%mymCount,gst(gstID)%maxmCount, &
                              1,"MPI_INTEGER","MPI_MAX","GRID",ierr)
    elseif(gst(gstID)%mpiMode.eq.3) then
      !!! Distribution of lat bands (gridpoint space) and levels (spectral space)
      if(.not.present(maxlevels_in)) then
        write(*,*) 'gst_setup: ERROR, number of levels must be specified with mpiMode 3'
        call flush(6)
        call exit(1)
      endif
      ! range of lons handled by this processor
      gst(gstID)%myLonBeg = 1
      gst(gstID)%myLonEnd = gst(gstID)%ni
      gst(gstID)%lonPerPE = gst(gstID)%ni
      ! range of lats handled by this processor
      gst(gstID)%myLatBeg = myLatBeg
      gst(gstID)%myLatEnd = myLatEnd
      gst(gstID)%myLatHalfBeg = myLatHalfBeg
      gst(gstID)%myLatHalfEnd = myLatHalfEnd
      gst(gstID)%latPerPE = latPerPE 
      ! range of n handled by this processor
      gst(gstID)%mynBeg = 0
      gst(gstID)%mynEnd = gst(gstID)%ntrunc
      gst(gstID)%mynSkip = 1
      gst(gstID)%mynCount = gst(gstID)%ntrunc+1
      ! range of m handled by this processor
      gst(gstID)%mymBeg = 0
      gst(gstID)%mymEnd = gst(gstID)%ntrunc
      gst(gstID)%mymSkip = 1
      gst(gstID)%mymCount = gst(gstID)%ntrunc+1
      gst(gstID)%maxmCount = gst(gstID)%mymCount
      ! range of levels handled by this processor when in spectral space
      gst(gstID)%myLevBeg = myLevBeg
      gst(gstID)%myLevEnd = myLevEnd      
      gst(gstID)%myLevCount = myLevCount
      call rpn_comm_allreduce(gst(gstID)%myLevCount,gst(gstID)%maxLevCount, &
                              1,"MPI_INTEGER","MPI_MAX","GRID",ierr)
    elseif(gst(gstID)%mpiMode.eq.4) then
      !!! Distribution of lon/lat tiles (gridpoint space) and n/m (spectral space)
      if(.not.present(maxlevels_in)) then
        write(*,*) 'gst_setup: ERROR, number of levels must be specified with mpiMode 4'
        call flush(6)
        call exit(1)
      endif
      ! range of lons handled by this processor
      gst(gstID)%myLonBeg = myLonBeg
      gst(gstID)%myLonEnd = myLonEnd
      gst(gstID)%lonPerPE = lonPerPE 
      ! range of lats handled by this processor
      gst(gstID)%myLatBeg = myLatBeg
      gst(gstID)%myLatEnd = myLatEnd
      gst(gstID)%myLatHalfBeg = myLatHalfBeg
      gst(gstID)%myLatHalfEnd = myLatHalfEnd
      gst(gstID)%latPerPE = latPerPE 
      ! range of n handled by this processor
      call mpivar_setup_n(gst(gstID)%ntrunc,mynBeg,mynEnd,mynSkip,mynCount)
      gst(gstID)%mynBeg = mynBeg
      gst(gstID)%mynEnd = mynEnd
      gst(gstID)%mynSkip = mynSkip
      gst(gstID)%mynCount = mynCount
      ! range of m handled by this processor
      call mpivar_setup_m(gst(gstID)%ntrunc,mymBeg,mymEnd,mymSkip,mymCount)
      gst(gstID)%mymBeg = mymBeg
      gst(gstID)%mymEnd = mymEnd
      gst(gstID)%mymSkip = mymSkip
      gst(gstID)%mymCount = mymCount
      call rpn_comm_allreduce(gst(gstID)%mymCount,gst(gstID)%maxmCount, &
                              1,"MPI_INTEGER","MPI_MAX","GRID",ierr)
      ! range of levels handled by this processor when in spectral space
      gst(gstID)%myLevBeg = myLevBeg
      gst(gstID)%myLevEnd = myLevEnd      
      gst(gstID)%myLevCount = myLevCount
      call rpn_comm_allreduce(gst(gstID)%myLevCount,gst(gstID)%maxLevCount, &
                              1,"MPI_INTEGER","MPI_MAX","GRID",ierr)
    elseif(gst(gstID)%mpiMode.eq.5) then
      !!! Distribution of lon/lat tiles (gridpoint space) and levels/m (spectral space)
      if(.not.present(maxlevels_in)) then
        write(*,*) 'gst_setup: ERROR, number of levels must be specified with mpiMode 5'
        call flush(6)
        call exit(1)
      endif
      ! range of lons handled by this processor
      gst(gstID)%myLonBeg = myLonBeg
      gst(gstID)%myLonEnd = myLonEnd
      gst(gstID)%lonPerPE = lonPerPE 
      ! range of lats handled by this processor
      gst(gstID)%myLatBeg = myLatBeg
      gst(gstID)%myLatEnd = myLatEnd
      gst(gstID)%myLatHalfBeg = myLatHalfBeg
      gst(gstID)%myLatHalfEnd = myLatHalfEnd
      gst(gstID)%latPerPE = latPerPE 
      ! range of n handled by this processor
      gst(gstID)%mynBeg = 0
      gst(gstID)%mynEnd = gst(gstID)%ntrunc
      gst(gstID)%mynSkip = 1
      gst(gstID)%mynCount = gst(gstID)%ntrunc+1
      ! range of m handled by this processor
      call mpivar_setup_m(gst(gstID)%ntrunc,mymBeg,mymEnd,mymSkip,mymCount)
      gst(gstID)%mymBeg = mymBeg
      gst(gstID)%mymEnd = mymEnd
      gst(gstID)%mymSkip = mymSkip
      gst(gstID)%mymCount = mymCount
      call rpn_comm_allreduce(gst(gstID)%mymCount,gst(gstID)%maxmCount, &
                              1,"MPI_INTEGER","MPI_MAX","GRID",ierr)
      ! range of levels handled by this processor when in spectral space
      gst(gstID)%myLevBeg = myLevBeg
      gst(gstID)%myLevEnd = myLevEnd      
      gst(gstID)%myLevCount = myLevCount
      call rpn_comm_allreduce(gst(gstID)%myLevCount,gst(gstID)%maxLevCount, &
                              1,"MPI_INTEGER","MPI_MAX","GRID",ierr)
    else
      if(mpi_myid.eq.0) write(*,*) 'gst_setup: Selected MPI Mode unknown!'
      call flush(6)
      call exit(1)
    endif

    if(mpi_myid.eq.0) write(*,*) 'gst_setup: allocating comleg...'
    call allocate_comleg
    if(mpi_myid.eq.0) write(*,*) 'gst_setup: calling suleg...'
    call suleg
    if(mpi_myid.eq.0) write(*,*) 'gst_setup: calling sualp...'
    call sualp

    ! compute the list of spectral coefficient indices when distributed by n and m
    call gst_ilaList_mpiglobal(gst(gstID)%ilaList,gst(gstID)%myNla,gst(gstID)%maxMyNla,gstID,  &
                               gst(gstID)%mymBeg,gst(gstID)%mymEnd,gst(gstID)%mymSkip,  &
                               gst(gstID)%mynBeg,gst(gstID)%mynEnd,gst(gstID)%mynSkip)
    allocate(gst(gstID)%allNla(mpi_npex))
    call rpn_comm_allgather(gst(gstID)%myNla,1,"mpi_integer",  &
                            gst(gstID)%allNla,1,"mpi_integer","EW",ierr)
    allocate(gst(gstID)%allIlaList(gst(gstID)%maxMyNla,mpi_npex))
    call rpn_comm_allgather(gst(gstID)%ilaList,gst(gstID)%maxMyNla,"mpi_integer",  &
                            gst(gstID)%allIlaList,gst(gstID)%maxMyNla,"mpi_integer","EW",ierr)

    allocate(gst(gstID)%allLonBeg(mpi_npex))
    CALL rpn_comm_allgather(gst(gstID)%myLonBeg,1,"mpi_integer",       &
                            gst(gstID)%allLonBeg,1,"mpi_integer","EW",ierr)
    allocate(gst(gstID)%allLonEnd(mpi_npex))
    CALL rpn_comm_allgather(gst(gstID)%myLonEnd,1,"mpi_integer",       &
                            gst(gstID)%allLonEnd,1,"mpi_integer","EW",ierr)

    allocate(gst(gstID)%allLatBeg(mpi_npey))
    CALL rpn_comm_allgather(gst(gstID)%myLatBeg,1,"mpi_integer",       &
                            gst(gstID)%allLatBeg,1,"mpi_integer","NS",ierr)
    allocate(gst(gstID)%allLatEnd(mpi_npey))
    CALL rpn_comm_allgather(gst(gstID)%myLatEnd,1,"mpi_integer",       &
                            gst(gstID)%allLatEnd,1,"mpi_integer","NS",ierr)

    allocate(gst(gstID)%mymIndex(gst(gstID)%mymBeg:gst(gstID)%mymEnd))
    gst(gstID)%mymIndex(:) = 0
    do jm = gst(gstID)%mymBeg,gst(gstID)%mymEnd,gst(gstID)%mymSkip
      if(jm.eq.gst(gstID)%mymBeg) then
        gst(gstID)%mymIndex(jm) = 1
      else
        gst(gstID)%mymIndex(jm) = gst(gstID)%mymIndex(jm-gst(gstID)%mymSkip) + 1
      endif
      write(*,*) 'gst_setup: mymIndex(',jm,')=',gst(gstID)%mymIndex(jm)
    enddo

    allocate(gst(gstID)%allnBeg(mpi_npex))
    CALL rpn_comm_allgather(gst(gstID)%mynBeg,1,"mpi_integer",       &
                            gst(gstID)%allnBeg,1,"mpi_integer","EW",ierr)
    allocate(gst(gstID)%allnEnd(mpi_npex))
    CALL rpn_comm_allgather(gst(gstID)%mynEnd,1,"mpi_integer",       &
                            gst(gstID)%allnEnd,1,"mpi_integer","EW",ierr)
    allocate(gst(gstID)%allnSkip(mpi_npex))
    CALL rpn_comm_allgather(gst(gstID)%mynSkip,1,"mpi_integer",       &
                            gst(gstID)%allnSkip,1,"mpi_integer","EW",ierr)

    allocate(gst(gstID)%allmBeg(mpi_npey))
    CALL rpn_comm_allgather(gst(gstID)%mymBeg,1,"mpi_integer",       &
                            gst(gstID)%allmBeg,1,"mpi_integer","NS",ierr)
    allocate(gst(gstID)%allmEnd(mpi_npey))
    CALL rpn_comm_allgather(gst(gstID)%mymEnd,1,"mpi_integer",       &
                            gst(gstID)%allmEnd,1,"mpi_integer","NS",ierr)
    allocate(gst(gstID)%allmSkip(mpi_npey))
    CALL rpn_comm_allgather(gst(gstID)%mymSkip,1,"mpi_integer",       &
                            gst(gstID)%allmSkip,1,"mpi_integer","NS",ierr)

    if(gst(gstID)%mpiMode.gt.3) then
      allocate(gst(gstID)%allLevBeg(mpi_npex))
      CALL rpn_comm_allgather(gst(gstID)%myLevBeg,1,"mpi_integer",       &
                              gst(gstID)%allLevBeg,1,"mpi_integer","EW",ierr)
      allocate(gst(gstID)%allLevEnd(mpi_npex))
      CALL rpn_comm_allgather(gst(gstID)%myLevEnd,1,"mpi_integer",       &
                              gst(gstID)%allLevEnd,1,"mpi_integer","EW",ierr)
    else
      allocate(gst(gstID)%allLevBeg(mpi_npey))
      CALL rpn_comm_allgather(gst(gstID)%myLevBeg,1,"mpi_integer",       &
                              gst(gstID)%allLevBeg,1,"mpi_integer","NS",ierr)
      allocate(gst(gstID)%allLevEnd(mpi_npey))
      CALL rpn_comm_allgather(gst(gstID)%myLevEnd,1,"mpi_integer",       &
                              gst(gstID)%allLevEnd,1,"mpi_integer","NS",ierr)
    endif

    if(mpi_myid.eq.0) write(*,*) 'gst_setup: allLonBeg=',gst(gstID)%allLonBeg
    if(mpi_myid.eq.0) write(*,*) 'gst_setup: allLonEnd=',gst(gstID)%allLonEnd
    if(mpi_myid.eq.0) write(*,*) 'gst_setup: allLatBeg=',gst(gstID)%allLatBeg
    if(mpi_myid.eq.0) write(*,*) 'gst_setup: allLatEnd=',gst(gstID)%allLatEnd
    if(mpi_myid.eq.0) write(*,*) 'gst_setup: allnBeg=',gst(gstID)%allnBeg
    if(mpi_myid.eq.0) write(*,*) 'gst_setup: allnEnd=',gst(gstID)%allnEnd
    if(mpi_myid.eq.0) write(*,*) 'gst_setup: allnSkip=',gst(gstID)%allnSkip
    if(mpi_myid.eq.0) write(*,*) 'gst_setup: allmBeg=',gst(gstID)%allmBeg
    if(mpi_myid.eq.0) write(*,*) 'gst_setup: allmEnd=',gst(gstID)%allmEnd
    if(mpi_myid.eq.0) write(*,*) 'gst_setup: allmSkip=',gst(gstID)%allmSkip
    if(mpi_myid.eq.0) write(*,*) 'gst_setup: allLevBeg=',gst(gstID)%allLevBeg
    if(mpi_myid.eq.0) write(*,*) 'gst_setup: allLevEnd=',gst(gstID)%allLevEnd
    write(*,*) 'gst_setup: mymCount=',gst(gstID)%mymCount
    write(*,*) 'gst_setup: maxmCount=',gst(gstID)%maxmCount
    write(*,*) 'gst_setup: myNla=',gst(gstID)%myNla
    write(*,*) 'gst_setup: maxMyNla=',gst(gstID)%maxMyNla

    if(mpi_myid.eq.0) write(*,*) 'gst_setup: calling sufft...'
    gst(gstID)%fftID = fft_sufft(6,gst(gstID)%ni,gst(gstID)%nj,           &
                       gst(gstID)%myLatBeg,gst(gstID)%myLatEnd)

    allocate(gst(gstID)%r1snp1(gst(gstID)%nla))
    allocate(gst(gstID)%rnnp1(gst(gstID)%nla))
    gst(gstID)%r1snp1(1) = 0.d0
    gst(gstID)%rnnp1(1) = 0.d0
    do jn = 1, gst(gstID)%ntrunc
       znnp1  = -real(jn,8)*real(jn+1,8)
       z1snp1 =  1.d0/znnp1
       do jm = 0, jn
          ila = gst(gstID)%nind(jm) + jn - jm
          gst(gstID)%r1snp1(ila) = z1snp1
          gst(gstID)%rnnp1(ila) = znnp1
       enddo
    enddo
    gst_setup = gstID
  
  END FUNCTION GST_SETUP

!------------------------------------------------------------------
! Data transposes with respect to 1 of 2 dimensions (i.e. NS or EW)
!------------------------------------------------------------------


  SUBROUTINE transpose2d_NtoLev(psp_in,psp_out) 3
    implicit none
    real(8) :: psp_in (gst(gstID)%maxMyNla, 2, gst(gstID)%nk)
    real(8) :: psp_out(gst(gstID)%nla, 2, gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)

    real(8) :: sp_send(gst(gstID)%maxMyNla, 2, gst(gstID)%maxLevCount, mpi_npex)
    real(8) :: sp_recv(gst(gstID)%maxMyNla, 2, gst(gstID)%maxLevCount, mpi_npex)
    integer :: yourid,ila,icount,nsize,ierr,jlev,jlev2

    call rpn_comm_barrier("GRID",ierr)

    call tmg_start(26,'TRANSP_2D_NtoLEV')

!$OMP PARALLEL DO PRIVATE(yourid,jlev,jlev2,icount)
    do yourid = 0, (mpi_npex-1)
      do jlev = gst(gstID)%allLevBeg(yourid+1), gst(gstID)%allLevEnd(yourid+1)
        jlev2 = jlev - gst(gstID)%allLevBeg(yourid+1) + 1
        do icount = 1, gst(gstID)%myNla
          sp_send(icount,1,jlev2,yourid+1) = psp_in(icount,1,jlev)
          sp_send(icount,2,jlev2,yourid+1) = psp_in(icount,2,jlev)
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    nsize = gst(gstID)%maxMyNla*2*gst(gstID)%maxLevCount
    if(mpi_npex.gt.1) then
      call rpn_comm_alltoall(sp_send,nsize,"mpi_double_precision",  &
                             sp_recv,nsize,"mpi_double_precision","EW",ierr)
    else
      sp_recv(:,:,:,1) = sp_send(:,:,:,1)
    endif

!$OMP PARALLEL DO PRIVATE(yourid,jlev,jlev2,icount,ila)
    do yourid = 0, (mpi_npex-1)
      do jlev = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
        jlev2 = jlev - gst(gstID)%myLevBeg + 1
        do icount = 1, gst(gstID)%allNla(yourid+1)
          ila = gst(gstID)%allIlaList(icount,yourid+1)
          psp_out(ila,1,jlev) = sp_recv(icount,1,jlev2,yourid+1)
          psp_out(ila,2,jlev) = sp_recv(icount,2,jlev2,yourid+1)
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    call tmg_stop(26)

  END SUBROUTINE transpose2d_NtoLev



  SUBROUTINE transpose2d_LevtoN(psp_in,psp_out) 3
    implicit none
    real(8) :: psp_in (gst(gstID)%nla, 2, gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: psp_out(gst(gstID)%maxMyNla, 2, gst(gstID)%nk)

    real(8) :: sp_send(gst(gstID)%maxMyNla, 2, gst(gstID)%maxLevCount, mpi_npex)
    real(8) :: sp_recv(gst(gstID)%maxMyNla, 2, gst(gstID)%maxLevCount, mpi_npex)
    integer :: yourid,ila,icount,nsize,ierr,jlev,jlev2

    call rpn_comm_barrier("GRID",ierr)

    call tmg_start(26,'TRANSP_2D_NtoLEV')

!$OMP PARALLEL DO PRIVATE(yourid,jlev,jlev2,icount,ila)
    do yourid = 0, (mpi_npex-1)
      do jlev = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
        jlev2 = jlev - gst(gstID)%myLevBeg + 1
        do icount = 1, gst(gstID)%allNla(yourid+1)
          ila = gst(gstID)%allIlaList(icount,yourid+1)
          sp_send(icount,1,jlev2,yourid+1) = psp_in(ila,1,jlev)
          sp_send(icount,2,jlev2,yourid+1) = psp_in(ila,2,jlev)
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    nsize = gst(gstID)%maxMyNla*2*gst(gstID)%maxLevCount
    if(mpi_npex.gt.1) then
      call rpn_comm_alltoall(sp_send,nsize,"mpi_double_precision",  &
                             sp_recv,nsize,"mpi_double_precision","EW",ierr)
    else
      sp_recv(:,:,:,1) = sp_send(:,:,:,1)
    endif

!$OMP PARALLEL DO PRIVATE(yourid,jlev,jlev2,icount)
    do yourid = 0, (mpi_npex-1)
      do jlev = gst(gstID)%allLevBeg(yourid+1), gst(gstID)%allLevEnd(yourid+1)
        jlev2 = jlev - gst(gstID)%allLevBeg(yourid+1) + 1
        do icount = 1, gst(gstID)%myNla
          psp_out(icount,1,jlev) = sp_recv(icount,1,jlev2,yourid+1)
          psp_out(icount,2,jlev) = sp_recv(icount,2,jlev2,yourid+1)
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    call tmg_stop(26)

  END SUBROUTINE transpose2d_LevtoN



  SUBROUTINE transpose2d_MtoLat(pgd_in,pgd_out) 4
    implicit none
    real(8) :: pgd_in(2*gst(gstID)%maxmCount,  gst(gstID)%myLevBeg:gst(gstID)%myLevEnd, gst(gstID)%nj)
    real(8) :: pgd_out(gst(gstID)%ni, gst(gstID)%myLevBeg:gst(gstID)%myLevEnd, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)

    real(8) :: gd_send(gst(gstID)%maxmCount, 2, gst(gstID)%maxLevCount, gst(gstID)%latPerPE, mpi_npey)
    real(8) :: gd_recv(gst(gstID)%maxmCount, 2, gst(gstID)%maxLevCount, gst(gstID)%latPerPE, mpi_npey)
    integer :: yourid,jm,icount,nsize,ierr,jlev,jlev2,jlat,jlat2

    call rpn_comm_barrier("GRID",ierr)

    call tmg_start(27,'TRANSP_2D_MtoLAT')

!$OMP PARALLEL DO PRIVATE(yourid,jlat,jlat2,jlev,jlev2,icount,jm)
    do yourid = 0, (mpi_npey-1)
      do jlat = gst(gstID)%allLatBeg(yourid+1), gst(gstID)%allLatEnd(yourid+1)
        jlat2 = jlat - gst(gstID)%allLatBeg(yourid+1) + 1
        do jlev = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
          jlev2 = jlev - gst(gstID)%myLevBeg + 1
          icount = 0
          do jm = gst(gstID)%mymBeg, gst(gstID)%mymEnd, gst(gstID)%mymSkip
            icount = icount + 1
            gd_send(icount,1,jlev2,jlat2,yourid+1) = pgd_in(2*gst(gstID)%mymIndex(jm)-1,jlev,jlat)
            gd_send(icount,2,jlev2,jlat2,yourid+1) = pgd_in(2*gst(gstID)%mymIndex(jm)  ,jlev,jlat)
          enddo
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    nsize = gst(gstID)%maxmCount*2*gst(gstID)%maxLevCount*gst(gstID)%latPerPE
    if(mpi_npey.gt.1) then
      call rpn_comm_alltoall(gd_send,nsize,"mpi_double_precision",  &
                             gd_recv,nsize,"mpi_double_precision","NS",ierr)
    else
      gd_recv(:,:,:,:,1) = gd_send(:,:,:,:,1)
    endif

!$OMP PARALLEL DO PRIVATE(yourid,jlat,jlat2,jlev,jlev2,icount,jm)
    do yourid = 0, (mpi_npey-1)
      do jlat = gst(gstID)%myLatBeg, gst(gstID)%myLatEnd
        jlat2 = jlat - gst(gstID)%myLatBeg + 1
        do jlev = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
          jlev2 = jlev - gst(gstID)%myLevBeg + 1
          icount = 0
          do jm = gst(gstID)%allmBeg(yourid+1), gst(gstID)%allmEnd(yourid+1), gst(gstID)%allmSkip(yourid+1)
            icount = icount + 1
            pgd_out(2*jm+1,jlev,jlat) = gd_recv(icount,1,jlev2,jlat2,yourid+1)
            pgd_out(2*jm+2,jlev,jlat) = gd_recv(icount,2,jlev2,jlat2,yourid+1)
          enddo
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    call tmg_stop(27)

  END SUBROUTINE transpose2d_MtoLat



  SUBROUTINE transpose2d_LattoM(pgd_in,pgd_out) 4
    implicit none
    real(8) :: pgd_in(gst(gstID)%ni, gst(gstID)%myLevBeg:gst(gstID)%myLevEnd, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)
    real(8) :: pgd_out(2*gst(gstID)%maxmCount,  gst(gstID)%myLevBeg:gst(gstID)%myLevEnd, gst(gstID)%nj)

    real(8) :: gd_send(gst(gstID)%maxmCount, 2, gst(gstID)%maxLevCount, gst(gstID)%latPerPE, mpi_npey)
    real(8) :: gd_recv(gst(gstID)%maxmCount, 2, gst(gstID)%maxLevCount, gst(gstID)%latPerPE, mpi_npey)
    integer :: yourid,jm,icount,nsize,ierr,jlev,jlev2,jlat,jlat2

    call rpn_comm_barrier("GRID",ierr)

    call tmg_start(27,'TRANSP_2D_MtoLAT')

!$OMP PARALLEL DO PRIVATE(yourid,jlat,jlat2,jlev,jlev2,icount,jm)
    do yourid = 0, (mpi_npey-1)
      do jlat = gst(gstID)%myLatBeg, gst(gstID)%myLatEnd
        jlat2 = jlat - gst(gstID)%myLatBeg + 1
        do jlev = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
          jlev2 = jlev - gst(gstID)%myLevBeg + 1
          icount = 0
          do jm = gst(gstID)%allmBeg(yourid+1), gst(gstID)%allmEnd(yourid+1), gst(gstID)%allmSkip(yourid+1)
            icount = icount + 1
            gd_send(icount,1,jlev2,jlat2,yourid+1) = pgd_in(2*jm+1,jlev,jlat)
            gd_send(icount,2,jlev2,jlat2,yourid+1) = pgd_in(2*jm+2,jlev,jlat)
          enddo
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    nsize = gst(gstID)%maxmCount*2*gst(gstID)%maxLevCount*gst(gstID)%latPerPE
    if(mpi_npey.gt.1) then
      call rpn_comm_alltoall(gd_send,nsize,"mpi_double_precision",  &
                             gd_recv,nsize,"mpi_double_precision","NS",ierr)
    else
      gd_recv(:,:,:,:,1) = gd_send(:,:,:,:,1)
    endif

!$OMP PARALLEL DO PRIVATE(yourid,jlat,jlat2,jlev,jlev2,icount,jm)
    do yourid = 0, (mpi_npey-1)
      do jlat = gst(gstID)%allLatBeg(yourid+1), gst(gstID)%allLatEnd(yourid+1)
        jlat2 = jlat - gst(gstID)%allLatBeg(yourid+1) + 1
        do jlev = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
          jlev2 = jlev - gst(gstID)%myLevBeg + 1
          icount = 0
          do jm = gst(gstID)%mymBeg, gst(gstID)%mymEnd, gst(gstID)%mymSkip
            icount = icount + 1
            pgd_out(2*gst(gstID)%mymIndex(jm)-1,jlev,jlat) = gd_recv(icount,1,jlev2,jlat2,yourid+1)
            pgd_out(2*gst(gstID)%mymIndex(jm)  ,jlev,jlat) = gd_recv(icount,2,jlev2,jlat2,yourid+1)
          enddo
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    call tmg_stop(27)

  END SUBROUTINE transpose2d_LattoM



  SUBROUTINE transpose2d_LevtoLon(pgd_in,pgd_out) 4
    implicit none
    real(8) :: pgd_in(gst(gstID)%ni, gst(gstID)%myLevBeg:gst(gstID)%myLevEnd, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)
    real(8) :: pgd_out(gst(gstID)%myLonBeg:gst(gstID)%myLonEnd, gst(gstID)%nk, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)

    real(8) :: gd_send(gst(gstID)%lonPerPE, gst(gstID)%maxLevCount, gst(gstID)%latPerPE, mpi_npex)
    real(8) :: gd_recv(gst(gstID)%lonPerPE, gst(gstID)%maxLevCount, gst(gstID)%latPerPE, mpi_npex)
    integer :: yourid,nsize,ierr,jlev,jlev2,jlat,jlat2,jlon,jlon2

    call rpn_comm_barrier("GRID",ierr)

    call tmg_start(28,'TRANSP_2D_LEVtoLON')

!$OMP PARALLEL DO PRIVATE(yourid,jlat,jlat2,jlev,jlev2,jlon,jlon2)
    do yourid = 0, (mpi_npex-1)
      do jlat = gst(gstID)%myLatBeg, gst(gstID)%myLatEnd
        jlat2 = jlat - gst(gstID)%myLatBeg + 1
        do jlev = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
          jlev2 = jlev - gst(gstID)%myLevBeg + 1
          do jlon = gst(gstID)%allLonBeg(yourid+1), gst(gstID)%allLonEnd(yourid+1)
            jlon2 = jlon - gst(gstID)%allLonBeg(yourid+1) + 1
            gd_send(jlon2,jlev2,jlat2,yourid+1) = pgd_in(jlon,jlev,jlat)
          enddo
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    nsize = gst(gstID)%lonPerPE*gst(gstID)%maxLevCount*gst(gstID)%latPerPE
    if(mpi_npex.gt.1) then
      call rpn_comm_alltoall(gd_send,nsize,"mpi_double_precision",  &
                             gd_recv,nsize,"mpi_double_precision","EW",ierr)
    else
      gd_recv(:,:,:,1) = gd_send(:,:,:,1)
    endif

!$OMP PARALLEL DO PRIVATE(yourid,jlat,jlat2,jlev,jlev2,jlon,jlon2)
    do yourid = 0, (mpi_npex-1)
      do jlat = gst(gstID)%myLatBeg, gst(gstID)%myLatEnd
        jlat2 = jlat - gst(gstID)%myLatBeg + 1
        do jlev=gst(gstID)%allLevBeg(yourid+1),gst(gstID)%allLevEnd(yourid+1)
          jlev2=jlev-gst(gstID)%allLevBeg(yourid+1)+1
          do jlon = gst(gstID)%myLonBeg, gst(gstID)%myLonEnd
            jlon2 = jlon - gst(gstID)%myLonBeg + 1
            pgd_out(jlon,jlev,jlat) = gd_recv(jlon2,jlev2,jlat2,yourid+1)
          enddo
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    call tmg_stop(28)

  END SUBROUTINE transpose2d_LevtoLon



  SUBROUTINE transpose2d_LontoLev(pgd_in,pgd_out) 4
    implicit none
    real(8) :: pgd_in(gst(gstID)%myLonBeg:gst(gstID)%myLonEnd, gst(gstID)%nk, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)
    real(8) :: pgd_out(gst(gstID)%ni, gst(gstID)%myLevBeg:gst(gstID)%myLevEnd, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)

    real(8) :: gd_send(gst(gstID)%lonPerPE, gst(gstID)%maxLevCount, gst(gstID)%latPerPE, mpi_npex)
    real(8) :: gd_recv(gst(gstID)%lonPerPE, gst(gstID)%maxLevCount, gst(gstID)%latPerPE, mpi_npex)
    integer :: yourid,nsize,ierr,jlev,jlev2,jlat,jlat2,jlon,jlon2

    call rpn_comm_barrier("GRID",ierr)

    call tmg_start(28,'TRANSP_2D_LEVtoLON')

!$OMP PARALLEL DO PRIVATE(yourid,jlat,jlat2,jlev,jlev2,jlon,jlon2)
    do yourid = 0, (mpi_npex-1)
      do jlat = gst(gstID)%myLatBeg, gst(gstID)%myLatEnd
        jlat2 = jlat - gst(gstID)%myLatBeg + 1
        do jlev=gst(gstID)%allLevBeg(yourid+1),gst(gstID)%allLevEnd(yourid+1)
          jlev2=jlev-gst(gstID)%allLevBeg(yourid+1)+1
          do jlon = gst(gstID)%myLonBeg, gst(gstID)%myLonEnd
            jlon2 = jlon - gst(gstID)%myLonBeg + 1
            gd_send(jlon2,jlev2,jlat2,yourid+1) = pgd_in(jlon,jlev,jlat)
          enddo
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    nsize = gst(gstID)%lonPerPE*gst(gstID)%maxLevCount*gst(gstID)%latPerPE
    if(mpi_npex.gt.1) then
      call rpn_comm_alltoall(gd_send,nsize,"mpi_double_precision",  &
                             gd_recv,nsize,"mpi_double_precision","EW",ierr)
    else
      gd_recv(:,:,:,1) = gd_send(:,:,:,1)
    endif

!$OMP PARALLEL DO PRIVATE(yourid,jlat,jlat2,jlev,jlev2,jlon,jlon2)
    do yourid = 0, (mpi_npex-1)
      do jlat = gst(gstID)%myLatBeg, gst(gstID)%myLatEnd
        jlat2 = jlat - gst(gstID)%myLatBeg + 1
        do jlev = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
          jlev2 = jlev - gst(gstID)%myLevBeg + 1
          do jlon = gst(gstID)%allLonBeg(yourid+1), gst(gstID)%allLonEnd(yourid+1)
            jlon2 = jlon - gst(gstID)%allLonBeg(yourid+1) + 1
            pgd_out(jlon,jlev,jlat) = gd_recv(jlon2,jlev2,jlat2,yourid+1)
          enddo
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    call tmg_stop(28)

  END SUBROUTINE transpose2d_LontoLev

!-----------------------------------------------------------------
! Global transposes
!-----------------------------------------------------------------


  SUBROUTINE transpose_LattoM(pgd2,pgd,kfield,kdim) 2
    implicit none
    integer :: kfield,kdim,yourid,jm,icount,nsize,ierr,jlev,jlat
    real(8) :: pgd2(2*gst(gstID)%maxmCount,kdim,gst(gstID)%nj)
    real(8) :: pgd(gst(gstID)%ni,kdim,gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)
    real(8) :: gd_recv(gst(gstID)%maxmCount,2,kfield,gst(gstID)%nj)
    real(8) :: gd_send(gst(gstID)%maxmCount,2,kfield,gst(gstID)%latPerPE,mpi_nprocs)

    call rpn_comm_barrier("GRID",ierr)

    call tmg_start(58,'TRANSP_LattoM')

!$OMP PARALLEL DO PRIVATE(yourid,jlat,jlev,icount,jm)
    do yourid = 0,(mpi_nprocs-1)
      do jlat = 1,gst(gstID)%latPerPE
        do jlev = 1,kfield
          icount = 0
          do jm = gst(gstID)%allmBeg(yourid+1),gst(gstID)%allmEnd(yourid+1),gst(gstID)%allmSkip(yourid+1)
            icount = icount+1
            gd_send(icount,1,jlev,jlat,yourid+1) = pgd(2*jm+1,jlev,gst(gstID)%myLatBeg+jlat-1)
            gd_send(icount,2,jlev,jlat,yourid+1) = pgd(2*jm+2,jlev,gst(gstID)%myLatBeg+jlat-1)
          enddo
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    nsize = gst(gstID)%maxmCount*2*kfield*gst(gstID)%latPerPE
    if(mpi_nprocs.gt.1) then
      call rpn_comm_alltoall(gd_send,nsize,"mpi_double_precision",  &
                             gd_recv,nsize,"mpi_double_precision","GRID",ierr)
    else
      gd_recv(:,:,:,:) = gd_send(:,:,:,:,1)
    endif

!$OMP PARALLEL DO PRIVATE(jlat,jlev,icount,jm)
    do jlat = 1,gst(gstID)%nj
      do jlev = 1,kfield
        icount = 0
        do jm = gst(gstID)%mymBeg,gst(gstID)%mymEnd,gst(gstID)%mymSkip
          icount = icount+1
          pgd2(2*gst(gstID)%mymIndex(jm)-1,jlev,jlat) = gd_recv(icount,1,jlev,jlat)
          pgd2(2*gst(gstID)%mymIndex(jm)  ,jlev,jlat) = gd_recv(icount,2,jlev,jlat)
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    call tmg_stop(58)

  END SUBROUTINE transpose_LattoM



  SUBROUTINE transpose_MtoLat(pgd2,pgd,kfield,kdim) 2
    implicit none
    integer :: kfield,kdim,yourid,jm,icount,nsize,ierr,jlev,jlat
    real(8) :: pgd2(2*gst(gstID)%maxmCount,kdim,gst(gstID)%nj)
    real(8) :: pgd(gst(gstID)%ni,kdim,gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)
    real(8) :: gd_send(gst(gstID)%maxmCount,2,kfield,gst(gstID)%nj)
    real(8) :: gd_recv(gst(gstID)%maxmCount,2,kfield,gst(gstID)%latPerPE,mpi_nprocs)

    call rpn_comm_barrier("GRID",ierr)

    call tmg_start(58,'TRANSP_LattoM')

!$OMP PARALLEL DO PRIVATE(jlat,jlev,icount,jm)
    do jlat = 1,gst(gstID)%nj
      do jlev = 1,kfield
        icount = 0
        do jm = gst(gstID)%mymBeg,gst(gstID)%mymEnd,gst(gstID)%mymSkip
          icount = icount+1
          gd_send(icount,1,jlev,jlat) = pgd2(2*gst(gstID)%mymIndex(jm)-1,jlev,jlat)
          gd_send(icount,2,jlev,jlat) = pgd2(2*gst(gstID)%mymIndex(jm)  ,jlev,jlat)
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    nsize = gst(gstID)%maxmCount*2*kfield*gst(gstID)%latPerPE
    if(mpi_nprocs.gt.1) then
      call rpn_comm_alltoall(gd_send,nsize,"mpi_double_precision",  &
                             gd_recv,nsize,"mpi_double_precision","GRID",ierr)
    else
      gd_recv(:,:,:,:,1) = gd_send(:,:,:,:)
    endif

!$OMP PARALLEL DO PRIVATE(yourid,jlat,jlev,icount,jm)
    do yourid = 0,(mpi_nprocs-1)
      do jlat = 1,gst(gstID)%latPerPE
        do jlev = 1,kfield
          icount = 0
          do jm = gst(gstID)%allmBeg(yourid+1),gst(gstID)%allmEnd(yourid+1),gst(gstID)%allmSkip(yourid+1)
            icount = icount+1
            pgd(2*jm+1,jlev,gst(gstID)%myLatBeg+jlat-1) = gd_recv(icount,1,jlev,jlat,yourid+1)
            pgd(2*jm+2,jlev,gst(gstID)%myLatBeg+jlat-1) = gd_recv(icount,2,jlev,jlat,yourid+1)
          enddo
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    call tmg_stop(58)

  END SUBROUTINE transpose_MtoLat



  SUBROUTINE transpose_LattoLev(pgd2,pgd,kfield,kdim) 1
    implicit none
    integer :: kfield,kdim,yourid,jm,icount,nsize,ierr,jlev,jlev2,jlat
    real(8) :: pgd2(2*(gst(gstID)%ntrunc+1),gst(gstID)%myLevBeg:gst(gstID)%myLevEnd,gst(gstID)%nj)
    real(8) :: pgd(gst(gstID)%ni,kdim,gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)
    real(8) :: gd_recv(gst(gstID)%ntrunc+1,2,gst(gstID)%maxLevCount,gst(gstID)%nj)
    real(8) :: gd_send(gst(gstID)%ntrunc+1,2,gst(gstID)%maxLevCount,gst(gstID)%latPerPE,mpi_nprocs)

    call rpn_comm_barrier("GRID",ierr)

    call tmg_start(66,'TRANSP_LATtoLEV')

!$OMP PARALLEL DO PRIVATE(yourid,jlat,jlev,jlev2,icount,jm)
    do yourid = 0,(mpi_nprocs-1)
      do jlat = 1,gst(gstID)%latPerPE
        do jlev = gst(gstID)%allLevBeg(yourid+1),gst(gstID)%allLevEnd(yourid+1)
          jlev2 = jlev-gst(gstID)%allLevBeg(yourid+1)+1
          icount = 0
          do jm = 0,gst(gstID)%ntrunc
            icount = icount+1
            gd_send(icount,1,jlev2,jlat,yourid+1) = pgd(2*jm+1,jlev,gst(gstID)%myLatBeg+jlat-1)
            gd_send(icount,2,jlev2,jlat,yourid+1) = pgd(2*jm+2,jlev,gst(gstID)%myLatBeg+jlat-1)
          enddo
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    nsize = (gst(gstID)%ntrunc+1)*2*gst(gstID)%maxLevCount*gst(gstID)%latPerPE
    if(mpi_nprocs.gt.1) then
      call rpn_comm_alltoall(gd_send,nsize,"mpi_double_precision",  &
                             gd_recv,nsize,"mpi_double_precision","GRID",ierr)
    else
      gd_recv(:,:,:,:) = gd_send(:,:,:,:,1)
    endif

!$OMP PARALLEL DO PRIVATE(jlat,jlev,jlev2,icount,jm)
    do jlat = 1,gst(gstID)%nj
      do jlev = gst(gstID)%myLevBeg,gst(gstID)%myLevEnd
        jlev2 = jlev-gst(gstID)%myLevBeg+1
        icount = 0
        do jm = 0,gst(gstID)%ntrunc
          icount = icount+1
          pgd2(2*jm+1,jlev,jlat) = gd_recv(icount,1,jlev2,jlat)
          pgd2(2*jm+2,jlev,jlat) = gd_recv(icount,2,jlev2,jlat)
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    call tmg_stop(66)

  END SUBROUTINE transpose_LattoLev



  SUBROUTINE transpose_LevtoLat(pgd2,pgd,kfield,kdim) 1
    implicit none
    real(8) :: pgd2(2*(gst(gstID)%ntrunc+1),gst(gstID)%myLevBeg:gst(gstID)%myLevEnd,gst(gstID)%nj)
    real(8) :: pgd(gst(gstID)%ni,kdim,gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)

    real(8) :: gd_send(gst(gstID)%ntrunc+1,2,gst(gstID)%maxLevCount,gst(gstID)%nj)
    real(8) :: gd_recv(gst(gstID)%ntrunc+1,2,gst(gstID)%maxLevCount,gst(gstID)%latPerPE,mpi_nprocs)
    integer :: kfield,kdim,yourid,jm,icount,nsize,ierr,jlev,jlev2,jlat

    call rpn_comm_barrier("GRID",ierr)

    call tmg_start(66,'TRANSP_LATtoLEV')

!$OMP PARALLEL DO PRIVATE(jlat,jlev,jlev2,icount,jm)
    do jlat = 1,gst(gstID)%nj
      do jlev = gst(gstID)%myLevBeg,gst(gstID)%myLevEnd
        jlev2 = jlev-gst(gstID)%myLevBeg+1
        icount = 0
        do jm = 0,gst(gstID)%ntrunc
          icount = icount+1
          gd_send(icount,1,jlev2,jlat) = pgd2(2*jm+1,jlev,jlat)
          gd_send(icount,2,jlev2,jlat) = pgd2(2*jm+2,jlev,jlat)
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    nsize = (gst(gstID)%ntrunc+1)*2*gst(gstID)%maxLevCount*gst(gstID)%latPerPE
    if(mpi_nprocs.gt.1) then
      call rpn_comm_alltoall(gd_send,nsize,"mpi_double_precision",  &
                             gd_recv,nsize,"mpi_double_precision","GRID",ierr)
    else
      gd_recv(:,:,:,:,1) = gd_send(:,:,:,:)
    endif

!$OMP PARALLEL DO PRIVATE(yourid,jlat,jlev,jlev2,icount,jm)
    do yourid = 0,(mpi_nprocs-1)
      do jlat = 1,gst(gstID)%latPerPE
        do jlev = gst(gstID)%allLevBeg(yourid+1),gst(gstID)%allLevEnd(yourid+1)
          jlev2 = jlev-gst(gstID)%allLevBeg(yourid+1)+1
          icount = 0
          do jm = 0,gst(gstID)%ntrunc
            icount = icount+1
            pgd(2*jm+1,jlev,gst(gstID)%myLatBeg+jlat-1) = gd_recv(icount,1,jlev2,jlat,yourid+1)
            pgd(2*jm+2,jlev,gst(gstID)%myLatBeg+jlat-1) = gd_recv(icount,2,jlev2,jlat,yourid+1)
          enddo
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    call tmg_stop(66)

  END SUBROUTINE transpose_LevtoLat

!--------------------------------------------------------------------------------------
! Subroutines to re-order the u and v wind components for mpi version of spgd and spgda
!--------------------------------------------------------------------------------------


  SUBROUTINE interleaveWinds_sp(psp,nflev) 1
    implicit none
    integer :: nflev
    real(8) :: psp(gst(gstID)%maxMyNla,2,gst(gstID)%nk)

    real(8) :: tempvalues(2,nflev*2)
    integer :: jk, ila

!$OMP PARALLEL 
!$OMP DO PRIVATE (ILA,JK,TEMPVALUES)
    do ila = 1, gst(gstID)%maxMyNla
       do jk = 1, nflev
          ! place u in new position in temporary array
          tempvalues(:,(jk*2)-1) = psp(ila,:,jk)
          ! place v in new position in temporary array
          tempvalues(:,jk*2)     = psp(ila,:,jk+nflev)
       enddo
       ! move contents of temporary array back to original array
       psp(ila,:,1:2*nflev) = tempvalues(:,1:2*nflev)
    enddo
!$OMP END DO
!$OMP END PARALLEL

  END SUBROUTINE interleaveWinds_sp



  SUBROUTINE unInterleaveWinds_sp(psp,nflev) 1
    implicit none
    integer :: nflev
    real(8) :: psp(gst(gstID)%maxMyNla,2,gst(gstID)%nk)

    real(8) :: tempvalues(2,nflev*2)
    integer :: jk, ila

!$OMP PARALLEL 
!$OMP DO PRIVATE (ILA,JK,TEMPVALUES)
    do ila = 1, gst(gstID)%maxMyNla
       do jk = 1, nflev
          ! place u in original position in temporary array
          tempvalues(:,jk)       = psp(ila,:,(jk*2)-1)
          ! place v in original position in temporary array
          tempvalues(:,jk+nflev) = psp(ila,:,jk*2)
       enddo
       ! move contents of temporary array back to original array
       psp(ila,:,1:2*nflev) = tempvalues(:,1:2*nflev)
    enddo
!$OMP END DO
!$OMP END PARALLEL

  END SUBROUTINE unInterleaveWinds_sp



  SUBROUTINE interleaveWinds_gd(pgd,nflev) 1
    implicit none
    integer :: nflev
    real(8) :: pgd(gst(gstID)%myLonBeg:gst(gstID)%myLonEnd, gst(gstID)%nk, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)

    real(8) :: tempvalues(nflev*2)
    integer :: jlat, jk, jlon

!$OMP PARALLEL 
!$OMP DO PRIVATE (JLAT,JLON,JK,TEMPVALUES)
    do jlat = gst(gstID)%myLatBeg, gst(gstID)%myLatEnd
       do jlon = gst(gstID)%myLonBeg, gst(gstID)%myLonEnd
          do jk = 1, nflev
             ! place u in original position in temporary array
             tempvalues((jk*2)-1) = pgd(jlon,jk,jlat)
             ! place v in original position in temporary array
             tempvalues(jk*2)     = pgd(jlon,jk+nflev,jlat)
          enddo
          ! move contents of temporary array back to original array
          pgd(jlon,1:2*nflev,jlat) = tempvalues(1:2*nflev)
       enddo
    enddo
!$OMP END DO
!$OMP END PARALLEL

  END SUBROUTINE interleaveWinds_gd



  SUBROUTINE unInterleaveWinds_gd(pgd,nflev) 1
    implicit none
    integer :: nflev
    real(8) :: pgd(gst(gstID)%myLonBeg:gst(gstID)%myLonEnd, gst(gstID)%nk, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)

    real(8) :: tempvalues(nflev*2)
    integer :: jlat, jk, jlon

!$OMP PARALLEL 
!$OMP DO PRIVATE (JLAT,JLON,JK,TEMPVALUES)
    do jlat = gst(gstID)%myLatBeg, gst(gstID)%myLatEnd
       do jlon = gst(gstID)%myLonBeg, gst(gstID)%myLonEnd
          do jk = 1, nflev
             ! place u in original position in temporary array
             tempvalues(jk)       = pgd(jlon,(jk*2)-1,jlat)
             ! place v in original position in temporary array
             tempvalues(jk+nflev) = pgd(jlon,jk*2,jlat)
          enddo
          ! move contents of temporary array back to original array
          pgd(jlon,1:2*nflev,jlat) = tempvalues(1:2*nflev)
       enddo
    enddo
!$OMP END DO
!$OMP END PARALLEL

  END SUBROUTINE unInterleaveWinds_gd

!--------------------------------------------------------------------------------------
! Main spectral transform subroutines
!--------------------------------------------------------------------------------------


  SUBROUTINE GST_SPGD(SP,GD,KFIELD,NFLEV,KDIM),3
    implicit none

    integer :: kfield, nflev, kdim
    real(8) :: sp(gst(gstID)%nla,2,kdim)
    real(8) :: gd(gst(gstID)%ni,kdim,gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)

    real(8), allocatable :: pgd2(:,:,:)
    integer :: jlat, jk, jlon

    ! 1. Inverse Legendre transform
    if(gst(gstID)%mpiMode.eq.2) then
      allocate(pgd2(2*gst(gstID)%maxmCount,kdim,gst(gstID)%nj))
      call tmg_start(72+gst(gstID)%mpiMode,'LT')
      call spgdpar_mpi2(sp,pgd2,kfield,nflev,kdim)
      call tmg_stop(72+gst(gstID)%mpiMode)
      call transpose_MtoLat(pgd2,gd,kfield,kdim)
      deallocate(pgd2)
    else
      write(*,*) 'GST_SPGD: ONLY MPIMODE 2 SUPPORTED! ',gst(gstID)%mpiMode
      call flush(6)
      call exit(1)
    endif

!$OMP PARALLEL 
!$OMP DO PRIVATE (JLAT,JLON,JK)
    ! 2.1 Reset to zero the modes that are not part of the truncation
    do jlat = gst(gstID)%myLatBeg, gst(gstID)%myLatEnd
       do jlon = 2*(gst(gstID)%ntrunc+1)+1, gst(gstID)%ni
          do  jk = 1, kfield
             gd(jlon,jk,jlat) = 0.d0
          enddo
       enddo
    enddo
!$OMP END DO
!$OMP END PARALLEL

    ! 2.2 Apply the FFT 
    call tmg_start(20+gst(gstID)%mpiMode,'FFT')
    call fft_fft3dvar2(gd,kfield,kdim,+1,gst(gstID)%fftID)
    call tmg_stop(20+gst(gstID)%mpiMode)

  END SUBROUTINE GST_SPGD



  SUBROUTINE GST_SPGD4(PSP,PGD,NFLEV) 1,9
    implicit none

    integer :: nflev
    real(8) :: psp(gst(gstID)%maxMyNla,2,gst(gstID)%nk)
    real(8) :: pgd(gst(gstID)%myLonBeg:gst(gstID)%myLonEnd, gst(gstID)%nk, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)

    real(8), allocatable :: psp2(:,:,:),pgd2(:,:,:),pgd3(:,:,:)
    integer :: jlat, jk, jlon

    if(gst(gstID)%mpiMode.ne.4) call abort3d('GST_SPGD4: only mpiMode 4 is allowed!')

    ! check if this mpi task will deal with winds during Legendre transform
    if(gst(gstID)%myLevBeg.le.2*nflev) then
      ! ensure that the number of levels on this mpi task is even to allow interleaving of u and v
      if(mod(gst(gstID)%myLevCount,2).ne.0) then
        write(*,*) 'GST_SPGD5: myLevCount = ',gst(gstID)%myLevCount
        call abort3d('GST_SPGD5: Number of levels on this mpi task must be even!')
      endif
    endif

    allocate(psp2(gst(gstID)%nla, 2, gst(gstID)%myLevBeg:gst(gstID)%myLevEnd))
    allocate(pgd2(2*gst(gstID)%maxmcount,  gst(gstID)%myLevBeg:gst(gstID)%myLevEnd, gst(gstID)%nj))
    allocate(pgd3(gst(gstID)%ni, gst(gstID)%myLevBeg:gst(gstID)%myLevEnd, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd))

    ! 1.0 First reorder wind components to have u and v for same level on same mpi task
    call tmg_start(29,'GST_INTERLEAVE')
    call interleaveWinds_sp(psp,nflev)
    call tmg_stop(29)

    ! 1.1 Transpose data along npex from N to Levels
    call transpose2d_NtoLev(psp,psp2)

    ! 1.2 Inverse Legendre transform
    call tmg_start(72+gst(gstID)%mpiMode,'LT')
    call spgdpar_mpi2d(psp2,pgd2,nflev)
    call tmg_stop(72+gst(gstID)%mpiMode)
    deallocate(psp2)

    ! 1.3 Transpose data along npey from M to Latitudes
    call transpose2d_MtoLat(pgd2,pgd3)
    deallocate(pgd2)

!$OMP PARALLEL 
!$OMP DO PRIVATE (JLAT,JLON,JK)
    ! 2.1 Reset to zero the modes that are not part of the truncation
    do jlat = gst(gstID)%myLatBeg, gst(gstID)%myLatEnd
       do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
          do jlon = 2*(gst(gstID)%ntrunc+1)+1, gst(gstID)%ni
             pgd3(jlon,jk,jlat) = 0.d0
          enddo
       enddo
    enddo
!$OMP END DO
!$OMP END PARALLEL

    ! 2.2 Apply the FFT 
    call tmg_start(20+gst(gstID)%mpiMode,'FFT')
    call fft_fft3dvar2(pgd3,gst(gstID)%myLevCount,gst(gstID)%myLevCount,+1,gst(gstID)%fftID)
    call tmg_stop(20+gst(gstID)%mpiMode)

    ! 2.3 Transpose data along npex from Levels to Longitudes
    call transpose2d_LevtoLon(pgd3,pgd)
    deallocate(pgd3)

    ! 2.4 Now undo reordering of wind components 
    call tmg_start(29,'GST_INTERLEAVE')
    call unInterleaveWinds_gd(pgd,nflev)
    call tmg_stop(29)

  END SUBROUTINE GST_SPGD4


  ! NO MPI MODIFICATIONS YET

  SUBROUTINE GST_GDSP(SP,GD,KFIELD,NFLEV,KDIM) 1,2
    implicit none

    integer :: kfield, nflev, kdim
    real(8) :: sp(gst(gstID)%nla,2,kdim)
    real(8) :: gd(gst(gstID)%ni,kdim,gst(gstID)%nj)

    integer :: jlat, jk, jlon
    real(8) :: zfm(gst(gstID)%ni,kfield,gst(gstID)%nj)

!$OMP PARALLEL 
!$OMP DO PRIVATE (JLAT,JLON,JK)
    do jlat = 1, gst(gstID)%nj
       do jk = 1, kfield
          do jlon = 1, gst(gstID)%ni
             zfm(jlon,jk,jlat) = gd(jlon,jk,jlat)
          enddo
       enddo
    enddo
!$OMP END DO
!$OMP END PARALLEL

    ! 1. Fourier transform all fields for all latitudes
    call tmg_start(20+gst(gstID)%mpiMode,'FFT')
    call fft_fft3dvar(zfm,kfield,kdim,-1,gst(gstID)%fftID)
    call tmg_stop(20+gst(gstID)%mpiMode)

    ! 2. Direct Legendre transform including wind transformations
    call gdsppar(sp,zfm,kfield,nflev,kdim)

  END SUBROUTINE GST_GDSP



  SUBROUTINE SPGDPAR_MPI2(SP,PGD2,KFIELD,NFLEV,KDIM) 1,3
!**s/r SPGDPAR  - Inverse spectral transform(PARALLEL LOOP)

    implicit none

    integer :: kfield, nflev, kdim
    real(8) :: sp(gst(gstID)%nla,2,kdim)
    real(8) :: pgd2(2*gst(gstID)%maxmCount,kdim,gst(gstID)%nj)

    ! local variables
    integer :: jj, jj2, jm, jn
    integer :: ilonr, iloni, jk, jk2, ila, inm

    real(8) :: zjm
    real(8) :: dlalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: dldalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: zfms(gst(gstID)%njlath+1,2,kfield)
    real(8) :: zfma(gst(gstID)%njlath+1,2,kfield)
    real(8) :: dlsp(0:gst(gstID)%ntrunc,2,kfield)

    integer :: thdid, numthd, omp_get_thread_num, omp_get_num_threads

    ! Inverse Legendre transform

!$OMP PARALLEL DO PRIVATE(DLALP,DLDALP,DLSP,ZFMS,ZFMA, &
!$OMP INM,ILA,JM,JN,JK,JJ,JJ2,ZJM,ILONR,ILONI)
    do jm = gst(gstID)%mymBeg, gst(gstID)%mymEnd, gst(gstID)%mymSkip

          ! 2.1 Copy global spectral state into local spectral state
          do jn = jm, gst(gstID)%ntrunc
             ila = gst(gstID)%nind(jm) + jn - jm
             inm = jn - jm
             ! Scalar fields
             do jk = 2*nflev+1, kfield
                dlsp(inm,1,jk) = sp(ila,1,jk)
                dlsp(inm,2,jk) = sp(ila,2,jk)
             enddo
             ! Vector fields
             do jk = 1, 2*nflev
                dlsp(inm,1,jk) = sp(ila,1,jk)*gst(gstID)%r1snp1(ila)
                dlsp(inm,2,jk) = sp(ila,2,jk)*gst(gstID)%r1snp1(ila)
             enddo
          enddo

          ! 2.2  Get Legendre polynomial (and its derivative) for all latitudes
          !      but for the chosen value of "m" from the global array
          call getalp (dlalp,dldalp,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc,jm)

          ! 2.3  Perform the inverse Legendre transform for all fields
          call leginv4(jm,zfms,zfma,dlsp,dlalp,kfield,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc)

          ! 2.4 Passage to Fourier space
          ilonr = 2*gst(gstID)%mymIndex(jm)-1
          iloni = 2*gst(gstID)%mymIndex(jm)
          zjm = real(jm,8)
          do jj = 1, gst(gstID)%nj
             jj2 = gst(gstID)%nj - jj + 1
             if(jj.le.gst(gstID)%njlath) then
                ! Scalar fields
                do jk = 2*nflev+1, kfield
                   pgd2(ilonr,jk,jj)  = zfms(jj,1,jk) + zfma(jj,1,jk)
                   pgd2(iloni,jk,jj) = zfms(jj,2,jk) + zfma(jj,2,jk)
                enddo
                ! Vector fields
                do jk = 1, nflev
                   jk2= jk + nflev
                   pgd2(ilonr,jk,jj)   = -zjm*(zfms(jj,2,jk2) + zfma(jj,2,jk2))
                   pgd2(iloni,jk,jj)   =  zjm*(zfms(jj,1,jk2) + zfma(jj,1,jk2))
                   pgd2(ilonr,jk2,jj)  = -zjm*(zfms(jj,2,jk ) + zfma(jj,2,jk ))
                   pgd2(iloni,jk2,jj)  =  zjm*(zfms(jj,1,jk ) + zfma(jj,1,jk ))
                enddo
             else
                ! Scalar fields
                do jk = 2*nflev+1, kfield
                   pgd2(ilonr,jk,jj) = zfms(jj2,1,jk) - zfma(jj2,1,jk)
                   pgd2(iloni,jk,jj) = zfms(jj2,2,jk) - zfma(jj2,2,jk)
                enddo
                ! Vector fields
                do jk = 1, nflev
                   jk2 = jk + nflev
                   pgd2(ilonr,jk ,jj) = -zjm*(zfms(jj2,2,jk2) - zfma(jj2,2,jk2))
                   pgd2(iloni,jk ,jj) =  zjm*(zfms(jj2,1,jk2) - zfma(jj2,1,jk2))
                   pgd2(ilonr,jk2,jj) = -zjm*(zfms(jj2,2,jk ) - zfma(jj2,2,jk ))
                   pgd2(iloni,jk2,jj) =  zjm*(zfms(jj2,1,jk ) - zfma(jj2,1,jk ))
                enddo
             endif
          enddo

          ! 2.5 Completion of the computation of the winds in Fourier space
          call leginv4(jm,zfma,zfms,dlsp,dldalp,2*nflev,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc)

          do jj = 1, gst(gstID)%nj
             jj2 = gst(gstID)%nj - jj + 1
             if(jj.ne.jj2) then
                ! For latitudes not exactly at equator
                if(jj.le.gst(gstID)%njlath) then
                   do jk = 1,nflev
                      jk2 = jk + nflev
                      ! northern latitudes
                      pgd2(ilonr,jk ,jj) = pgd2(ilonr,jk ,jj) -(zfms(jj,1,jk ) + zfma(jj,1,jk ))
                      pgd2(iloni,jk ,jj) = pgd2(iloni,jk ,jj) -(zfms(jj,2,jk ) + zfma(jj,2,jk ))
                      pgd2(ilonr,jk2,jj) = pgd2(ilonr,jk2,jj) +(zfms(jj,1,jk2) + zfma(jj,1,jk2))
                      pgd2(iloni,jk2,jj) = pgd2(iloni,jk2,jj) +(zfms(jj,2,jk2) + zfma(jj,2,jk2))
                   enddo
                else
                   do jk = 1,nflev
                      jk2 = jk + nflev
                      ! southern latitudes
                      pgd2(ilonr,jk ,jj) = pgd2(ilonr,jk ,jj) -(zfms(jj2,1,jk ) - zfma(jj2,1,jk ))
                      pgd2(iloni,jk ,jj) = pgd2(iloni,jk ,jj) -(zfms(jj2,2,jk ) - zfma(jj2,2,jk ))
                      pgd2(ilonr,jk2,jj) = pgd2(ilonr,jk2,jj) +(zfms(jj2,1,jk2) - zfma(jj2,1,jk2))
                      pgd2(iloni,jk2,jj) = pgd2(iloni,jk2,jj) +(zfms(jj2,2,jk2) - zfma(jj2,2,jk2))
                   enddo
                endif
             else
                ! Special case for the equator (jj.eq.jj2)
                write(*,*) 'SPGDPAR: special case of jj.eq.jj2!!!'
                do jk = 1, nflev
                   jk2 = jk + nflev
                   pgd2(ilonr,jk ,jj) = pgd2(ilonr,jk ,jj) -(zfms(jj,1,jk ) + zfma(jj,1,jk ))
                   pgd2(iloni,jk ,jj) = pgd2(iloni,jk ,jj) -(zfms(jj,2,jk ) + zfma(jj,2,jk ))
                   pgd2(ilonr,jk2,jj) = pgd2(ilonr,jk2,jj) +(zfms(jj,1,jk2) + zfma(jj,1,jk2))
                   pgd2(iloni,jk2,jj) = pgd2(iloni,jk2,jj) +(zfms(jj,2,jk2) + zfma(jj,2,jk2))
                enddo
             endif
          enddo

!   end loop on m
    enddo
!$OMP END PARALLEL DO

  END SUBROUTINE SPGDPAR_MPI2



  SUBROUTINE SPGDPAR_MPI2D(PSP,PGD2,NFLEV) 1,3
!**s/r SPGDPAR  - Inverse spectral transform(PARALLEL LOOP)
    implicit none

    integer :: nflev
    real(8) :: psp(gst(gstID)%nla,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: pgd2(2*gst(gstID)%maxmcount,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd,gst(gstID)%nj)

    ! local variables
    integer :: jj, jj2, jm, jn, ilonr, iloni, jk, jk2, ila, inm

    real(8) :: zjm
    real(8) :: dlalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: dldalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: zfms(gst(gstID)%njlath+1,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: zfma(gst(gstID)%njlath+1,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: dlsp(0:gst(gstID)%ntrunc,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)

    ! Inverse Legendre transform

!$OMP PARALLEL DO PRIVATE(DLALP,DLDALP,DLSP,ZFMS,ZFMA, &
!$OMP INM,ILA,JM,JN,JK,JJ,JJ2,ZJM,ILONR,ILONI)
    do jm = gst(gstID)%mymBeg, gst(gstID)%mymEnd, gst(gstID)%mymSkip

       ! 2.1 Copy global spectral state into local spectral state
       do jn = jm, gst(gstID)%ntrunc
          ila = gst(gstID)%nind(jm) + jn - jm
          inm = jn - jm
          do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
             if(jk .gt. 2*nflev) then
                ! Scalar fields
                dlsp(inm,1,jk) = psp(ila,1,jk)
                dlsp(inm,2,jk) = psp(ila,2,jk)
             else
                ! Vector fields                
                dlsp(inm,1,jk) = psp(ila,1,jk)*gst(gstID)%r1snp1(ila)
                dlsp(inm,2,jk) = psp(ila,2,jk)*gst(gstID)%r1snp1(ila)
             endif
          enddo
       enddo

       ! 2.2  Get Legendre polynomial (and its derivative) for all latitudes
       !      but for the chosen value of "m" from the global array
       call getalp (dlalp,dldalp,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc,jm)

       ! 2.3  Perform the inverse Legendre transform for all fields
       call leginv2d(jm,zfms,zfma,dlsp,dlalp,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc)

       ! 2.4 Passage to Fourier space
       ilonr = 2*gst(gstID)%mymIndex(jm)-1
       iloni = 2*gst(gstID)%mymIndex(jm)
       zjm = real(jm,8)

       do jj = 1, gst(gstID)%nj
          jj2 = gst(gstID)%nj - jj + 1
          if(jj.le.gst(gstID)%njlath) then
             ! Scalar fields
             do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
                if(jk .gt. 2*nflev) then
                   pgd2(ilonr,jk,jj) = zfms(jj,1,jk) + zfma(jj,1,jk)
                   pgd2(iloni,jk,jj) = zfms(jj,2,jk) + zfma(jj,2,jk)
                endif
             enddo
             ! Vector fields: Note that u and v are interleaved in mode 5!
             do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd, 2
                if(jk .le. 2*nflev) then
                   jk2 = jk + 1  ! jk is u, jk2 is v
                   pgd2(ilonr,jk,jj)  = -zjm*(zfms(jj,2,jk2) + zfma(jj,2,jk2))
                   pgd2(iloni,jk,jj)  =  zjm*(zfms(jj,1,jk2) + zfma(jj,1,jk2))
                   pgd2(ilonr,jk2,jj) = -zjm*(zfms(jj,2,jk)  + zfma(jj,2,jk))
                   pgd2(iloni,jk2,jj) =  zjm*(zfms(jj,1,jk)  + zfma(jj,1,jk))
                endif
             enddo
          else
             ! Scalar fields
             do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
                if(jk .gt. 2*nflev) then
                   pgd2(ilonr,jk,jj) = zfms(jj2,1,jk) - zfma(jj2,1,jk)
                   pgd2(iloni,jk,jj) = zfms(jj2,2,jk) - zfma(jj2,2,jk)
                endif
             enddo
             ! Vector fields: Note that u and v are interleaved in mode 5!
             do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd, 2
                if(jk .le. 2*nflev) then
                   jk2 = jk + 1  ! jk is u, jk2 is v
                   pgd2(ilonr,jk,jj)  = -zjm*(zfms(jj2,2,jk2) - zfma(jj2,2,jk2))
                   pgd2(iloni,jk,jj)  =  zjm*(zfms(jj2,1,jk2) - zfma(jj2,1,jk2))
                   pgd2(ilonr,jk2,jj) = -zjm*(zfms(jj2,2,jk)  - zfma(jj2,2,jk))
                   pgd2(iloni,jk2,jj) =  zjm*(zfms(jj2,1,jk)  - zfma(jj2,1,jk))
                endif
             enddo
          endif
       enddo

       ! 2.5 Completion of the computation of the winds in Fourier space
       call leginv2d(jm,zfma,zfms,dlsp,dldalp,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc)

       do jj = 1, gst(gstID)%nj
          jj2 = gst(gstID)%nj - jj + 1
          if(jj.ne.jj2) then
             ! For latitudes not exactly at equator
             if(jj.le.gst(gstID)%njlath) then
                ! northern latitudes
                do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd, 2
                   if(jk .le. 2*nflev) then
                      ! u component
                      pgd2(ilonr,jk,jj)  = pgd2(ilonr,jk,jj)  - (zfms(jj,1,jk)  + zfma(jj,1,jk))
                      pgd2(iloni,jk,jj)  = pgd2(iloni,jk,jj)  - (zfms(jj,2,jk)  + zfma(jj,2,jk))
                      ! v component
                      jk2 = jk + 1  ! jk is u, jk2 is v
                      pgd2(ilonr,jk2,jj) = pgd2(ilonr,jk2,jj) + (zfms(jj,1,jk2) + zfma(jj,1,jk2))
                      pgd2(iloni,jk2,jj) = pgd2(iloni,jk2,jj) + (zfms(jj,2,jk2) + zfma(jj,2,jk2))
                   endif
                enddo
             else
                ! southern latitudes
                do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd, 2
                   if(jk .le. 2*nflev) then
                      ! u component
                      pgd2(ilonr,jk ,jj) = pgd2(ilonr,jk ,jj) -(zfms(jj2,1,jk ) - zfma(jj2,1,jk ))
                      pgd2(iloni,jk ,jj) = pgd2(iloni,jk ,jj) -(zfms(jj2,2,jk ) - zfma(jj2,2,jk ))
                      ! v component
                      jk2 = jk + 1
                      pgd2(ilonr,jk2,jj) = pgd2(ilonr,jk2,jj) +(zfms(jj2,1,jk2) - zfma(jj2,1,jk2))
                      pgd2(iloni,jk2,jj) = pgd2(iloni,jk2,jj) +(zfms(jj2,2,jk2) - zfma(jj2,2,jk2))
                   endif
                enddo
             endif
          else
             ! Special case for the equator (jj.eq.jj2)
             write(*,*) 'SPGDPAR: special case of jj.eq.jj2!!!'
             do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd, 2
                if(jk .le. 2*nflev) then
                   ! u component
                   pgd2(ilonr,jk ,jj) = pgd2(ilonr,jk ,jj) -(zfms(jj,1,jk ) + zfma(jj,1,jk ))
                   pgd2(iloni,jk ,jj) = pgd2(iloni,jk ,jj) -(zfms(jj,2,jk ) + zfma(jj,2,jk ))
                   ! v component
                   jk2 = jk + 1
                   pgd2(ilonr,jk2,jj) = pgd2(ilonr,jk2,jj) +(zfms(jj,1,jk2) + zfma(jj,1,jk2))
                   pgd2(iloni,jk2,jj) = pgd2(iloni,jk2,jj) +(zfms(jj,2,jk2) + zfma(jj,2,jk2))
                endif
             enddo
          endif
       enddo

    enddo  ! end loop on m
!$OMP END PARALLEL DO

  END SUBROUTINE SPGDPAR_MPI2D


  ! NO MPI MODIFICATIONS YET

  SUBROUTINE GDSPPAR(SP,PGD2,KFIELD,NFLEV,KDIM) 1,3
    implicit none

    INTEGER :: kfield,nflev,kdim
    real(8) :: sp(gst(gstID)%nla,2,kdim)
    real(8) :: pgd2(gst(gstID)%ni,kdim,gst(gstID)%nj)

    integer :: jj, jk, ilonr, iloni, jm ,ila, inm, jn, jm0, ins, jns
    integer :: njlath,nj,ntrunc
    real(8) :: dlalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: dldalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: zfms(gst(gstID)%njlath+1,2,kfield)
    real(8) :: zfma(gst(gstID)%njlath+1,2,kfield)
    real(8) :: dlsp(0:gst(gstID)%ntrunc,2,kfield)
    real(8) :: dlsp2(0:gst(gstID)%ntrunc,2,2*nflev)
    real(8) :: zjm, dlrwt(gst(gstID)%nj), dlrwocs(gst(gstID)%nj)

    ! 1. Adjustment needed when an odd number of latitudes is considered
    nj = gst(gstID)%nj
    njlath = gst(gstID)%njlath
    ntrunc = gst(gstID)%ntrunc
    dlrwt(:)   = gst(gstID)%rwt(:)
    dlrwocs(:) = gst(gstID)%rwocs(:)
    if (mod(nj,2).ne.0) then
       dlrwt(njlath)   = dlrwt(njlath)/2.d0
       dlrwocs(njlath) = dlrwocs(njlath)/2.d0
    end if

!$OMP PARALLEL PRIVATE(DLALP,DLDALP,JM0,DLSP,DLSP2,ZFMS,ZFMA)
!$OMP DO PRIVATE(INM,ILA,JM,JN,JK,JJ,ZJM,JNS,INS,ILONR,ILONI)
    do jm0 = 0, ntrunc/2
       ins = 1
       if(jm0.eq.ntrunc-jm0) ins = 0
       do jns = 0,ins
          jm = (1-jns)*jm0 + jns*(ntrunc - jm0)
          ilonr = 2 * jm + 1
          iloni = ilonr + 1
          zjm   = real(jm,8)

          ! 2.1 Fetch the Legendre functions and their derivatives for this choice of "m"
          call getalp(dlalp,dldalp,njlath,ntrunc,ntrunc,jm)

          ! 2.2  Build the symmetric and anti-symmetric Fourier coefficients including
          !      the appropriate quadrature weights (see scientific notes)
          do jj = 1, njlath

             ! 2.2.1  Coefficients for scalar fields
             do jk = 2*nflev+1, kfield
                ! symmetric coefficients
                zfms(jj,1,jk) = dlrwt(jj)*(pgd2(ilonr,jk,jj) + pgd2(ilonr,jk,1+nj-jj))
                zfms(jj,2,jk) = dlrwt(jj)*(pgd2(iloni,jk,jj) + pgd2(iloni,jk,1+nj-jj))
                ! antisymmetric coefficients
                zfma(jj,1,jk) = dlrwt(jj)*(pgd2(ilonr,jk,jj) - pgd2(ilonr,jk,1+nj-jj))
                zfma(jj,2,jk) = dlrwt(jj)*(pgd2(iloni,jk,jj) - pgd2(iloni,jk,1+nj-jj))
             enddo

             ! 2.2.2 Coefficients associated with the wind fields
             do jk = 1, nflev
                ! vorticity: symmetric coefficients
                zfms(jj,1,jk) = -zjm*dlrwocs(jj)*(pgd2(iloni,jk+nflev,jj)+pgd2(iloni,jk+nflev,1+nj-jj))
                zfms(jj,2,jk) = zjm*dlrwocs(jj)*(pgd2(ilonr,jk+nflev,jj) +pgd2(ilonr,jk+nflev,1+nj-jj))
                ! vorticity: antisymmetric coefficients
                zfma(jj,1,jk) = -zjm*dlrwocs(jj)*(pgd2(iloni,jk+nflev,jj)- pgd2(iloni,jk+nflev,1+nj-jj))
                zfma(jj,2,jk) = zjm*dlrwocs(jj) *(pgd2(ilonr,jk+nflev,jj)- pgd2(ilonr,jk+nflev,1+nj-jj))
                ! divergence: symmetric coefficients
                zfms(jj,1,jk+nflev) = -zjm*dlrwocs(jj)*(pgd2(iloni,jk,jj)+ pgd2(iloni,jk,1+nj-jj))
                zfms(jj,2,jk+nflev) = zjm*dlrwocs(jj)*(pgd2(ilonr,jk,jj)+ pgd2(ilonr,jk,1+nj-jj))
                ! divergence: antisymmetric coefficients
                zfma(jj,1,jk+nflev) = -zjm*dlrwocs(jj)*(pgd2(iloni,jk,jj)- pgd2(iloni,jk,1+nj-jj))
                zfma(jj,2,jk+nflev) = zjm*dlrwocs(jj)*(pgd2(ilonr,jk,jj)- pgd2(ilonr,jk,1+nj-jj))
             enddo
          enddo

          ! 2.3 First one with ALP for all scalar fields and for half the terms
          !     required to define the divergence and vorticity
          call legdir3 (jm, zfms,zfma,dlsp,dlalp,kfield,njlath,ntrunc,ntrunc)
     
          ! 2.4  Second transform with DALP to complete the construction of the
          !      vorticity and divergence fields
          do jj = 1, njlath
             do jk = 1, nflev
                ! symmetric coefficients for zonal wind
                zfms(jj,1,jk) = dlrwocs(jj)*(pgd2(ilonr,jk,jj) + pgd2(ilonr,jk,1+nj-jj))
                zfms(jj,2,jk) = dlrwocs(jj)*(pgd2(iloni,jk,jj) + pgd2(iloni,jk,1+nj-jj))
                ! antisymmetric coefficients for zonal wind
                zfma(jj,1,jk) = dlrwocs(jj)*(pgd2(ilonr,jk,jj) - pgd2(ilonr,jk,1+nj-jj))
                zfma(jj,2,jk) = dlrwocs(jj)*(pgd2(iloni,jk,jj) - pgd2(iloni,jk,1+nj-jj))
             enddo
             do jk = nflev+1, 2*nflev
                ! symmetric coefficients for zonal wind
                zfms(jj,1,jk) = -dlrwocs(jj)*(pgd2(ilonr,jk,jj) + pgd2(ilonr,jk,1+nj-jj))
                zfms(jj,2,jk) = -dlrwocs(jj)*(pgd2(iloni,jk,jj) + pgd2(iloni,jk,1+nj-jj))
                ! antisymmetric coefficients for zonal wind
                zfma(jj,1,jk) = -dlrwocs(jj)*(pgd2(ilonr,jk,jj) - pgd2(ilonr,jk,1+nj-jj))
                zfma(jj,2,jk) = -dlrwocs(jj)*(pgd2(iloni,jk,jj) - pgd2(iloni,jk,1+nj-jj))
             enddo
          enddo
          call legdir3(jm,zfma,zfms,dlsp2,dldalp,2*nflev,njlath,ntrunc,ntrunc)

          ! 2.5  Transfer the result in the global state
          do jn = jm, ntrunc
             ila = gst(gstid)%nind(jm) + jn - jm
             inm = jn - jm
             do jk = 1, 2*nflev
                sp(ila,1,jk) = dlsp(inm,1,jk) + dlsp2(inm,1,jk)
                sp(ila,2,jk) = dlsp(inm,2,jk) + dlsp2(inm,2,jk)
             enddo
             do jk = 2*nflev+1,kfield
                sp(ila,1,jk) = dlsp(inm,1,jk)
                sp(ila,2,jk) = dlsp(inm,2,jk)
             enddo
          enddo
       ! End of loop on zonal wavenumbers
       enddo
    enddo
!$OMP END DO
!$OMP END PARALLEL
  END SUBROUTINE GDSPPAR



  SUBROUTINE GST_SPGDA(SP,GD,KFIELD,NFLEV,KDIM),3
    implicit none

    integer :: kfield, nflev, kdim
    real(8) :: sp(gst(gstID)%nla,2,kdim)
    real(8) :: gd(gst(gstID)%ni,kdim,gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)

    integer :: jk, jlon, jlat
    real(8) :: zfm(gst(gstID)%ni,kfield,gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)
    real(8), allocatable :: pgd2(:,:,:)

!$OMP PARALLEL 
!$OMP DO PRIVATE (JLAT,JLON,JK)
    do jlat = gst(gstID)%myLatBeg, gst(gstID)%myLatEnd
       do jk = 1, kfield
          do jlon = 1, gst(gstID)%ni
             zfm(jlon,jk,jlat) = gd(jlon,jk,jlat)
          enddo
       enddo
    enddo
!$OMP END DO
!$OMP END PARALLEL

    ! 1. Fourier transform all fields for all latitudes
    call tmg_start(20+gst(gstID)%mpiMode,'FFT')
    call fft_fft3dvar2(zfm,kfield,kdim,-1,gst(gstID)%fftID)
    call tmg_stop(20+gst(gstID)%mpiMode)

    ! 2. Direct Legendre transform including wind transformations
    if(gst(gstID)%mpiMode.eq.2) then
      allocate(pgd2(2*gst(gstID)%maxmCount,kdim,gst(gstID)%nj))
      call transpose_LattoM(pgd2,zfm,kfield,kdim)
      call tmg_start(72+gst(gstID)%mpiMode,'LT')
      call spgdapar_mpi2(sp,pgd2,kfield,nflev,kdim)
      call tmg_stop(72+gst(gstID)%mpiMode)
      deallocate(pgd2)
    else
      !call spgdapar(sp,zfm,kfield,nflev,kdim)
      write(*,*) 'GST_SPGDA: ONLY MPIMODE 2 SUPPORTED! ',gst(gstID)%mpiMode
      call flush(6)
      call exit(1)
    endif

  END SUBROUTINE GST_SPGDA



  SUBROUTINE GST_SPGDA4(PSP,PGD,NFLEV) 1,9
    implicit none

    integer :: nflev
    real(8) :: psp(gst(gstID)%maxMyNla,2,gst(gstID)%nk)
    real(8) :: pgd(gst(gstID)%myLonBeg:gst(gstID)%myLonEnd, gst(gstID)%nk, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)

    integer :: jk, jlon, jlat
    real(8), allocatable :: psp2(:,:,:),pgd2(:,:,:),pgd3(:,:,:)

    if(gst(gstID)%mpiMode.ne.4) call abort3d('GST_SPGDA4: only mpiMode 4 is allowed!')

    ! check if this mpi task will deal with winds during Legendre transform
    if(gst(gstID)%myLevBeg.le.2*nflev) then
      ! ensure that the number of levels on this mpi task is even to allow interleaving of u and v
      if(mod(gst(gstID)%myLevCount,2).ne.0) then
        write(*,*) 'GST_SPGD5: myLevCount = ',gst(gstID)%myLevCount
        call abort3d('GST_SPGD5: Number of levels on this mpi task must be even!')
      endif
    endif

    allocate(psp2(gst(gstID)%nla, 2, gst(gstID)%myLevBeg:gst(gstID)%myLevEnd))
    allocate(pgd2(2*gst(gstID)%maxmcount,  gst(gstID)%myLevBeg:gst(gstID)%myLevEnd, gst(gstID)%nj))
    allocate(pgd3(gst(gstID)%ni, gst(gstID)%myLevBeg:gst(gstID)%myLevEnd, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd))

    ! First reorder wind components to have u and v for same level on same mpi task
    call tmg_start(29,'GST_INTERLEAVE')
    call interleaveWinds_gd(pgd,nflev)
    call tmg_stop(29)

    ! Transpose data along npex from Longitudes to Levels
    call transpose2d_LontoLev(pgd,pgd3)

    ! Fourier transform all fields for all latitudes
    call tmg_start(20+gst(gstID)%mpiMode,'FFT')
    call fft_fft3dvar2(pgd3,gst(gstID)%myLevCount,gst(gstID)%myLevCount,-1,gst(gstID)%fftID)
    call tmg_stop(20+gst(gstID)%mpiMode)

    ! Transpose data along npey from Latitudes to M
    call transpose2d_LattoM(pgd3,pgd2)
    deallocate(pgd3)

    ! Direct Legendre transform including wind transformations
    call tmg_start(72+gst(gstID)%mpiMode,'LT')
    call spgdapar_mpi2d(psp2,pgd2,nflev)
    call tmg_stop(72+gst(gstID)%mpiMode)
    deallocate(pgd2)

    ! Transpose data along npex from Levels to N
    call transpose2d_LevtoN(psp2,psp)
    deallocate(psp2)

    ! Now undo reordering of wind components 
    call tmg_start(29,'GST_INTERLEAVE')
    call unInterleaveWinds_sp(psp,nflev)
    call tmg_stop(29)

  END SUBROUTINE GST_SPGDA4



  SUBROUTINE SPGDAPAR_MPI2(SP,PGD2,KFIELD,NFLEV,KDIM) 1,3
    implicit none

    integer :: kfield,nflev,kdim
    real(8) :: sp(gst(gstID)%nla,2,kdim)
    real(8) :: pgd2(2*gst(gstID)%maxmCount,kdim,gst(gstID)%nj)

    integer :: jj, jj2, jk, jk2, ilonr, iloni, jm ,ila, inm, jn
    real(8) :: zjm,dlrwt(gst(gstID)%nj)
    real(8) :: dlalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: dldalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: dlsp(0:gst(gstID)%ntrunc,2,kfield)
    real(8) :: dlsp2(0:gst(gstID)%ntrunc,2,2*nflev)
    real(8) :: zfms(gst(gstID)%njlath+1,2,kfield)
    real(8) :: zfma(gst(gstID)%njlath+1,2,kfield)

    !    1. Set up according to the desired grid
    !       ---------------------
    dlrwt(:) = gst(gstID)%rwt(:)
    if (mod(gst(gstID)%nj,2).ne.0) then
       dlrwt(gst(gstID)%njlath) = dlrwt(gst(gstID)%njlath)/2.d0
    end if

    ! 2. Fourier transform all fields for all latitudes
!$OMP PARALLEL DO PRIVATE(DLALP,DLDALP,DLSP,DLSP2,ZFMS,ZFMA, &
!$OMP INM,ILA,JM,JN,JK,JK2,JJ,JJ2,ZJM,ILONR,ILONI)
    do jm = gst(gstID)%mymBeg, gst(gstID)%mymEnd, gst(gstID)%mymSkip

          ilonr = 2*gst(gstID)%mymIndex(jm)-1
          iloni = 2*gst(gstID)%mymIndex(jm)
          zjm   = real(jm,8)

          ! 2.1 Fetch the Legendre functions and their derivatives for this choice of "m"
          call getalp(dlalp,dldalp,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc,jm)

          ! 2.2  Build the symmetric and anti-symmetric Fourier coefficients including
          !      the appropriate quadrature weights (see scientific notes)
          do jj = 1, gst(gstID)%njlath
             do jk = 1, kfield
                zfms(jj,1,jk) = 0.0d0
                zfms(jj,2,jk) = 0.0d0
                zfma(jj,1,jk) = 0.0d0
                zfma(jj,2,jk) = 0.0d0
             enddo
          enddo

          do jj = 1, gst(gstID)%nj
             jj2 = gst(gstID)%nj-jj+1
             ! 2.2.1 Coefficients associated with the scalar fields
             if(jj.le.gst(gstID)%njlath) then
                ! Northern hemisphere
                do jk = 2*nflev+1, kfield
                   zfms(jj,1,jk) = pgd2(ilonr,jk,jj)
                   zfms(jj,2,jk) = pgd2(iloni,jk,jj)
                   zfma(jj,1,jk) = pgd2(ilonr,jk,jj)
                   zfma(jj,2,jk) = pgd2(iloni,jk,jj)
                enddo
             else
                ! Southern hemisphere
                do jk = 2*nflev+1, kfield
                   zfms(jj2,1,jk) = zfms(jj2,1,jk) + pgd2(ilonr,jk,jj)
                   zfms(jj2,2,jk) = zfms(jj2,2,jk) + pgd2(iloni,jk,jj)
                   zfma(jj2,1,jk) = zfma(jj2,1,jk) - pgd2(ilonr,jk,jj)
                   zfma(jj2,2,jk) = zfma(jj2,2,jk) - pgd2(iloni,jk,jj)
                enddo
             endif

             ! 2.2.2 Coefficients associated with the wind fields
             if(jj.le.gst(gstID)%njlath) then
                ! Northern hemisphere
                do jk = 1, nflev
                   jk2 = jk+nflev
                   ! vorticity: symmetric coefficients
                   zfms(jj,1,jk ) = -pgd2(iloni,jk2,jj)
                   zfms(jj,2,jk ) =  pgd2(ilonr,jk2,jj)
                   ! vorticity: antisymmetric coefficients
                   zfma(jj,1,jk ) = -pgd2(iloni,jk2,jj)
                   zfma(jj,2,jk ) =  pgd2(ilonr,jk2,jj)
                   ! divergence: symmetric coefficients
                   zfms(jj,1,jk2) = -pgd2(iloni,jk ,jj)
                   zfms(jj,2,jk2) =  pgd2(ilonr,jk ,jj)
                   ! divergence: antisymmetric coefficients
                   zfma(jj,1,jk2) = -pgd2(iloni,jk ,jj)
                   zfma(jj,2,jk2) =  pgd2(ilonr,jk ,jj)
                enddo
             else
                ! Southern hemisphere
                do jk = 1, nflev
                   jk2 = jk+nflev
                   ! vorticity: symmetric coefficients
                   zfms(jj2,1,jk ) = zfms(jj2,1,jk ) - pgd2(iloni,jk2,jj)
                   zfms(jj2,2,jk ) = zfms(jj2,2,jk ) + pgd2(ilonr,jk2,jj)
                   ! vorticity: antisymmetric coefficients
                   zfma(jj2,1,jk ) = zfma(jj2,1,jk ) + pgd2(iloni,jk2,jj)
                   zfma(jj2,2,jk ) = zfma(jj2,2,jk ) - pgd2(ilonr,jk2,jj)
                   ! divergence: symmetric coefficients
                   zfms(jj2,1,jk2) = zfms(jj2,1,jk2) - pgd2(iloni,jk ,jj)
                   zfms(jj2,2,jk2) = zfms(jj2,2,jk2) + pgd2(ilonr,jk ,jj)
                   ! divergence: antisymmetric coefficients
                   zfma(jj2,1,jk2) = zfma(jj2,1,jk2) + pgd2(iloni,jk ,jj)
                   zfma(jj2,2,jk2) = zfma(jj2,2,jk2) - pgd2(ilonr,jk ,jj)
                enddo
             endif
          enddo

          do jj = 1, gst(gstID)%njlath
             do jk = 1, kfield
                zfms(jj,1,jk) = dlrwt(jj)*zfms(jj,1,jk)
                zfms(jj,2,jk) = dlrwt(jj)*zfms(jj,2,jk)
                zfma(jj,1,jk) = dlrwt(jj)*zfma(jj,1,jk)
                zfma(jj,2,jk) = dlrwt(jj)*zfma(jj,2,jk)
             enddo
             do jk = 1, 2*nflev
                zfms(jj,1,jk) = zjm*zfms(jj,1,jk)
                zfms(jj,2,jk) = zjm*zfms(jj,2,jk)
                zfma(jj,1,jk) = zjm*zfma(jj,1,jk)
                zfma(jj,2,jk) = zjm*zfma(jj,2,jk)
             enddo
          enddo

          ! 2.3 First one with ALP for all scalar fields and for half the terms
          !     required to define the divergence and vorticity
          call legdir4(jm,zfms,zfma,dlsp,dlalp,kfield,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc)
                                
          ! 2.4  Second transform with DALP to complete the construction of the
          !      vorticity and divergence fields
          do jj = 1, gst(gstID)%njlath
             do jk = 1, 2*nflev
                zfms(jj,1,jk) = 0.0d0
                zfms(jj,2,jk) = 0.0d0
                zfma(jj,1,jk) = 0.0d0
                zfma(jj,2,jk) = 0.0d0
             enddo
          enddo

          do jj = 1, gst(gstID)%nj
             jj2 = gst(gstID)%nj-jj+1
             if(jj.le.gst(gstID)%njlath) then
                ! Northern hemisphere
                do jk = 1, nflev
                   jk2 = jk+nflev
                   ! symmetric coefficients for zonal wind
                   zfms(jj,1,jk ) =  pgd2(ilonr,jk ,jj)
                   zfms(jj,2,jk ) =  pgd2(iloni,jk ,jj)
                   ! antisymmetric coefficients for zonal wind
                   zfma(jj,1,jk ) =  pgd2(ilonr,jk ,jj)
                   zfma(jj,2,jk ) =  pgd2(iloni,jk ,jj)
                   ! symmetric coefficients for meridional wind
                   zfms(jj,1,jk2) = -pgd2(ilonr,jk2,jj)
                   zfms(jj,2,jk2) = -pgd2(iloni,jk2,jj)
                   ! antisymmetric coefficients for meridional wind
                   zfma(jj,1,jk2) = -pgd2(ilonr,jk2,jj)
                   zfma(jj,2,jk2) = -pgd2(iloni,jk2,jj)
                enddo
             else
                ! Southern hemisphere
                do jk = 1, nflev
                   jk2 = jk+nflev
                   ! symmetric coefficients for zonal wind
                   zfms(jj2,1,jk ) = zfms(jj2,1,jk ) + pgd2(ilonr,jk ,jj)
                   zfms(jj2,2,jk ) = zfms(jj2,2,jk ) + pgd2(iloni,jk ,jj)
                   ! antisymmetric coefficients for zonal wind
                   zfma(jj2,1,jk ) = zfma(jj2,1,jk ) - pgd2(ilonr,jk ,jj)
                   zfma(jj2,2,jk ) = zfma(jj2,2,jk ) - pgd2(iloni,jk ,jj)
                   ! symmetric coefficients for meridional wind
                   zfms(jj2,1,jk2) = zfms(jj2,1,jk2) - pgd2(ilonr,jk2,jj)
                   zfms(jj2,2,jk2) = zfms(jj2,2,jk2) - pgd2(iloni,jk2,jj)
                   ! antisymmetric coefficients for meridional wind
                   zfma(jj2,1,jk2) = zfma(jj2,1,jk2) + pgd2(ilonr,jk2,jj)
                   zfma(jj2,2,jk2) = zfma(jj2,2,jk2) + pgd2(iloni,jk2,jj)
                enddo
             endif
          enddo

          do jj = 1, gst(gstID)%njlath
             do jk = 1, 2*nflev
                zfms(jj,1,jk) = dlrwt(jj)*zfms(jj,1,jk)
                zfms(jj,2,jk) = dlrwt(jj)*zfms(jj,2,jk)
                zfma(jj,1,jk) = dlrwt(jj)*zfma(jj,1,jk)
                zfma(jj,2,jk) = dlrwt(jj)*zfma(jj,2,jk)
             enddo
          enddo

          call legdir4(jm,zfma,zfms,dlsp2,dldalp,2*nflev,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc)

          ! 2.5  Transfer the result in the global state
          do jn = jm, gst(gstID)%ntrunc
             ila = gst(gstID)%nind(jm) + jn - jm
             inm = jn - jm
             do jk = 1, 2*nflev
                sp(ila,1,jk) = -gst(gstID)%r1snp1(ila)*(dlsp(inm,1,jk) + dlsp2(inm,1,jk))
                sp(ila,2,jk) = -gst(gstID)%r1snp1(ila)*(dlsp(inm,2,jk) + dlsp2(inm,2,jk))
             enddo
             do jk = 2*nflev+1,kfield
                sp(ila,1,jk) = dlsp(inm,1,jk)
                sp(ila,2,jk) = dlsp(inm,2,jk)
             enddo
          enddo

    ! End of loop on zonal wavenumbers
    enddo
!$OMP END PARALLEL DO

  END SUBROUTINE SPGDAPAR_MPI2



  SUBROUTINE SPGDAPAR_MPI2D(PSP,PGD2,NFLEV) 1,3
    implicit none

    integer :: nflev
    real(8) :: psp(gst(gstID)%nla,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: pgd2(2*gst(gstID)%maxmCount,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd,gst(gstID)%nj)

    integer :: jj, jj2, jk, jk2, ilonr, iloni, jm ,ila, inm, jn
    real(8) :: zjm,dlrwt(gst(gstID)%nj)
    real(8) :: dlalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: dldalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: dlsp(0:gst(gstID)%ntrunc,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: dlsp2(0:gst(gstID)%ntrunc,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: zfms(gst(gstID)%njlath+1,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: zfma(gst(gstID)%njlath+1,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)

    !    1. Set up according to the desired grid
    !       ---------------------
    dlrwt(:) = gst(gstID)%rwt(:)
    if (mod(gst(gstID)%nj,2).ne.0) then
       dlrwt(gst(gstID)%njlath) = dlrwt(gst(gstID)%njlath)/2.d0
    end if

    ! 2. Fourier transform all fields for all latitudes
!$OMP PARALLEL DO PRIVATE(DLALP,DLDALP,DLSP,DLSP2,ZFMS,ZFMA, &
!$OMP INM,ILA,JM,JN,JK,JK2,JJ,JJ2,ZJM,ILONR,ILONI)
    do jm = gst(gstID)%mymBeg, gst(gstID)%mymEnd, gst(gstID)%mymSkip

          ilonr = 2*gst(gstID)%mymIndex(jm)-1
          iloni = 2*gst(gstID)%mymIndex(jm)
          zjm   = real(jm,8)

          ! 2.1 Fetch the Legendre functions and their derivatives for this choice of "m"
          call getalp(dlalp,dldalp,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc,jm)

          ! 2.2  Build the symmetric and anti-symmetric Fourier coefficients including
          !      the appropriate quadrature weights (see scientific notes)
          do jj = 1, gst(gstID)%njlath
             do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
                zfms(jj,1,jk) = 0.0d0
                zfms(jj,2,jk) = 0.0d0
                zfma(jj,1,jk) = 0.0d0
                zfma(jj,2,jk) = 0.0d0
             enddo
          enddo

          do jj = 1, gst(gstID)%nj
             jj2 = gst(gstID)%nj-jj+1
             ! 2.2.1 Coefficients associated with the scalar fields
             if(jj.le.gst(gstID)%njlath) then
                ! Northern hemisphere
                do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
                   if(jk .gt. 2*nflev) then
                      zfms(jj,1,jk) = pgd2(ilonr,jk,jj)
                      zfms(jj,2,jk) = pgd2(iloni,jk,jj)
                      zfma(jj,1,jk) = pgd2(ilonr,jk,jj)
                      zfma(jj,2,jk) = pgd2(iloni,jk,jj)
                   endif
                enddo
             else
                ! Southern hemisphere
                do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
                    if(jk .gt. 2*nflev) then
                      zfms(jj2,1,jk) = zfms(jj2,1,jk) + pgd2(ilonr,jk,jj)
                      zfms(jj2,2,jk) = zfms(jj2,2,jk) + pgd2(iloni,jk,jj)
                      zfma(jj2,1,jk) = zfma(jj2,1,jk) - pgd2(ilonr,jk,jj)
                      zfma(jj2,2,jk) = zfma(jj2,2,jk) - pgd2(iloni,jk,jj)
                   endif
                enddo
             endif

             ! 2.2.2 Coefficients associated with the wind fields
             if(jj.le.gst(gstID)%njlath) then
                ! Northern hemisphere
                do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd, 2
                   if(jk .le. 2*nflev) then
                      jk2 = jk + 1
                      ! vorticity: symmetric coefficients
                      zfms(jj,1,jk ) = -pgd2(iloni,jk2,jj)
                      zfms(jj,2,jk ) =  pgd2(ilonr,jk2,jj)
                      ! vorticity: antisymmetric coefficients
                      zfma(jj,1,jk ) = -pgd2(iloni,jk2,jj)
                      zfma(jj,2,jk ) =  pgd2(ilonr,jk2,jj)
                      ! divergence: symmetric coefficients
                      zfms(jj,1,jk2) = -pgd2(iloni,jk ,jj)
                      zfms(jj,2,jk2) =  pgd2(ilonr,jk ,jj)
                      ! divergence: antisymmetric coefficients
                      zfma(jj,1,jk2) = -pgd2(iloni,jk ,jj)
                      zfma(jj,2,jk2) =  pgd2(ilonr,jk ,jj)
                   endif
                enddo
             else
                ! Southern hemisphere
                do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd, 2
                   if(jk .le. 2*nflev) then
                      jk2 = jk + 1
                      ! vorticity: symmetric coefficients
                      zfms(jj2,1,jk ) = zfms(jj2,1,jk ) - pgd2(iloni,jk2,jj)
                      zfms(jj2,2,jk ) = zfms(jj2,2,jk ) + pgd2(ilonr,jk2,jj)
                      ! vorticity: antisymmetric coefficients
                      zfma(jj2,1,jk ) = zfma(jj2,1,jk ) + pgd2(iloni,jk2,jj)
                      zfma(jj2,2,jk ) = zfma(jj2,2,jk ) - pgd2(ilonr,jk2,jj)
                      ! divergence: symmetric coefficients
                      zfms(jj2,1,jk2) = zfms(jj2,1,jk2) - pgd2(iloni,jk ,jj)
                      zfms(jj2,2,jk2) = zfms(jj2,2,jk2) + pgd2(ilonr,jk ,jj)
                      ! divergence: antisymmetric coefficients
                      zfma(jj2,1,jk2) = zfma(jj2,1,jk2) + pgd2(iloni,jk ,jj)
                      zfma(jj2,2,jk2) = zfma(jj2,2,jk2) - pgd2(ilonr,jk ,jj)
                   endif
                enddo
             endif
          enddo

          do jj = 1, gst(gstID)%njlath
             do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
                zfms(jj,1,jk) = dlrwt(jj)*zfms(jj,1,jk)
                zfms(jj,2,jk) = dlrwt(jj)*zfms(jj,2,jk)
                zfma(jj,1,jk) = dlrwt(jj)*zfma(jj,1,jk)
                zfma(jj,2,jk) = dlrwt(jj)*zfma(jj,2,jk)
             enddo
             do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
                if(jk .le. 2*nflev) then
                   zfms(jj,1,jk) = zjm*zfms(jj,1,jk)
                   zfms(jj,2,jk) = zjm*zfms(jj,2,jk)
                   zfma(jj,1,jk) = zjm*zfma(jj,1,jk)
                   zfma(jj,2,jk) = zjm*zfma(jj,2,jk)
                endif
             enddo
          enddo

          ! 2.3 First one with ALP for all scalar fields and for half the terms
          !     required to define the divergence and vorticity
          call legdir2d(jm,zfms,zfma,dlsp,dlalp,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc)
                                
          ! 2.4  Second transform with DALP to complete the construction of the
          !      vorticity and divergence fields
          do jj = 1, gst(gstID)%njlath
             do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
                if(jk .le. 2*nflev) then
                   zfms(jj,1,jk) = 0.0d0
                   zfms(jj,2,jk) = 0.0d0
                   zfma(jj,1,jk) = 0.0d0
                   zfma(jj,2,jk) = 0.0d0
                endif
             enddo
          enddo

          do jj = 1, gst(gstID)%nj
             jj2 = gst(gstID)%nj-jj+1
             if(jj.le.gst(gstID)%njlath) then
                ! Northern hemisphere
                do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd, 2
                   if(jk .le. 2*nflev) then
                      jk2 = jk + 1
                      ! symmetric coefficients for zonal wind
                      zfms(jj,1,jk ) =  pgd2(ilonr,jk ,jj)
                      zfms(jj,2,jk ) =  pgd2(iloni,jk ,jj)
                      ! antisymmetric coefficients for zonal wind
                      zfma(jj,1,jk ) =  pgd2(ilonr,jk ,jj)
                      zfma(jj,2,jk ) =  pgd2(iloni,jk ,jj)
                      ! symmetric coefficients for meridional wind
                      zfms(jj,1,jk2) = -pgd2(ilonr,jk2,jj)
                      zfms(jj,2,jk2) = -pgd2(iloni,jk2,jj)
                      ! antisymmetric coefficients for meridional wind
                      zfma(jj,1,jk2) = -pgd2(ilonr,jk2,jj)
                      zfma(jj,2,jk2) = -pgd2(iloni,jk2,jj)
                   endif
                enddo
             else
                ! Southern hemisphere
                do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd, 2
                   if(jk .le. 2*nflev) then
                      jk2 = jk + 1
                      ! symmetric coefficients for zonal wind
                      zfms(jj2,1,jk ) = zfms(jj2,1,jk ) + pgd2(ilonr,jk ,jj)
                      zfms(jj2,2,jk ) = zfms(jj2,2,jk ) + pgd2(iloni,jk ,jj)
                      ! antisymmetric coefficients for zonal wind
                      zfma(jj2,1,jk ) = zfma(jj2,1,jk ) - pgd2(ilonr,jk ,jj)
                      zfma(jj2,2,jk ) = zfma(jj2,2,jk ) - pgd2(iloni,jk ,jj)
                      ! symmetric coefficients for meridional wind
                      zfms(jj2,1,jk2) = zfms(jj2,1,jk2) - pgd2(ilonr,jk2,jj)
                      zfms(jj2,2,jk2) = zfms(jj2,2,jk2) - pgd2(iloni,jk2,jj)
                      ! antisymmetric coefficients for meridional wind
                      zfma(jj2,1,jk2) = zfma(jj2,1,jk2) + pgd2(ilonr,jk2,jj)
                      zfma(jj2,2,jk2) = zfma(jj2,2,jk2) + pgd2(iloni,jk2,jj)
                   endif
                enddo
             endif
          enddo

          do jj = 1, gst(gstID)%njlath
             do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
                if(jk .le. 2*nflev) then
                   zfms(jj,1,jk) = dlrwt(jj)*zfms(jj,1,jk)
                   zfms(jj,2,jk) = dlrwt(jj)*zfms(jj,2,jk)
                   zfma(jj,1,jk) = dlrwt(jj)*zfma(jj,1,jk)
                   zfma(jj,2,jk) = dlrwt(jj)*zfma(jj,2,jk)
                endif
             enddo
          enddo

          call legdir2d(jm,zfma,zfms,dlsp2,dldalp,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc)

          ! 2.5  Transfer the result in the global state
          do jn = jm, gst(gstID)%ntrunc
             ila = gst(gstID)%nind(jm) + jn - jm
             inm = jn - jm
             do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
                if(jk .le. 2*nflev) then
                   psp(ila,1,jk) = -gst(gstID)%r1snp1(ila)*(dlsp(inm,1,jk) + dlsp2(inm,1,jk))
                   psp(ila,2,jk) = -gst(gstID)%r1snp1(ila)*(dlsp(inm,2,jk) + dlsp2(inm,2,jk))
                else
                   psp(ila,1,jk) = dlsp(inm,1,jk)
                   psp(ila,2,jk) = dlsp(inm,2,jk)
                endif
             enddo
          enddo

    ! End of loop on zonal wavenumbers
    enddo
!$OMP END PARALLEL DO

  END SUBROUTINE SPGDAPAR_MPI2D



  SUBROUTINE SPGDAPAR(SP,ZFM,KFIELD,NFLEV,KDIM),3
    implicit none

    integer :: kfield,nflev,kdim
    real(8) :: sp(gst(gstID)%nla,2,kdim)
    real(8) :: zfm(gst(gstID)%ni,kdim,gst(gstID)%nj)

    integer :: jj, jj2, jk, jk2, ilonr, iloni, jm ,ila, inm, jn, jm0, ins, jns
    real(8) :: zjm,dlrwt(gst(gstID)%nj)
    real(8) :: dlalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: dldalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: dlsp(0:gst(gstID)%ntrunc,2,kfield)
    real(8) :: dlsp2(0:gst(gstID)%ntrunc,2,2*nflev)
    real(8) :: zfms(gst(gstID)%njlath+1,2,kfield)
    real(8) :: zfma(gst(gstID)%njlath+1,2,kfield)
    integer :: thdid,numthd,omp_get_thread_num,omp_get_num_threads

    !    1. Set up according to the desired grid (staggered or not)
    !       ---------------------
    !write(*,fmt='(/,4x,A)')'SPGDAPAR- Adjoint of the inverse Legendre transform'
                                !
    dlrwt(:) = gst(gstID)%rwt(:)
    if (mod(gst(gstID)%nj,2).ne.0) then
       dlrwt(gst(gstID)%njlath) = dlrwt(gst(gstID)%njlath)/2.d0
    end if

    ! 2. Fourier transform all fields for all latitudes
!$OMP PARALLEL PRIVATE(DLALP,DLDALP,JM0,DLSP,DLSP2,ZFMS,ZFMA, &
!$OMP INM,ILA,JM,JN,JK,JK2,JJ,JJ2,ZJM,JNS,INS,ILONR,ILONI, &
!$OMP thdid,numthd)
    thdid = omp_get_thread_num()
    numthd = omp_get_num_threads()
    do jm0 = thdid, gst(gstID)%ntrunc/2,numthd
       ins = 1
       if(jm0.eq.gst(gstID)%ntrunc-jm0) ins = 0
       do jns = 0,ins
          jm = (1-jns)*jm0 + jns*(gst(gstID)%ntrunc - jm0)
          ilonr = 2 * jm + 1
          iloni = ilonr + 1
          zjm   = real(jm,8)

          ! 2.1 Fetch the Legendre functions and their derivatives for this choice of "m"
          call getalp(dlalp,dldalp,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc,jm)

          ! 2.2  Build the symmetric and anti-symmetric Fourier coefficients including
          !      the appropriate quadrature weights (see scientific notes)
          do jj = gst(gstID)%myLatHalfBeg,gst(gstID)%myLatHalfEnd
             do jk = 1, kfield
                zfms(jj,1,jk) = 0.0d0
                zfms(jj,2,jk) = 0.0d0
                zfma(jj,1,jk) = 0.0d0
                zfma(jj,2,jk) = 0.0d0
             enddo
          enddo

          do jj = gst(gstID)%myLatBeg, gst(gstID)%myLatEnd
             jj2 = gst(gstID)%nj-jj+1
             ! 2.2.1 Coefficients associated with the scalar fields
             if(jj.le.gst(gstID)%njlath) then
                ! Northern hemisphere
                do jk = 2*nflev+1, kfield
                   zfms(jj,1,jk) = zfm(ilonr,jk,jj)
                   zfms(jj,2,jk) = zfm(iloni,jk,jj)
                   zfma(jj,1,jk) = zfm(ilonr,jk,jj)
                   zfma(jj,2,jk) = zfm(iloni,jk,jj)
                enddo
             else
                ! Southern hemisphere
                do jk = 2*nflev+1, kfield
                   zfms(jj2,1,jk) = zfms(jj2,1,jk) + zfm(ilonr,jk,jj)
                   zfms(jj2,2,jk) = zfms(jj2,2,jk) + zfm(iloni,jk,jj)
                   zfma(jj2,1,jk) = zfma(jj2,1,jk) - zfm(ilonr,jk,jj)
                   zfma(jj2,2,jk) = zfma(jj2,2,jk) - zfm(iloni,jk,jj)
                enddo
             endif

             ! 2.2.2 Coefficients associated with the wind fields
             if(jj.le.gst(gstID)%njlath) then
                ! Northern hemisphere
                do jk = 1, nflev
                   jk2 = jk+nflev
                   ! vorticity: symmetric coefficients
                   zfms(jj,1,jk ) = -zfm(iloni,jk2,jj) 
                   zfms(jj,2,jk ) =  zfm(ilonr,jk2,jj)
                   ! vorticity: antisymmetric coefficients
                   zfma(jj,1,jk ) = -zfm(iloni,jk2,jj)
                   zfma(jj,2,jk ) =  zfm(ilonr,jk2,jj)
                   ! divergence: symmetric coefficients
                   zfms(jj,1,jk2) = -zfm(iloni,jk ,jj)
                   zfms(jj,2,jk2) =  zfm(ilonr,jk ,jj)
                   ! divergence: antisymmetric coefficients
                   zfma(jj,1,jk2) = -zfm(iloni,jk ,jj)
                   zfma(jj,2,jk2) =  zfm(ilonr,jk ,jj)
                enddo
             else
                ! Southern hemisphere
                do jk = 1, nflev
                   jk2 = jk+nflev
                   ! vorticity: symmetric coefficients
                   zfms(jj2,1,jk ) = zfms(jj2,1,jk ) - zfm(iloni,jk2,jj)
                   zfms(jj2,2,jk ) = zfms(jj2,2,jk ) + zfm(ilonr,jk2,jj)
                   ! vorticity: antisymmetric coefficients
                   zfma(jj2,1,jk ) = zfma(jj2,1,jk ) + zfm(iloni,jk2,jj)
                   zfma(jj2,2,jk ) = zfma(jj2,2,jk ) - zfm(ilonr,jk2,jj)
                   ! divergence: symmetric coefficients
                   zfms(jj2,1,jk2) = zfms(jj2,1,jk2) - zfm(iloni,jk ,jj)
                   zfms(jj2,2,jk2) = zfms(jj2,2,jk2) + zfm(ilonr,jk ,jj)
                   ! divergence: antisymmetric coefficients
                   zfma(jj2,1,jk2) = zfma(jj2,1,jk2) + zfm(iloni,jk ,jj)
                   zfma(jj2,2,jk2) = zfma(jj2,2,jk2) - zfm(ilonr,jk ,jj)
                enddo
             endif
          enddo

          do jj = gst(gstID)%myLatHalfBeg, gst(gstID)%myLatHalfEnd
             do jk = 1, kfield
                zfms(jj,1,jk) = dlrwt(jj)*zfms(jj,1,jk)
                zfms(jj,2,jk) = dlrwt(jj)*zfms(jj,2,jk)
                zfma(jj,1,jk) = dlrwt(jj)*zfma(jj,1,jk)
                zfma(jj,2,jk) = dlrwt(jj)*zfma(jj,2,jk)
             enddo
             do jk = 1, 2*nflev
                zfms(jj,1,jk) = zjm*zfms(jj,1,jk) 
                zfms(jj,2,jk) = zjm*zfms(jj,2,jk) 
                zfma(jj,1,jk) = zjm*zfma(jj,1,jk) 
                zfma(jj,2,jk) = zjm*zfma(jj,2,jk) 
             enddo
          enddo

          ! 2.3 First one with ALP for all scalar fields and for half the terms
          !     required to define the divergence and vorticity
          call legdir3 (jm,zfms,zfma,dlsp,dlalp,kfield,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc)
                                !
          ! 2.4  Second transform with DALP to complete the construction of the
          !      vorticity and divergence fields
          do jj = gst(gstID)%myLatHalfBeg,gst(gstID)%myLatHalfEnd
             do jk = 1, 2*nflev
                zfms(jj,1,jk) = 0.0d0
                zfms(jj,2,jk) = 0.0d0
                zfma(jj,1,jk) = 0.0d0
                zfma(jj,2,jk) = 0.0d0
             enddo
          enddo

          do jj = gst(gstID)%myLatBeg, gst(gstID)%myLatEnd
             jj2 = gst(gstID)%nj-jj+1
             if(jj.le.gst(gstID)%njlath) then
                ! Northern hemisphere
                do jk = 1, nflev
                   jk2 = jk+nflev
                   ! symmetric coefficients for zonal wind
                   zfms(jj,1,jk ) =  zfm(ilonr,jk ,jj)
                   zfms(jj,2,jk ) =  zfm(iloni,jk ,jj) 
                   ! antisymmetric coefficients for zonal wind
                   zfma(jj,1,jk ) =  zfm(ilonr,jk ,jj) 
                   zfma(jj,2,jk ) =  zfm(iloni,jk ,jj) 
                   ! symmetric coefficients for meridional wind
                   zfms(jj,1,jk2) = -zfm(ilonr,jk2,jj) 
                   zfms(jj,2,jk2) = -zfm(iloni,jk2,jj) 
                   ! antisymmetric coefficients for meridional wind
                   zfma(jj,1,jk2) = -zfm(ilonr,jk2,jj) 
                   zfma(jj,2,jk2) = -zfm(iloni,jk2,jj) 
                enddo
             else
                ! Southern hemisphere
                do jk = 1, nflev
                   jk2 = jk+nflev
                   ! symmetric coefficients for zonal wind
                   zfms(jj2,1,jk ) = zfms(jj2,1,jk ) + zfm(ilonr,jk ,jj)
                   zfms(jj2,2,jk ) = zfms(jj2,2,jk ) + zfm(iloni,jk ,jj)
                   ! antisymmetric coefficients for zonal wind
                   zfma(jj2,1,jk ) = zfma(jj2,1,jk ) - zfm(ilonr,jk ,jj)
                   zfma(jj2,2,jk ) = zfma(jj2,2,jk ) - zfm(iloni,jk ,jj)
                   ! symmetric coefficients for meridional wind
                   zfms(jj2,1,jk2) = zfms(jj2,1,jk2) - zfm(ilonr,jk2,jj)
                   zfms(jj2,2,jk2) = zfms(jj2,2,jk2) - zfm(iloni,jk2,jj)
                   ! antisymmetric coefficients for meridional wind
                   zfma(jj2,1,jk2) = zfma(jj2,1,jk2) + zfm(ilonr,jk2,jj)
                   zfma(jj2,2,jk2) = zfma(jj2,2,jk2) + zfm(iloni,jk2,jj)
                enddo
             endif
          enddo

          do jj = gst(gstID)%myLatHalfBeg, gst(gstID)%myLatHalfEnd
             do jk = 1, 2*nflev
                zfms(jj,1,jk) = dlrwt(jj)*zfms(jj,1,jk)
                zfms(jj,2,jk) = dlrwt(jj)*zfms(jj,2,jk)
                zfma(jj,1,jk) = dlrwt(jj)*zfma(jj,1,jk)
                zfma(jj,2,jk) = dlrwt(jj)*zfma(jj,2,jk)
             enddo
          enddo

          call legdir3(jm,zfma,zfms,dlsp2,dldalp,2*nflev,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc)

          ! 2.5  Transfer the result in the global state
          do jn = jm, gst(gstID)%ntrunc
             ila = gst(gstID)%nind(jm) + jn - jm
             inm = jn - jm
             do jk = 1, 2*nflev
                sp(ila,1,jk) = -gst(gstID)%r1snp1(ila)*(dlsp(inm,1,jk) + dlsp2(inm,1,jk))
                sp(ila,2,jk) = -gst(gstID)%r1snp1(ila)*(dlsp(inm,2,jk) + dlsp2(inm,2,jk))
             enddo
             do jk = 2*nflev+1,kfield
                sp(ila,1,jk) = dlsp(inm,1,jk)
                sp(ila,2,jk) = dlsp(inm,2,jk)
             enddo
          enddo
       ! End of loop on zonal wavenumbers
       enddo
    enddo
!$OMP END PARALLEL
  END SUBROUTINE SPGDAPAR



  SUBROUTINE GST_SPEREE(PSP,PGD,KFIELD,KDIM) 4,5
    implicit none

    integer :: kfield, kdim
    real(8) :: psp(gst(gstID)%nla,2,kdim)
    real(8) :: pgd(gst(gstID)%ni,kdim,gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)

    real(8), allocatable :: pgd2(:,:,:)
    integer :: jlat, jk, jlon

    ! 1.1 Inverse Legendre transform
    if(gst(gstID)%mpiMode.eq.2) then
      allocate(pgd2(2*gst(gstID)%maxmCount,kdim,gst(gstID)%nj))
      call tmg_start(72+gst(gstID)%mpiMode,'LT')
      call spereepar_mpi2(psp,pgd2,kfield,kdim)
      call tmg_stop(72+gst(gstID)%mpiMode)
      call transpose_MtoLat(pgd2,pgd,kfield,kdim)
      deallocate(pgd2)
    elseif(gst(gstID)%mpiMode.eq.3) then
      allocate(pgd2(2*(gst(gstID)%ntrunc+1),gst(gstID)%myLevBeg:gst(gstID)%myLevEnd,gst(gstID)%nj))
      call tmg_start(72+gst(gstID)%mpiMode,'LT')
      call spereepar_mpi3(psp,pgd2,kfield,kdim)
      call tmg_stop(72+gst(gstID)%mpiMode)
      call transpose_LevtoLat(pgd2,pgd,kfield,kdim)
      deallocate(pgd2)
    else
      write(*,*) 'GST_SPEREE: ONLY MPIMODE 2 AND 3 SUPPORTED! ',gst(gstID)%mpiMode
      call flush(6)
      call exit(1)
    endif

    ! 2.1 Reset to zero the modes that are not part of the truncation
!$OMP PARALLEL 
!$OMP DO PRIVATE (JLAT,JLON,JK)
    do jlat = gst(gstID)%myLatBeg,gst(gstID)%myLatEnd
       do  jk = 1, kfield
          do jlon = 2*(gst(gstID)%ntrunc+1)+1, gst(gstID)%ni
             pgd(jlon,jk,jlat) = 0.d0
          enddo
       enddo
    enddo
!$OMP END DO
!$OMP END PARALLEL

    ! 2.2 Apply the inverse FFT 
    call tmg_start(20+gst(gstID)%mpiMode,'FFT')
    call fft_fft3dvar2(pgd,kfield,kdim,+1,gst(gstID)%fftID)
    call tmg_stop(20+gst(gstID)%mpiMode)

  END SUBROUTINE GST_SPEREE



  SUBROUTINE GST_SPEREE5(PSP,PGD) 1,5
    implicit none

    real(8)  :: psp(gst(gstID)%nla, 2, gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8)  :: pgd(gst(gstID)%myLonBeg:gst(gstID)%myLonEnd, gst(gstID)%nk, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)

    real(8), allocatable :: pgd2(:,:,:),pgd3(:,:,:)
    integer :: jlat, jk, jlon

    if(gst(gstID)%mpiMode.ne.5) call abort3d('GST_SPEREE5: only mpiMode 5 is allowed!')

    allocate(pgd2(2*gst(gstID)%maxmcount,  gst(gstID)%myLevBeg:gst(gstID)%myLevEnd, gst(gstID)%nj))
    allocate(pgd3(gst(gstID)%ni, gst(gstID)%myLevBeg:gst(gstID)%myLevEnd, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd))

    ! 1.1 Inverse Legendre transform (lon -> m)
    call tmg_start(72+gst(gstID)%mpiMode,'LT')
    call spereepar_mpi2d(psp,pgd2)
    call tmg_stop(72+gst(gstID)%mpiMode)

    ! 1.2 Transpose data along npey from M to Latitudes
    call transpose2d_MtoLat(pgd2,pgd3)
    deallocate(pgd2)

    ! 2.1 Reset to zero the modes that are not part of the truncation
!$OMP PARALLEL 
!$OMP DO PRIVATE (JLAT,JLON,JK)
    do jlat = gst(gstID)%myLatBeg, gst(gstID)%myLatEnd
       do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
          do jlon = 2*(gst(gstID)%ntrunc+1)+1, gst(gstID)%ni
             pgd3(jlon,jk,jlat) = 0.d0
          enddo
       enddo
    enddo
!$OMP END DO
!$OMP END PARALLEL

    ! 2.2 Apply the inverse FFT 
    call tmg_start(20+gst(gstID)%mpiMode,'FFT')
    call fft_fft3dvar2(pgd3,gst(gstID)%myLevCount,gst(gstID)%myLevCount,+1,gst(gstID)%fftID)
    call tmg_stop(20+gst(gstID)%mpiMode)

    ! 2.3 Transpose data along npex from Levels to Longitudes
    call transpose2d_LevtoLon(pgd3,pgd)

    deallocate(pgd3)

  END SUBROUTINE GST_SPEREE5



  SUBROUTINE GST_SPEREE4(PSP,PGD) 3,6
    implicit none

    real(8) :: psp(gst(gstID)%maxMyNla, 2, gst(gstID)%nk)
    real(8) :: pgd(gst(gstID)%myLonBeg:gst(gstID)%myLonEnd, gst(gstID)%nk, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)

    real(8), allocatable :: psp2(:,:,:),pgd2(:,:,:),pgd3(:,:,:)
    integer :: jlat, jk, jlon

    if(gst(gstID)%mpiMode.ne.4) call abort3d('GST_SPEREE4: only mpiMode 4 is allowed!')

    allocate(psp2(gst(gstID)%nla, 2, gst(gstID)%myLevBeg:gst(gstID)%myLevEnd))
    allocate(pgd2(2*gst(gstID)%maxmcount,  gst(gstID)%myLevBeg:gst(gstID)%myLevEnd, gst(gstID)%nj))
    allocate(pgd3(gst(gstID)%ni, gst(gstID)%myLevBeg:gst(gstID)%myLevEnd, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd))

    ! 1.0 Transpose data along npex from N to Levels
    call transpose2d_NtoLev(psp,psp2)

    ! 1.1 Inverse Legendre transform (lon -> m)
    call tmg_start(72+gst(gstID)%mpiMode,'LT')
    call spereepar_mpi2d(psp2,pgd2)
    call tmg_stop(72+gst(gstID)%mpiMode)
    deallocate(psp2)

    ! 1.2 Transpose data along npey from M to Latitudes
    call transpose2d_MtoLat(pgd2,pgd3)
    deallocate(pgd2)

    ! 2.1 Reset to zero the modes that are not part of the truncation
!$OMP PARALLEL 
!$OMP DO PRIVATE (JLAT,JLON,JK)
    do jlat = gst(gstID)%myLatBeg, gst(gstID)%myLatEnd
       do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
          do jlon = 2*(gst(gstID)%ntrunc+1)+1, gst(gstID)%ni
             pgd3(jlon,jk,jlat) = 0.d0
          enddo
       enddo
    enddo
!$OMP END DO
!$OMP END PARALLEL

    ! 2.2 Apply the inverse FFT 
    call tmg_start(20+gst(gstID)%mpiMode,'FFT')
    call fft_fft3dvar2(pgd3,gst(gstID)%myLevCount,gst(gstID)%myLevCount,+1,gst(gstID)%fftID)
    call tmg_stop(20+gst(gstID)%mpiMode)

    ! 2.3 Transpose data along npex from Levels to Longitudes
    call transpose2d_LevtoLon(pgd3,pgd)
    deallocate(pgd3)

  END SUBROUTINE GST_SPEREE4



  SUBROUTINE GST_REESPE(PSP,PGD,KFIELD,KDIM) 4,5
    implicit none

    integer :: kfield,kdim
    real(8) :: psp(gst(gstID)%nla,2,kdim)
    real(8) :: pgd(gst(gstID)%ni,kdim,gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)

    real(8), allocatable :: pgd2(:,:,:)
    integer :: ji,jj,jk, jlon, jlat

    ! 1. Apply the FFT
    call tmg_start(20+gst(gstID)%mpiMode,'FFT')
    call fft_fft3dvar2(pgd,kfield,kdim,-1,gst(gstID)%fftID)
    call tmg_stop(20+gst(gstID)%mpiMode)

    ! 2. Direct Legendre transform
    if(gst(gstID)%mpiMode.eq.2) then
      allocate(pgd2(2*gst(gstID)%maxmCount,kdim,gst(gstID)%nj))
      call transpose_LattoM(pgd2,pgd,kfield,kdim)
      call tmg_start(72+gst(gstID)%mpiMode,'LT')
      call reespepar_mpi2(pgd2,psp,kfield,kdim)
      call tmg_stop(72+gst(gstID)%mpiMode)
      deallocate(pgd2)
    elseif(gst(gstID)%mpiMode.eq.3) then
      allocate(pgd2(2*(gst(gstID)%ntrunc+1),gst(gstID)%myLevBeg:gst(gstID)%myLevEnd,gst(gstID)%nj))
      call transpose_LattoLev(pgd2,pgd,kfield,kdim)
      call tmg_start(72+gst(gstID)%mpiMode,'LT')
      call reespepar_mpi3(pgd2,psp,kfield,kdim)
      call tmg_stop(72+gst(gstID)%mpiMode)
      deallocate(pgd2)
    else
      write(*,*) 'GST_REESPE: ONLY MPIMODE 2 AND 3 SUPPORTED! ',gst(gstID)%mpiMode
      call flush(6)
      call exit(1)    
    endif

  END SUBROUTINE GST_REESPE



  SUBROUTINE GST_REESPE5(PSP,PGD) 2,5
    implicit none

    real(8) :: psp(gst(gstID)%nla, 2, gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: pgd(gst(gstID)%myLonBeg:gst(gstID)%myLonEnd, gst(gstID)%nk, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)

    real(8), allocatable :: pgd2(:,:,:),pgd3(:,:,:)
    integer :: jlat, jk, jlon

    if(gst(gstID)%mpiMode.ne.5) call abort3d('GST_REESPE5: only mpiMode 5 is allowed!')

    allocate(pgd2(2*gst(gstID)%maxmcount,  gst(gstID)%myLevBeg:gst(gstID)%myLevEnd, gst(gstID)%nj))
    allocate(pgd3(gst(gstID)%ni, gst(gstID)%myLevBeg:gst(gstID)%myLevEnd, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd))

    ! Transpose data along npex from Longitudes to Levels
    call transpose2d_LontoLev(pgd,pgd3)

    ! 1. Apply the FFT
    call tmg_start(20+gst(gstID)%mpiMode,'FFT')
    call fft_fft3dvar2(pgd3,gst(gstID)%myLevCount,gst(gstID)%myLevCount,-1,gst(gstID)%fftID)
    call tmg_stop(20+gst(gstID)%mpiMode)

    ! Transpose data along npey from Latitudes to M
    call transpose2d_LattoM(pgd3,pgd2)
    deallocate(pgd3)

    ! 2. Direct Legendre transform
    call tmg_start(72+gst(gstID)%mpiMode,'LT')
    call reespepar_mpi2d(pgd2,psp)
    call tmg_stop(72+gst(gstID)%mpiMode)
    deallocate(pgd2)

  END SUBROUTINE GST_REESPE5



  SUBROUTINE GST_REESPE4(PSP,PGD) 4,6
    implicit none

    real(8) :: psp(gst(gstID)%maxMyNla, 2, gst(gstID)%nk)
    real(8) :: pgd(gst(gstID)%myLonBeg:gst(gstID)%myLonEnd, gst(gstID)%nk, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd)

    real(8), allocatable :: psp2(:,:,:),pgd2(:,:,:),pgd3(:,:,:)
    integer :: jlat, jk, jlon

    if(gst(gstID)%mpiMode.ne.4) call abort3d('GST_REESPE4: only mpiMode 4 is allowed!')

    allocate(psp2(gst(gstID)%nla, 2, gst(gstID)%myLevBeg:gst(gstID)%myLevEnd))
    allocate(pgd2(2*gst(gstID)%maxmcount,  gst(gstID)%myLevBeg:gst(gstID)%myLevEnd, gst(gstID)%nj))
    allocate(pgd3(gst(gstID)%ni, gst(gstID)%myLevBeg:gst(gstID)%myLevEnd, gst(gstID)%myLatBeg:gst(gstID)%myLatEnd))

    ! Transpose data along npex from Longitudes to Levels
    call transpose2d_LontoLev(pgd,pgd3)

    ! 1. Apply the FFT
    call tmg_start(20+gst(gstID)%mpiMode,'FFT')
    call fft_fft3dvar2(pgd3,gst(gstID)%myLevCount,gst(gstID)%myLevCount,-1,gst(gstID)%fftID)
    call tmg_stop(20+gst(gstID)%mpiMode)

    ! Transpose data along npey from Latitudes to M
    call transpose2d_LattoM(pgd3,pgd2)
    deallocate(pgd3)

    ! 2. Direct Legendre transform
    call tmg_start(72+gst(gstID)%mpiMode,'LT')
    call reespepar_mpi2d(pgd2,psp2)
    call tmg_stop(72+gst(gstID)%mpiMode)
    deallocate(pgd2)

    ! Transpose data along npex from Levels to N
    call transpose2d_LevtoN(psp2,psp)
    deallocate(psp2)

  END SUBROUTINE GST_REESPE4



  SUBROUTINE SPEREEPAR_MPI2(PSP,PGD2,KFIELD,KDIM) 1,2
!**s/r SPEREEPAR_MPI2  - Inverse spectral transform(MPI PARALLEL LOOP)

    implicit none

    integer :: kfield, kdim
    real(8) :: psp(gst(gstID)%nla,2,kdim)
    real(8) :: pgd2(2*gst(gstID)%maxmCount,kdim,gst(gstID)%nj)

    ! local variables
    integer :: jj, jj2, jm
    integer :: ilonr, iloni, jk, jn, ila, inm

    real(8) :: dlalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: dldalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: zfms(gst(gstID)%njlath+1,2,kfield)
    real(8) :: zfma(gst(gstID)%njlath+1,2,kfield)
    real(8) :: dlsp(0:gst(gstID)%ntrunc,2,kfield)

    ! Inverse Legendre transform

!$OMP PARALLEL DO PRIVATE(DLALP,DLDALP,DLSP,ZFMS,ZFMA, &
!$OMP INM,ILA,JM,JN,JK,JJ,JJ2,ILONR,ILONI)
    do jm = gst(gstID)%mymBeg, gst(gstID)%mymEnd, gst(gstID)%mymSkip

          ! 2.1 Copy global spectral state into local spectral state
          do jn = jm, gst(gstID)%ntrunc
             ila = gst(gstID)%nind(jm) + jn - jm
             inm = jn - jm
             do jk = 1,kfield
                dlsp(inm,1,jk) = psp(ila,1,jk)
                dlsp(inm,2,jk) = psp(ila,2,jk)
             enddo
          enddo

          ! 2.2  Get Legendre polynomial (and its derivative) for all latitudes
          !      but for the chosen value of "m" from the global array
          call getalp(dlalp,dldalp,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc,jm)

          ! 2.3  Perform the inverse Legendre transform for all fields
          call leginv4(jm,zfms,zfma,dlsp,dlalp,kfield,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc)

          ! 2.4 Passage to Fourier space
          ilonr = 2*gst(gstID)%mymIndex(jm)-1
          iloni = 2*gst(gstID)%mymIndex(jm)
          do jj = 1, gst(gstID)%nj
             jj2 = gst(gstID)%nj - jj + 1
             if(jj.le.gst(gstID)%njlath) then
                do jk = 1,kfield
                   pgd2(ilonr,jk,jj) = zfms(jj,1,jk) + zfma(jj,1,jk)
                   pgd2(iloni,jk,jj) = zfms(jj,2,jk) + zfma(jj,2,jk)
                enddo
             else
                do jk = 1,kfield
                   pgd2(ilonr,jk,jj) = zfms(jj2,1,jk) - zfma(jj2,1,jk)
                   pgd2(iloni,jk,jj) = zfms(jj2,2,jk) - zfma(jj2,2,jk)
                enddo
             endif
          enddo
    enddo
!$OMP END PARALLEL DO

  END SUBROUTINE SPEREEPAR_MPI2



  SUBROUTINE SPEREEPAR_MPI3(PSP,PGD2,KFIELD,KDIM) 1,2
!**s/r SPEREEPAR_MPI3  - Inverse spectral transform(MPI PARALLEL LOOP)

    implicit none

    integer kfield,kdim
    real(8) :: psp(gst(gstID)%nla,2,kdim)
    real(8) :: pgd2(2*(gst(gstID)%ntrunc+1),gst(gstID)%myLevBeg:gst(gstID)%myLevEnd,gst(gstID)%nj)

    ! local variables
    integer :: jj, jj2, jm
    integer :: ilonr, iloni, jk, jn, ila, inm

    real(8) :: dlalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: dldalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: zfms(gst(gstID)%njlath+1,2,kfield)
    real(8) :: zfma(gst(gstID)%njlath+1,2,kfield)
    real(8) :: dlsp(0:gst(gstID)%ntrunc,2,kfield)

    ! Inverse Legendre transform

!$OMP PARALLEL DO PRIVATE(DLALP,DLDALP,DLSP,ZFMS,ZFMA, &
!$OMP INM,ILA,JM,JN,JK,JJ,JJ2,ILONR,ILONI)
    do jm = 0, gst(gstID)%ntrunc

       ! 2.1 Copy global spectral state into local spectral state
       do jn = jm, gst(gstID)%ntrunc
          ila = gst(gstID)%nind(jm) + jn - jm
          inm = jn - jm
          do jk = gst(gstID)%myLevBeg,gst(gstID)%myLevEnd
             dlsp(inm,1,jk) = psp(ila,1,jk)
             dlsp(inm,2,jk) = psp(ila,2,jk)
          enddo
       enddo

       ! 2.2  Get Legendre polynomial (and its derivative) for all latitudes
       !      but for the chosen value of "m" from the global array
       call getalp(dlalp,dldalp,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc,jm)

       ! 2.3  Perform the inverse Legendre transform for all fields
       call leginv5(jm,zfms,zfma,dlsp,dlalp,kfield,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc)

       ! 2.4 Passage to Fourier space
       ilonr = 2*jm + 1
       iloni = 2*jm + 2
       do jj = 1, gst(gstID)%nj
          jj2 = gst(gstID)%nj - jj + 1
          if(jj.le.gst(gstID)%njlath) then
             do jk = gst(gstID)%myLevBeg,gst(gstID)%myLevEnd
                pgd2(ilonr,jk,jj) = zfms(jj,1,jk) + zfma(jj,1,jk)
                pgd2(iloni,jk,jj) = zfms(jj,2,jk) + zfma(jj,2,jk)
             enddo
          else
             do jk = gst(gstID)%myLevBeg,gst(gstID)%myLevEnd
                pgd2(ilonr,jk,jj) = zfms(jj2,1,jk) - zfma(jj2,1,jk)
                pgd2(iloni,jk,jj) = zfms(jj2,2,jk) - zfma(jj2,2,jk)
             enddo
          endif
       enddo
    enddo
!$OMP END PARALLEL DO

  END SUBROUTINE SPEREEPAR_MPI3



  SUBROUTINE SPEREEPAR_MPI2D(PSP,PGD2) 2,2
    !**s/r SPEREEPAR_MPI2D  - Inverse spectral transform(MPI PARALLEL LOOP)
    implicit none

    real(8) :: psp(gst(gstID)%nla,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: pgd2(2*gst(gstID)%maxmcount,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd,gst(gstID)%nj)

    ! local variables
    integer :: jj, jj2, jm, jn, ilonr, iloni, jk, ila, inm

    real(8) :: dlalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: dldalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: zfms(gst(gstID)%njlath+1,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: zfma(gst(gstID)%njlath+1,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: dlsp(0:gst(gstID)%ntrunc,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)

    ! Inverse Legendre transform

!$OMP PARALLEL DO PRIVATE(DLALP,DLDALP,DLSP,ZFMS,ZFMA, &
!$OMP INM,ILA,JM,JN,JK,JJ,JJ2,ILONR,ILONI)
    do jm = gst(gstID)%mymBeg, gst(gstID)%mymEnd, gst(gstID)%mymSkip

       ! 2.1 Copy global spectral state into local spectral state
       do jn = jm, gst(gstID)%ntrunc
          ila = gst(gstID)%nind(jm) + jn - jm
          inm = jn - jm
          do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
             dlsp(inm,1,jk) = psp(ila,1,jk)
             dlsp(inm,2,jk) = psp(ila,2,jk)
          enddo
       enddo

       ! 2.2  Get Legendre polynomial (and its derivative) for all latitudes
       !      but for the chosen value of "m" from the global array
       call getalp(dlalp,dldalp,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc,jm)

       ! 2.3  Perform the inverse Legendre transform for all fields
       call leginv2d(jm,zfms,zfma,dlsp,dlalp,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc)

       ! 2.4 Passage to Fourier space
       ilonr = 2*gst(gstID)%mymIndex(jm)-1
       iloni = 2*gst(gstID)%mymIndex(jm)

       do jj = 1, gst(gstID)%nj
          jj2 = gst(gstID)%nj - jj + 1
          if(jj.le.gst(gstID)%njlath) then
             do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
                pgd2(ilonr,jk,jj) = zfms(jj,1,jk) + zfma(jj,1,jk)
                pgd2(iloni,jk,jj) = zfms(jj,2,jk) + zfma(jj,2,jk)
             enddo
          else
             do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
                pgd2(ilonr,jk,jj) = zfms(jj2,1,jk) - zfma(jj2,1,jk)
                pgd2(iloni,jk,jj) = zfms(jj2,2,jk) - zfma(jj2,2,jk)
             enddo
          endif
       enddo
    enddo
!$OMP END PARALLEL DO

  END SUBROUTINE SPEREEPAR_MPI2D



  SUBROUTINE REESPEPAR_MPI2(PGD2,PSP,KFIELD,KDIM) 1,2
    implicit none

    integer :: kfield,kdim
    real(8) :: psp(gst(gstID)%nla,2,kdim)
    real(8) :: pgd2(2*gst(gstID)%maxmCount,kdim,gst(gstID)%nj)

    integer :: jj, jj2, jk, jk2, ilonr, iloni
    integer :: jm, ila, inm, jn
    real(8) :: dlalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: dldalp(0:gst(gstID)%ntrunc, gst(gstID)%njlath)
    real(8) :: dlsp(0:gst(gstID)%ntrunc,2,kfield)
    real(8) :: zfms(gst(gstID)%njlath+1,2,kfield)
    real(8) :: zfma( gst(gstID)%njlath+1,2,kfield)
    real(8) :: dlrwt(gst(gstID)%nj)

    ! 1. Adjustment needed when an odd number of latitudes is considered
    dlrwt(:) = gst(gstID)%rwt(:)
    if (mod(gst(gstID)%nj,2).ne.0) then
       dlrwt(gst(gstID)%njlath) = dlrwt(gst(gstID)%njlath)/2.d0
    end if

!$OMP PARALLEL DO PRIVATE(DLALP,DLDALP,DLSP,ZFMS,ZFMA, &
!$OMP   INM,ILA,JM,JN,JK,JK2,JJ,JJ2,ILONR,ILONI)
    do jm = gst(gstID)%mymBeg, gst(gstID)%mymEnd, gst(gstID)%mymSkip

       ilonr = 2*gst(gstID)%mymIndex(jm)-1
       iloni = 2*gst(gstID)%mymIndex(jm)

       ! 2.1 Fetch the Legendre functions and their derivatives for this choice of "m"
       call getalp(dlalp,dldalp,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc,jm)

       ! 2.2  Build the symmetric and anti-symmetric Fourier coefficients including
       !      the appropriate quadrature weights (see scientific notes)
       do jj = 1,gst(gstID)%njlath
          do jk = 1,kfield
             zfms(jj,1,jk) = 0.0d0
             zfms(jj,2,jk) = 0.0d0
             zfma(jj,1,jk) = 0.0d0
             zfma(jj,2,jk) = 0.0d0
          enddo
       enddo

       do jj = 1,gst(gstID)%nj
          jj2 = gst(gstID)%nj-jj+1
          if(jj.le.gst(gstID)%njlath) then
             ! Northern hemisphere
             do jk = 1,kfield
                zfms(jj,1,jk) = pgd2(ilonr,jk,jj)
                zfms(jj,2,jk) = pgd2(iloni,jk,jj)
                zfma(jj,1,jk) = pgd2(ilonr,jk,jj)
                zfma(jj,2,jk) = pgd2(iloni,jk,jj)
             enddo
          else
             ! Southern hemisphere
             do jk = 1,kfield
                zfms(jj2,1,jk) = zfms(jj2,1,jk) + pgd2(ilonr,jk,jj)
                zfms(jj2,2,jk) = zfms(jj2,2,jk) + pgd2(iloni,jk,jj)
                zfma(jj2,1,jk) = zfma(jj2,1,jk) - pgd2(ilonr,jk,jj)
                zfma(jj2,2,jk) = zfma(jj2,2,jk) - pgd2(iloni,jk,jj)
             enddo
          endif
       enddo

       do jj = 1,gst(gstID)%njlath
          do jk = 1,kfield
             zfms(jj,1,jk) = dlrwt(jj)*zfms(jj,1,jk)
             zfms(jj,2,jk) = dlrwt(jj)*zfms(jj,2,jk)
             zfma(jj,1,jk) = dlrwt(jj)*zfma(jj,1,jk)
             zfma(jj,2,jk) = dlrwt(jj)*zfma(jj,2,jk)
          enddo
       enddo

       ! 2.3 First one with ALP for all scalar fields and for half the terms
       !     required to define the divergence and vorticity
       call legdir4(jm,zfms,zfma,dlsp,dlalp,kfield,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc)

       ! 2.4 Transfer the result in the global state
       do jn = jm, gst(gstID)%ntrunc
          ila = gst(gstID)%nind(jm) + jn - jm
          inm = jn - jm
          do jk2 = 1,kfield
             psp(ila,1,jk2) = dlsp(inm,1,jk2)
             psp(ila,2,jk2) = dlsp(inm,2,jk2)
          enddo
       enddo

    ! End of loop on zonal wavenumbers
    enddo
!$OMP END PARALLEL DO

  END SUBROUTINE REESPEPAR_MPI2



  SUBROUTINE REESPEPAR_MPI3(PGD2,PSP,KFIELD,KDIM) 1,2
    implicit none

    integer :: kfield,kdim
    real(8) :: psp(gst(gstID)%nla,2,kdim)
    real(8) :: pgd2(2*(gst(gstID)%ntrunc+1),gst(gstID)%myLevBeg:gst(gstID)%myLevEnd,gst(gstID)%nj)

    integer :: ilen,jj,jj2,jk,jk2,ilonr, iloni
    integer :: jm, ila, inm, jn
    real(8) :: dlalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: dldalp(0:gst(gstID)%ntrunc, gst(gstID)%njlath)
    real(8) :: dlsp(0:gst(gstID)%ntrunc,2,kfield)
    real(8) :: zfms(gst(gstID)%njlath+1,2,kfield)
    real(8) :: zfma(gst(gstID)%njlath+1,2,kfield)
    real(8) :: dlrwt(gst(gstID)%nj)

    ! 1. Adjustment needed when an odd number of latitudes is considered
    dlrwt(:) = gst(gstID)%rwt(:)
    if (mod(gst(gstID)%nj,2).ne.0) then
       dlrwt(gst(gstID)%njlath) = dlrwt(gst(gstID)%njlath)/2.d0
    end if

!$OMP PARALLEL DO PRIVATE(DLALP,DLDALP,DLSP,ZFMS,ZFMA, &
!$OMP   INM,ILA,JM,JN,JK,JK2,JJ,JJ2,ILONR,ILONI)
    do jm = 0, gst(gstID)%ntrunc

          ilonr = 2 * jm + 1
          iloni = ilonr + 1

          ! 2.1 Fetch the Legendre functions and their derivatives for this choice of "m"
          call getalp(dlalp,dldalp,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc,jm)

          ! 2.2  Build the symmetric and anti-symmetric Fourier coefficients including
          !      the appropriate quadrature weights (see scientific notes)
          do jj = 1,gst(gstID)%njlath
             do jk = gst(gstID)%myLevBeg,gst(gstID)%myLevEnd
                zfms(jj,1,jk) = 0.0d0
                zfms(jj,2,jk) = 0.0d0
                zfma(jj,1,jk) = 0.0d0
                zfma(jj,2,jk) = 0.0d0
             enddo
          enddo

          do jj = 1,gst(gstID)%nj
             jj2 = gst(gstID)%nj-jj+1
             if(jj.le.gst(gstID)%njlath) then
                ! Northern hemisphere
                do jk = gst(gstID)%myLevBeg,gst(gstID)%myLevEnd
                   zfms(jj,1,jk) = pgd2(ilonr,jk,jj)
                   zfms(jj,2,jk) = pgd2(iloni,jk,jj)
                   zfma(jj,1,jk) = pgd2(ilonr,jk,jj)
                   zfma(jj,2,jk) = pgd2(iloni,jk,jj)
                enddo
             else
                ! Southern hemisphere
                do jk = gst(gstID)%myLevBeg,gst(gstID)%myLevEnd
                   zfms(jj2,1,jk) = zfms(jj2,1,jk) + pgd2(ilonr,jk,jj)
                   zfms(jj2,2,jk) = zfms(jj2,2,jk) + pgd2(iloni,jk,jj)
                   zfma(jj2,1,jk) = zfma(jj2,1,jk) - pgd2(ilonr,jk,jj)
                   zfma(jj2,2,jk) = zfma(jj2,2,jk) - pgd2(iloni,jk,jj)
                enddo
             endif
          enddo

          do jj = 1,gst(gstID)%njlath
             do jk = gst(gstID)%myLevBeg,gst(gstID)%myLevEnd
                zfms(jj,1,jk) = dlrwt(jj)*zfms(jj,1,jk)
                zfms(jj,2,jk) = dlrwt(jj)*zfms(jj,2,jk)
                zfma(jj,1,jk) = dlrwt(jj)*zfma(jj,1,jk)
                zfma(jj,2,jk) = dlrwt(jj)*zfma(jj,2,jk)
             enddo
          enddo

          ! 2.3 First one with ALP for all scalar fields and for half the terms
          !     required to define the divergence and vorticity
          call legdir5(jm,zfms,zfma,dlsp,dlalp,kfield,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc)

          ! 2.4 Transfer the result in the global state
          do jn = jm, gst(gstID)%ntrunc
             ila = gst(gstID)%nind(jm) + jn - jm
             inm = jn - jm
             do jk2 = gst(gstID)%myLevBeg,gst(gstID)%myLevEnd
                psp(ila,1,jk2) = dlsp(inm,1,jk2)
                psp(ila,2,jk2) = dlsp(inm,2,jk2)
             enddo
          enddo

    ! End of loop on zonal wavenumbers
    enddo
!$OMP END PARALLEL DO

  END SUBROUTINE REESPEPAR_MPI3



  SUBROUTINE REESPEPAR_MPI2D(PGD2,PSP) 2,2
    implicit none

    real(8) :: psp(gst(gstID)%nla,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: pgd2(2*gst(gstID)%maxmcount,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd,gst(gstID)%nj)

    integer :: ilen,jj,jj2,jk,ilonr, iloni
    integer :: jm, ila, inm, jn

    real(8) :: dlalp(0:gst(gstID)%ntrunc,gst(gstID)%njlath)
    real(8) :: dldalp(0:gst(gstID)%ntrunc, gst(gstID)%njlath)
    real(8) :: dlsp(0:gst(gstID)%ntrunc,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: zfms(gst(gstID)%njlath+1,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: zfma(gst(gstID)%njlath+1,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: dlrwt(gst(gstID)%nj)

    ! 1. Adjustment needed when an odd number of latitudes is considered
    dlrwt(:) = gst(gstID)%rwt(:)
    if (mod(gst(gstID)%nj,2).ne.0) then
       dlrwt(gst(gstID)%njlath) = dlrwt(gst(gstID)%njlath)/2.d0
    end if

!$OMP PARALLEL DO PRIVATE(DLALP,DLDALP,DLSP,ZFMS,ZFMA, &
!$OMP INM,ILA,JM,JN,JK,JJ,JJ2,ILONR,ILONI)
    do jm = gst(gstID)%mymBeg, gst(gstID)%mymEnd, gst(gstID)%mymSkip

          ! 2.1 Fetch the Legendre functions and their derivatives for this choice of "m"
          call getalp(dlalp,dldalp,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc,jm)

          ! 2.2  Build the symmetric and anti-symmetric Fourier coefficients including
          !      the appropriate quadrature weights (see scientific notes)
          do jj = 1, gst(gstID)%njlath
             do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
                zfms(jj,1,jk) = 0.0d0
                zfms(jj,2,jk) = 0.0d0
                zfma(jj,1,jk) = 0.0d0
                zfma(jj,2,jk) = 0.0d0
             enddo
          enddo

          ilonr = 2*gst(gstID)%mymIndex(jm)-1
          iloni = 2*gst(gstID)%mymIndex(jm)

          do jj = 1, gst(gstID)%nj
             jj2 = gst(gstID)%nj-jj+1
             if(jj.le.gst(gstID)%njlath) then
                ! Northern hemisphere
                do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
                   zfms(jj,1,jk) = pgd2(ilonr,jk,jj)
                   zfms(jj,2,jk) = pgd2(iloni,jk,jj)
                   zfma(jj,1,jk) = pgd2(ilonr,jk,jj)
                   zfma(jj,2,jk) = pgd2(iloni,jk,jj)
                enddo
             else
                ! Southern hemisphere
                do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
                   zfms(jj2,1,jk) = zfms(jj2,1,jk) + pgd2(ilonr,jk,jj)
                   zfms(jj2,2,jk) = zfms(jj2,2,jk) + pgd2(iloni,jk,jj)
                   zfma(jj2,1,jk) = zfma(jj2,1,jk) - pgd2(ilonr,jk,jj)
                   zfma(jj2,2,jk) = zfma(jj2,2,jk) - pgd2(iloni,jk,jj)
                enddo
             endif
          enddo

          do jj = 1,gst(gstID)%njlath
             do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
                zfms(jj,1,jk) = dlrwt(jj)*zfms(jj,1,jk)
                zfms(jj,2,jk) = dlrwt(jj)*zfms(jj,2,jk)
                zfma(jj,1,jk) = dlrwt(jj)*zfma(jj,1,jk)
                zfma(jj,2,jk) = dlrwt(jj)*zfma(jj,2,jk)
             enddo
          enddo

          ! 2.3 First one with ALP for all scalar fields and for half the terms
          !     required to define the divergence and vorticity
          call legdir2d(jm,zfms,zfma,dlsp,dlalp,gst(gstID)%njlath,gst(gstID)%ntrunc,gst(gstID)%ntrunc)

          ! 2.4 Transfer the result in the global state
          do jn = jm, gst(gstID)%ntrunc
             ila = gst(gstID)%nind(jm) + jn - jm
             inm = jn - jm
             do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
                psp(ila,1,jk) = dlsp(inm,1,jk)
                psp(ila,2,jk) = dlsp(inm,2,jk)
             enddo
          enddo

    ! End of loop on zonal wavenumbers
    enddo
!$OMP END PARALLEL DO

  END SUBROUTINE REESPEPAR_MPI2D



  SUBROUTINE LEGDIR3(KM,PFMS,PFMA,DDSP,DDALP,KDIM,KLATH,KTRUNC,KTRUNCDIM) 4
    implicit none

    integer :: km, kdim, ktrunc, ktruncdim, klath
    real(8) :: pfms(gst(gstID)%njlath+1,2*kdim)
    real(8) :: pfma(gst(gstID)%njlath+1,2*kdim)
    real(8) :: ddalp(0:ktruncdim,klath)
    real(8) :: ddsp(0:ktruncdim,2*kdim)

    integer :: jk, jlat, jn, inm, itrunc, inmp1

    itrunc = ktrunc
    if(mod(ktrunc-km+1,2).eq.1) itrunc = ktrunc-1

    if(km.ne.ktrunc)then
       ddsp(0:ktrunc,1:2*kdim) = 0.d0
       do jlat = gst(gstID)%myLatHalfBeg,gst(gstID)%myLatHalfEnd
          do jk = 1,2*kdim
             do jn = km, itrunc, 2
                inm = jn - km
                inmp1 = inm + 1
                ddsp(inm,jk)   = ddsp(inm,jk) +  ddalp(inm  ,jlat)*pfms(jlat,jk)
                ddsp(inmp1,jk) = ddsp(inmp1,jk)+ ddalp(inmp1,jlat)*pfma(jlat,jk)
             enddo
          enddo
       enddo
    end if

    if(mod(ktrunc-km+1,2).eq.1) then
       jn = ktrunc
       inm = jn - km
       ddsp(inm,1:2*kdim) = 0.d0

       do jlat = gst(gstID)%myLatHalfBeg,gst(gstID)%myLatHalfEnd
          do jk = 1,2*kdim
             ddsp(inm,jk) = ddsp(inm,jk) + ddalp(inm,jlat)*pfms(jlat,jk )
          enddo
       enddo
    end if

  END SUBROUTINE LEGDIR3



  SUBROUTINE LEGDIR4(KM,PFMS,PFMA,DDSP,DDALP,KDIM,KLATH,KTRUNC,KTRUNCDIM) 3
    implicit none

    integer :: km, kdim, ktrunc, ktruncdim, klath
    real(8) :: pfms(gst(gstID)%njlath+1,2*kdim)
    real(8) :: pfma(gst(gstID)%njlath+1,2*kdim)
    real(8) :: ddalp(0:ktruncdim,klath)
    real(8) :: ddsp(0:ktruncdim,2*kdim)

    integer :: jk, jlat, jn, inm, itrunc, inmp1

    itrunc = ktrunc
    if(mod(ktrunc-km+1,2).eq.1) itrunc = ktrunc-1

    if(km.ne.ktrunc)then
       ddsp(0:ktrunc,1:2*kdim) = 0.d0
       do jlat = 1,klath
          do jk = 1,2*kdim
             do jn = km, itrunc, 2
                inm = jn - km
                inmp1 = inm + 1
                ddsp(inm,jk)   = ddsp(inm,jk) +  ddalp(inm  ,jlat)*pfms(jlat,jk)
                ddsp(inmp1,jk) = ddsp(inmp1,jk)+ ddalp(inmp1,jlat)*pfma(jlat,jk)
             enddo
          enddo
       enddo
    end if

    if(mod(ktrunc-km+1,2).eq.1) then
       jn = ktrunc
       inm = jn - km
       ddsp(inm,1:2*kdim) = 0.d0

       do jlat = 1,klath
          do jk = 1,2*kdim
             ddsp(inm,jk) = ddsp(inm,jk) + ddalp(inm,jlat)*pfms(jlat,jk )
          enddo
       enddo
    end if

  END SUBROUTINE LEGDIR4



  SUBROUTINE LEGDIR5(KM,PFMS,PFMA,DDSP,DDALP,KDIM,KLATH,KTRUNC,KTRUNCDIM) 1
    implicit none

    integer :: km, kdim, ktrunc, ktruncdim, klath
    real(8) :: pfms(gst(gstID)%njlath+1,2,kdim)
    real(8) :: pfma(gst(gstID)%njlath+1,2,kdim)
    real(8) :: ddalp(0:ktruncdim,klath)
    real(8) :: ddsp(0:ktruncdim,2,kdim)

    integer :: jk, jlat, jn, inm, itrunc, inmp1, jind, ink

    itrunc = ktrunc
    if(mod(ktrunc-km+1,2).eq.1) itrunc = ktrunc-1

    if(km.ne.ktrunc)then
       ddsp(0:ktrunc,:,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd) = 0.d0
       do jlat = 1, klath
          do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
             do jn = km, itrunc, 2
                inm = jn - km
                inmp1 = inm + 1
                ddsp(inm,1,jk)   = ddsp(inm,1,jk) +  ddalp(inm  ,jlat)*pfms(jlat,1,jk)
                ddsp(inm,2,jk)   = ddsp(inm,2,jk) +  ddalp(inm  ,jlat)*pfms(jlat,2,jk)
                ddsp(inmp1,1,jk) = ddsp(inmp1,1,jk)+ ddalp(inmp1,jlat)*pfma(jlat,1,jk)
                ddsp(inmp1,2,jk) = ddsp(inmp1,2,jk)+ ddalp(inmp1,jlat)*pfma(jlat,2,jk)
             enddo
          enddo
       enddo
    end if

    if(mod(ktrunc-km+1,2).eq.1) then
       jn = ktrunc
       inm = jn - km
       ddsp(inm,:,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd) = 0.d0

       do jlat = 1, klath
          do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
             ddsp(inm,1,jk) = ddsp(inm,1,jk) + ddalp(inm,jlat)*pfms(jlat,1,jk )
             ddsp(inm,2,jk) = ddsp(inm,2,jk) + ddalp(inm,jlat)*pfms(jlat,2,jk )
          enddo
       enddo
    end if

  END SUBROUTINE LEGDIR5



  SUBROUTINE LEGDIR2D(KM,PFMS,PFMA,DDSP,DDALP,KLATH,KTRUNC,KTRUNCDIM) 3
    implicit none

    integer :: km, ktrunc, ktruncdim, klath
    real(8) :: pfms(gst(gstID)%njlath+1,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: pfma(gst(gstID)%njlath+1,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: ddalp(0:ktruncdim,klath)
    real(8) :: ddsp(0:ktruncdim,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)

    integer :: jk, jlat, jn, inm, itrunc, inmp1, jind, ink

    itrunc = ktrunc
    if(mod(ktrunc-km+1,2).eq.1) itrunc = ktrunc-1

    if(km.ne.ktrunc)then
       ddsp(0:ktrunc,:,:) = 0.d0
       do jlat = 1, klath
          do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
             do jn = km, itrunc, 2
                inm = jn - km
                inmp1 = inm + 1
                ddsp(inm,  1,jk) = ddsp(inm,  1,jk) + ddalp(inm,  jlat)*pfms(jlat,1,jk)
                ddsp(inm,  2,jk) = ddsp(inm,  2,jk) + ddalp(inm,  jlat)*pfms(jlat,2,jk)
                ddsp(inmp1,1,jk) = ddsp(inmp1,1,jk) + ddalp(inmp1,jlat)*pfma(jlat,1,jk)
                ddsp(inmp1,2,jk) = ddsp(inmp1,2,jk) + ddalp(inmp1,jlat)*pfma(jlat,2,jk)
             enddo
          enddo
       enddo
    end if

    if(mod(ktrunc-km+1,2).eq.1) then
       jn = ktrunc
       inm = jn - km
       ddsp(inm,:,:) = 0.d0
       do jlat = 1, klath
          do jk = gst(gstID)%myLevBeg, gst(gstID)%myLevEnd
             ddsp(inm,1,jk) = ddsp(inm,1,jk) + ddalp(inm,jlat)*pfms(jlat,1,jk )
             ddsp(inm,2,jk) = ddsp(inm,2,jk) + ddalp(inm,jlat)*pfms(jlat,2,jk )
          enddo
       enddo
    end if

  END SUBROUTINE LEGDIR2D



  SUBROUTINE LEGINV3(KM,PFMS,PFMA,DDSP,DDALP,KDIM,KLATH,KTRUNC,KTRUNCDIM)
    implicit none

    integer :: km, kdim, ktrunc, ktruncdim, klath
    real(8) :: pfms(gst(gstID)%njlath+1,2*kdim)
    real(8) :: pfma(gst(gstID)%njlath+1,2*kdim)
    real(8) :: ddalp(0:ktruncdim,klath)
    real(8) :: ddsp(0:ktruncdim,2*kdim)

    integer :: jk, jlat, jn, inm, itrunc, inmp1

    pfms(:,:) = 0.d0
    pfma(:,:) = 0.d0

    itrunc = ktrunc
    if(mod(ktrunc-km+1,2).eq.1) itrunc = ktrunc-1

    if(km.ne.ktrunc)then
       do jlat = gst(gstID)%myLatHalfBeg,gst(gstID)%myLatHalfEnd
          do jk = 1,2*kdim
             do jn = km, itrunc, 2
                inm = jn - km
                inmp1  = inm + 1
                pfms(jlat,jk) = pfms(jlat,jk) + ddalp(inm,jlat) * ddsp(inm,jk)
                pfma(jlat,jk) = pfma(jlat,jk) + ddalp(inmp1,jlat) * ddsp(inmp1,jk)
             enddo
          enddo
       enddo
    end if

    if(mod(ktrunc-km+1,2).eq.1) then
       jn = ktrunc
       if ( km .ne. ktrunc) then
          inm = jn - km
          do jlat = gst(gstID)%myLatHalfBeg,gst(gstID)%myLatHalfEnd
             do jk = 1, 2*kdim
                pfms(jlat,jk) = pfms(jlat,jk) + ddalp(inm,jlat) * ddsp(inm,jk)
             enddo
          enddo
       else
          inm = jn - km
          do jlat = gst(gstID)%myLatHalfBeg,gst(gstID)%myLatHalfEnd
             do jk = 1, 2*kdim
                pfms(jlat,jk) = ddalp(inm,jlat) * ddsp(inm,jk)
             enddo
          enddo
       end if
    end if

  END SUBROUTINE LEGINV3



  SUBROUTINE LEGINV4(KM,PFMS,PFMA,DDSP,DDALP,KDIM,KLATH,KTRUNC,KTRUNCDIM) 3
    implicit none

    integer :: km, kdim, ktrunc, ktruncdim, klath
    real(8) :: pfms(gst(gstID)%njlath+1,2*kdim)
    real(8) :: pfma(gst(gstID)%njlath+1,2*kdim)
    real(8) :: ddalp(0:ktruncdim,klath)
    real(8) :: ddsp(0:ktruncdim,2*kdim)

    integer :: jk, jlat, jn, inm, itrunc, inmp1

    pfms(:,:) = 0.d0
    pfma(:,:) = 0.d0

    itrunc = ktrunc
    if(mod(ktrunc-km+1,2).eq.1) itrunc = ktrunc-1

    if(km.ne.ktrunc)then
       do jlat = 1, klath
          do jk = 1,2*kdim
             do jn = km, itrunc, 2
                inm = jn - km
                inmp1  = inm + 1
                pfms(jlat,jk) = pfms(jlat,jk) + ddalp(inm,jlat) * ddsp(inm,jk)
                pfma(jlat,jk) = pfma(jlat,jk) + ddalp(inmp1,jlat) * ddsp(inmp1,jk)
             enddo
          enddo
       enddo
    end if

    if(mod(ktrunc-km+1,2).eq.1) then
       jn = ktrunc
       if ( km .ne. ktrunc) then
          inm = jn - km
          do jlat = 1, klath
             do jk = 1, 2*kdim
                pfms(jlat,jk) = pfms(jlat,jk) + ddalp(inm,jlat) * ddsp(inm,jk)
             enddo
          enddo
       else
          inm = jn - km
          do jlat = 1, klath
             do jk = 1, 2*kdim
                pfms(jlat,jk) = ddalp(inm,jlat) * ddsp(inm,jk)
             enddo
          enddo
       end if
    end if

  END SUBROUTINE LEGINV4



  SUBROUTINE LEGINV5(KM,PFMS,PFMA,DDSP,DDALP,KDIM,KLATH,KTRUNC,KTRUNCDIM) 1
    implicit none

    integer :: km, kdim, ktrunc, ktruncdim, klath
    real(8) :: pfms(gst(gstID)%njlath+1,2,kdim)
    real(8) :: pfma(gst(gstID)%njlath+1,2,kdim)
    real(8) :: ddalp(0:ktruncdim,klath)
    real(8) :: ddsp(0:ktruncdim,2,kdim)

    integer :: jk, jlat, jn, inm, itrunc, inmp1

    pfms(:,:,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd) = 0.d0
    pfma(:,:,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd) = 0.d0

    itrunc = ktrunc
    if(mod(ktrunc-km+1,2).eq.1) itrunc = ktrunc-1

    if(km.ne.ktrunc)then
       do jlat = 1, klath
          do jk = gst(gstID)%myLevBeg,gst(gstID)%myLevEnd
             do jn = km, itrunc, 2
                inm = jn - km
                inmp1  = inm + 1
                pfms(jlat,1,jk) = pfms(jlat,1,jk) + ddalp(inm,  jlat) * ddsp(inm,  1,jk)
                pfma(jlat,1,jk) = pfma(jlat,1,jk) + ddalp(inmp1,jlat) * ddsp(inmp1,1,jk)
                pfms(jlat,2,jk) = pfms(jlat,2,jk) + ddalp(inm,  jlat) * ddsp(inm,  2,jk)
                pfma(jlat,2,jk) = pfma(jlat,2,jk) + ddalp(inmp1,jlat) * ddsp(inmp1,2,jk)
             enddo
          enddo
       enddo
    end if

    if(mod(ktrunc-km+1,2).eq.1) then
       jn = ktrunc
       if ( km .ne. ktrunc) then
          inm = jn - km
          do jlat = 1, klath
             do jk = gst(gstID)%myLevBeg,gst(gstID)%myLevEnd
                pfms(jlat,1,jk) = pfms(jlat,1,jk) + ddalp(inm,jlat) * ddsp(inm,1,jk)
                pfms(jlat,2,jk) = pfms(jlat,2,jk) + ddalp(inm,jlat) * ddsp(inm,2,jk)
             enddo
          enddo
       else
          inm = jn - km
          do jlat = 1, klath
             do jk = gst(gstID)%myLevBeg,gst(gstID)%myLevEnd
                pfms(jlat,1,jk) = ddalp(inm,jlat) * ddsp(inm,1,jk)
                pfms(jlat,2,jk) = ddalp(inm,jlat) * ddsp(inm,2,jk)
             enddo
          enddo
       end if
    end if

  END SUBROUTINE LEGINV5



  SUBROUTINE LEGINV2D(KM,PFMS,PFMA,DDSP,DDALP,KLATH,KTRUNC,KTRUNCDIM) 3
    implicit none

    integer :: km, ktrunc, ktruncdim, klath
    real(8) :: pfms(gst(gstID)%njlath+1,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: pfma(gst(gstID)%njlath+1,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)
    real(8) :: ddalp(0:ktruncdim,klath)
    real(8) :: ddsp(0:ktruncdim,2,gst(gstID)%myLevBeg:gst(gstID)%myLevEnd)

    integer :: jk, jlat, jn, inm, itrunc, inmp1

    pfms(:,:,:) = 0.d0
    pfma(:,:,:) = 0.d0

    itrunc = ktrunc
    if(mod(ktrunc-km+1,2).eq.1) itrunc = ktrunc-1

    if(km.ne.ktrunc)then
       do jlat = 1, klath
          do jk = gst(gstID)%myLevBeg,gst(gstID)%myLevEnd
             do jn = km, itrunc, 2
                inm = jn - km
                inmp1  = inm + 1
                pfms(jlat,1,jk) = pfms(jlat,1,jk) + ddalp(inm,  jlat) * ddsp(inm,  1,jk)
                pfma(jlat,1,jk) = pfma(jlat,1,jk) + ddalp(inmp1,jlat) * ddsp(inmp1,1,jk)
                pfms(jlat,2,jk) = pfms(jlat,2,jk) + ddalp(inm,  jlat) * ddsp(inm,  2,jk)
                pfma(jlat,2,jk) = pfma(jlat,2,jk) + ddalp(inmp1,jlat) * ddsp(inmp1,2,jk)
             enddo
          enddo
       enddo
    end if

    if(mod(ktrunc-km+1,2).eq.1) then
       jn = ktrunc
       if ( km .ne. ktrunc) then
          inm = jn - km
          do jlat = 1, klath
             do jk = gst(gstID)%myLevBeg,gst(gstID)%myLevEnd
                pfms(jlat,1,jk) = pfms(jlat,1,jk) + ddalp(inm,jlat) * ddsp(inm,1,jk)
                pfms(jlat,2,jk) = pfms(jlat,2,jk) + ddalp(inm,jlat) * ddsp(inm,2,jk)
             enddo
          enddo
       else
          inm = jn - km
          do jlat = 1, klath
             do jk = gst(gstID)%myLevBeg,gst(gstID)%myLevEnd
                pfms(jlat,1,jk) = ddalp(inm,jlat) * ddsp(inm,1,jk)
                pfms(jlat,2,jk) = ddalp(inm,jlat) * ddsp(inm,2,jk)
             enddo
          enddo
       end if
    end if

  END SUBROUTINE LEGINV2D

!------------------------------------------------------
! Subroutine for initializing the Legendre transform
!------------------------------------------------------


  SUBROUTINE ALLOCATE_COMLEG 1
    implicit none

    allocate(gst(gstID)%rmu(gst(gstID)%nj))  
    allocate(gst(gstID)%rwt(gst(gstID)%nj))
    allocate(gst(gstID)%rwocs(gst(gstID)%nj))
    allocate(gst(gstID)%r1mu2(gst(gstID)%nj))
    allocate(gst(gstID)%rsqm2(gst(gstID)%nj))
    allocate(gst(gstID)%rcolat(gst(gstID)%nj))
    allocate(gst(gstID)%r1qm2(gst(gstID)%nj))
    allocate(gst(gstID)%r1mui(gst(gstID)%nj))
    allocate(gst(gstID)%r1mua(gst(gstID)%nj))
    allocate(gst(gstID)%rlati((-1):(gst(gstID)%nj+2)))
    allocate(gst(gstID)%nind(0:gst(gstID)%ntrunc))
    allocate(gst(gstID)%nindrh(0:gst(gstID)%ntrunc))
    allocate(gst(gstID)%nclm(0:gst(gstID)%ntrunc))

  END SUBROUTINE ALLOCATE_COMLEG



  SUBROUTINE SULEG(lverbose_in) 1,1
    !
    !**s/r SULEG  - Initialisation of Gaussian latitudes, weights and related
    !     .         quantities
    implicit none
    logical, optional :: lverbose_in

    logical :: lverbose
    integer :: jlat, jm
    real(8) :: zpisu2
    external gauss

    if(present(lverbose_in)) then
      lverbose = lverbose_in
    else
      lverbose = .false.
    endif

    if(mpi_myid.eq.0) write(*,fmt='(//,6(" ***********"))')
    if(mpi_myid.eq.0) write(*,*)'     SULEG: initialisation of Gaussian', &
         ' latitudes, weights, etc...'
    if(mpi_myid.eq.0) write(*,fmt='(6(" ***********"))')

    !     1. GAUSSIAN LATITUDES AND WEIGHTS OVER AN HEMISPHERE
    !     -------------------------------------------------

    call gauss8(gst(gstID)%njlath,gst(gstID)%rmu(1),gst(gstID)%rwt(1), &
                gst(gstID)%rsqm2(1),gst(gstID)%rcolat(1),gst(gstID)%rwocs(1), &
                gst(gstID)%r1qm2(1),gst(gstID)%r1mui(1),gst(gstID)%r1mu2(1))

    do jlat = 1, gst(gstID)%njlath
       gst(gstID)%rlati(jlat) = asin(gst(gstID)%rmu(jlat))
       gst(gstID)%r1mua(jlat) = r1sa*gst(gstID)%r1mui(jlat)
    enddo

    !     2. COMPLETION FOR THE SOUTHERN HEMISPHERE
    !     --------------------------------------

    do jlat = gst(gstID)%njlath +1, gst(gstID)%nj
       gst(gstID)%rmu(jlat)   =  -gst(gstID)%rmu(2*gst(gstID)%njlath +1 - jlat)
       gst(gstID)%rwocs(jlat) =   gst(gstID)%rwocs(2*gst(gstID)%njlath +1 - jlat)
       gst(gstID)%r1mu2(jlat) =   gst(gstID)%r1mu2(2*gst(gstID)%njlath +1 - jlat)
       gst(gstID)%rsqm2(jlat) =   gst(gstID)%rsqm2(2*gst(gstID)%njlath +1 - jlat)
       gst(gstID)%r1qm2(jlat) =   gst(gstID)%r1qm2(2*gst(gstID)%njlath +1 - jlat)
       gst(gstID)%r1mui(jlat) =   gst(gstID)%r1mui(2*gst(gstID)%njlath +1 - jlat)
       gst(gstID)%r1mua(jlat) =   gst(gstID)%r1mua(2*gst(gstID)%njlath +1 - jlat)
       gst(gstID)%rwt(jlat)   =   gst(gstID)%rwt(2*gst(gstID)%njlath +1 - jlat)
       gst(gstID)%rlati(jlat) = - gst(gstID)%rlati (2*gst(gstID)%njlath +1 - jlat)
    enddo

    zpisu2 = MPC_PI_R8/2.d0
    do jlat = 1, gst(gstID)%nj
       gst(gstID)%rcolat(jlat) = zpisu2 - gst(gstID)%rlati(jlat)
    enddo

    !*    3. Overdimensioning for interpolation

    gst(gstID)%rlati(-1) =   MPC_PI_R8-gst(gstID)%rlati(1)
    gst(gstID)%rlati(0) =   MPC_PI_R8*.5d0
    gst(gstID)%rlati(gst(gstID)%nj+1) =-MPC_PI_R8*.5d0
    gst(gstID)%rlati(gst(gstID)%nj+2) =-MPC_PI_R8-gst(gstID)%rlati(gst(gstID)%nj)

    !*    4. Print the content of GAUS

    if(lverbose.and.mpi_myid.eq.0) write(*,fmt='(" JLAT:",4X," RLATI",8X,"RCOLAT",8X,"RMU",10X ,"RWT",12X,"RW0CS")')
    do jlat = 1, gst(gstID)%nj
       if(lverbose.and.mpi_myid.eq.0) write(*,fmt='(2X,I4,5(2X,G23.16))')  &
            jlat,gst(gstID)%rlati(jlat),gst(gstID)%rcolat(jlat), gst(gstID)%rmu(jlat)  &
            ,gst(gstID)%rwt(jlat),gst(gstID)%rwocs(jlat)
    enddo

    if(lverbose.and.mpi_myid.eq.0) write(*,fmt='(//," JLAT:",4X,"R1MU2",8X,"RSQM2",9X,"R1QM2",10X,"R1MUI",10X,"R1MUA")')

    do jlat = 1, gst(gstID)%nj
       if(lverbose.and.mpi_myid.eq.0)  &
         write(*,fmt='(2X,I4,5(2X,G23.16))') jlat,gst(gstID)%r1mu2(jlat),gst(gstID)%rsqm2(jlat),gst(gstID)%r1qm2(jlat)  &
              ,gst(gstID)%r1mui(jlat),gst(gstID)%r1mua(jlat)
    enddo

    !*    5.  Positioning within spectral arrays

    do jm = 0, gst(gstID)%ntrunc
       gst(gstID)%nind(jm)   = jm*(gst(gstID)%ntrunc+1) - (jm*(jm-1))/2 + 1
       gst(gstID)%nindrh(jm) = jm*(gst(gstID)%ntrunc+1) + 1
       gst(gstID)%nclm(jm)   = gst(gstID)%ntrunc - jm + 1
    enddo

    if(lverbose.and.mpi_myid.eq.0) write(*,fmt='(/," NIND(0:NTRUNC):",/,10(2X,I8))')  &
         (gst(gstID)%nind(jm),jm=0,gst(gstID)%ntrunc)
    if(lverbose.and.mpi_myid.eq.0) write(*,fmt='(" NINDRH(0:NTRUNC):",/,10(2X,I8))')  &
         (gst(gstID)%nindrh(jm),jm=0,gst(gstID)%ntrunc)
    if(lverbose.and.mpi_myid.eq.0) write(*,fmt='("   NCLM(0:NTRUNC):",/,10(2X,I8))')  &
         (gst(gstID)%nclm(jm),jm=0,gst(gstID)%ntrunc)

  END SUBROUTINE SULEG



  SUBROUTINE GAUSS8(NRACP,RACP,PG,SIA,RAD,PGSSIN2,SINM1,SINM2,SIN2) 3,8
    implicit none

    integer :: nracp
    real(8) :: racp(*),pg(*),rad(*),pgssin2(*),sinm2(*),sin2(*),sia(*),sinm1(*)
    real(8) :: xlim,pi,fi,fi1,fn,dot,dn,dn1,a,b,c,g,gm,gp,gt,ractemp,gtemp
    integer :: i,ir,irm,irp

    xlim = 1.d-13
    pi = 4.d0*atan(1.d0)
    ir = 2*nracp
    fi = dble(ir)
    fi1 = fi+1.d0
    fn = dble(nracp)

    do i = 1,nracp
       dot = dble(i-1)
       racp(i) = -pi*.5d0*(dot+.5d0)/fn + pi*.5d0
       racp(i) =  sin(racp(i))
    enddo

    dn = fi/sqrt(4.d0*fi*fi-1.d0)
    dn1 = fi1/sqrt(4.d0*fi1*fi1-1.d0)
    a = dn1*fi
    b = dn*fi1
    irp = ir + 1
    irm = ir -1

    do i = 1,nracp
42     call ordleg8(g,racp(i),ir)
       call ordleg8(gm,racp(i),irm)
       call ordleg8(gp,racp(i),irp)
       gt = (a*gp-b*gm)/(racp(i)*racp(i)-1.d0)
       ractemp = racp(i) - g/gt
       gtemp = racp(i) - ractemp
       racp(i) = ractemp
       if( abs(gtemp).gt.xlim) go to 42
    enddo

    do i = 1,nracp
       a = 2.d0*(1.d0-racp(i)**2)
       call ordleg8(b,racp(i),irm)
       b = b*b*fi*fi
       pg(i) = a*(fi-.5d0)/b
       rad(i) =   acos(racp(i))
       sia(i) =  sin(rad(i))
       c = (sia(i))**2
       sinm1(i) = 1.d0/sia(i)
       sinm2(i) = 1.d0/c
       pgssin2(i) = pg(i)/c
       sin2(i) = c
    enddo

  END SUBROUTINE GAUSS8



  SUBROUTINE ORDLEG8(SX,COA,IR) 8
    implicit none

    real(8) :: sx,coa
    integer :: ir
    integer :: n,kk,k,n1,irpp,irppm
    real(8) :: pi,sqr2,delta,sia,theta,c1,c4,s1,ang,fk,fn,fn2,fn2sq,a,b

    pi    = 4.d0*atan(1.d0)
    sqr2  = sqrt(2.d0)
    irpp  = ir   + 1
    irppm = irpp - 1
    delta = acos(coa)
    sia   = sin(delta)

    theta = delta
    c1    = sqr2

    do n = 1,irppm
       fn2   = dble(2*n)
       fn2sq = fn2*fn2
       c1    =  c1*sqrt(1.d0 - 1.d0/fn2sq)
    enddo

    n   = irppm
    fn  = dble(n)
    ang = fn*theta
    s1  = 0.d0
    c4  = 1.d0
    a   =-1.d0
    b   = 0.d0
    n1  = n+1

    do kk = 1,n1,2
       k   = kk-1
       if (k.eq.n) c4 = 0.5d0*c4
       s1  = s1+c4* cos(ang)
       a   =  a+2.d0
       b   =  b+1.d0
       fk  = dble(k)
       ang = theta*(fn-fk-2.d0)
       c4  = ( a * (fn-b+1.d0) / (b*(fn2-a)) )*c4
    enddo

    sx = s1*c1

  END SUBROUTINE ORDLEG8



  SUBROUTINE SUALP 1,2
    implicit none

    integer :: jj,jlat,jm,jn,ilat
    integer :: ilarh,ila,ilatbd,ierr
    real(8) :: dlalp(gst(gstID)%nlarh,nlatbd)
    real(8) :: dldalp(gst(gstID)%nlarh,nlatbd)
    real(8) :: dldelalp(gst(gstID)%nlarh,nlatbd)
    !     
    !     Memory allocation for Legendre polynomials
    !     
    if(mpi_myid.eq.0) write(*,*) 'allocating dalp:',gst(gstID)%nla,gst(gstID)%njlath,gst(gstID)%nla*gst(gstID)%njlath
    allocate( gst(gstID)%dalp(gst(gstID)%nla,gst(gstID)%njlath) )
    allocate( gst(gstID)%dealp(gst(gstID)%nla,gst(gstID)%njlath))
    if(mpi_myid.eq.0) write(*,*) 'succeeded'

    latitudes: do jlat = 1, gst(gstID)%njlath, nlatbd
       ilatbd = min(nlatbd,gst(gstID)%njlath - jlat + 1)

       if(ilatbd.eq.8) then
          call allp(dlalp,dldalp,gst(gstID)%rmu(jlat),gst(gstID)%nclm(0),0,gst(gstID)%ntrunc,ilatbd)
       else
          call allp2(dlalp,dldalp,gst(gstID)%rmu(jlat),gst(gstID)%nclm(0),0,gst(gstID)%ntrunc,ilatbd)
       endif

       do jm = 0,gst(gstID)%ntrunc
          do jn = jm,gst(gstID)%ntrunc
             ila = gst(gstID)%nind(jm) + jn -jm
             ilarh = gst(gstID)%nindrh(jm) + jn-jm
             do jj = 1,ilatbd
                ilat = jlat+jj-1
                gst(gstID)%dalp (ila,jlat+jj-1) = dlalp (ilarh,jj)
                gst(gstID)%dealp(ila,jlat+jj-1) = dldalp(ilarh,jj)
             enddo
          enddo
       enddo
    enddo latitudes

  END SUBROUTINE SUALP



  SUBROUTINE GETALP(DDALP,DDDALP,KLATH,KTRUNC,KTRUNCDIM ,KM) 12
    implicit none

    integer :: km, klath, ktrunc, ktruncdim
    integer :: ila,ind
    integer :: jlat,jn, jlen
    real(8) :: ddalp(0:ktruncdim,klath)
    real(8) :: dddalp(0:ktruncdim,klath)

    do jlat = 1,klath
       do jlen = 0, ktrunc
          ddalp(jlen,jlat) = 0.d0
          dddalp(jlen,jlat)= 0.d0
       end do
    end do

    do jlat = 1, klath
       do jn = km, ktrunc
          ila = gst(gstID)%nind(km) + jn-km
          ind = jn-km
          ddalp(ind,jlat) =  gst(gstID)%dalp(ila,jlat)
          dddalp(ind,jlat) = gst(gstID)%dealp(ila,jlat)
       end do
    end do

  END SUBROUTINE GETALP



  SUBROUTINE ALLP( P , G , X , LR , HEM , R , NLATP)  1

    implicit none 
    integer :: r, nlatp, lr(0:r), hem 
    real(8) :: p(0:r,0:r,nlatp) , g(0:r,0:r,nlatp) 
    real(8) :: x(nlatp) 

    real(8) :: onehalf   
    real(8) :: xp , xp2,  p0, enm, fnm
    integer :: ilat , m , l , n

    data onehalf /0.5d0/

    do ilat = 1,nlatp
       xp2 = sqrt( 1.0d0 - x(ilat) ** 2 ) 
       p(0,0,ilat) = sqrt(onehalf) 
       do m = 1,r 
          xp = real(m,8)
          p(0,m,ilat) = sqrt( (2.0d0*xp+1.0d0)/(2.0d0*xp) ) * xp2 * p(0,m-1,ilat)
       enddo
    enddo

    do ilat = 1,nlatp
       do m = 0,r 
          xp = real(m,8)
          g(0,m,ilat) = - x(ilat)*xp * p(0,m,ilat) 
       enddo
    enddo
    do n = 1,r
       do m = 0,lr(n)-1
          l =  1
          p0 = real(m+n,8)
          xp = real(m,8)
          enm = sqrt( ((p0*p0-xp*xp)*(2.0d0*p0+1.0d0))/(2.0d0*p0-1.0d0) )
          fnm = sqrt( (2.0d0*p0+1.0d0)/((p0*p0-xp*xp)*(2.0d0*p0-1.0d0)) )

          p(n,m,l) = ( x(l) * p0 * p(n-1,m,l) -  g(n-1,m,l) ) * fnm 
          g(n,m,l) = enm * p(n-1,m,l) - x(l) * p0 * p(n,m,l) 
          l = l + 1
          p(n,m,l) = ( x(l) * p0 * p(n-1,m,l) -  g(n-1,m,l) ) * fnm 
          g(n,m,l) = enm * p(n-1,m,l) - x(l) * p0 * p(n,m,l) 
          l = l + 1
          p(n,m,l) = ( x(l) * p0 * p(n-1,m,l) -  g(n-1,m,l) ) * fnm 
          g(n,m,l) = enm * p(n-1,m,l) - x(l) * p0 * p(n,m,l) 
          l = l + 1
          p(n,m,l) = ( x(l) * p0 * p(n-1,m,l) -  g(n-1,m,l) ) * fnm 
          g(n,m,l) = enm * p(n-1,m,l) - x(l) * p0 * p(n,m,l) 
          l = l + 1
          p(n,m,l) = ( x(l) * p0 * p(n-1,m,l) -  g(n-1,m,l) ) * fnm 
          g(n,m,l) = enm * p(n-1,m,l) - x(l) * p0 * p(n,m,l) 
          l = l + 1
          p(n,m,l) = ( x(l) * p0 * p(n-1,m,l) -  g(n-1,m,l) ) * fnm 
          g(n,m,l) = enm * p(n-1,m,l) - x(l) * p0 * p(n,m,l) 
          l = l + 1
          p(n,m,l) = ( x(l) * p0 * p(n-1,m,l) -  g(n-1,m,l) ) * fnm 
          g(n,m,l) = enm * p(n-1,m,l) - x(l) * p0 * p(n,m,l) 
          l = l + 1
          p(n,m,l) = ( x(l) * p0 * p(n-1,m,l) -  g(n-1,m,l) ) * fnm 
          g(n,m,l) = enm * p(n-1,m,l) - x(l) * p0 * p(n,m,l) 
          l = l + 1
       enddo
    enddo

  END SUBROUTINE ALLP



  SUBROUTINE ALLP2( P , G , X , LR , HEM , R , NLATP) 1

    implicit none
    integer :: r, nlatp, lr(0:r), hem, jlat
    real(8) :: p(0:r,0:r,nlatp) , g(0:r,0:r,nlatp) 
    real(8) :: x(nlatp)

    real(8) :: onehalf   
    real(8) :: xp , xp2,  p0, enm, fnm
    integer :: ilat , m , l , n

    data onehalf /0.5d0/

    do ilat = 1,nlatp
       xp2 = sqrt( 1.0d0 - x(ilat) ** 2 )
       p(0,0,ilat) = sqrt(onehalf)
       do m = 1,r
          xp = real(m,8)
          p(0,m,ilat) = sqrt( (2.0d0*xp+1.0d0)/(2.0d0*xp) ) * xp2 * p(0,m-1,ilat)
       enddo
    enddo

    do ilat = 1,nlatp
       do m = 0,r
          xp = real(m,8)
          g(0,m,ilat) = - x(ilat)*xp * p(0,m,ilat)
       enddo
    enddo

    do n = 1,r
       do m = 0, r
          p0 = real(m+n,8)
          xp = real(m,8)
          enm = sqrt( ((p0*p0-xp*xp)*(2.0d0*p0+1.0d0))/(2.0d0*p0-1.0d0) )
          fnm = sqrt( (2.0d0*p0+1.0d0)/((p0*p0-xp*xp)*(2.0d0*p0-1.0d0)) )

          do jlat = 1, nlatp
             l = jlat
             p(n,m,l) = ( x(l) * p0 * p(n-1,m,l) - g(n-1,m,l) ) * fnm
             g(n,m,l) = enm * p(n-1,m,l) - x(l) * p0 * p(n,m,l)
          enddo
       enddo
    enddo

  END SUBROUTINE ALLP2



  SUBROUTINE GST_ZLEGPOL(PLEG) 3
    !***s/r ZLEGPOL  - Evaluation of Legendre polynomials restricted to
    !*     .           (n,m) = (n,0)
    !*Arguments
    !*     o   PLEG               : Legendre functions evaluated at the KNJ Gaussian latitudes
    implicit none

    real(8) :: pleg(0:gst(gstID)%ntrunc,gst(gstID)%nj)

    integer :: jn, jlat, ierr
    real(8) :: dlfact1, dlfact2, dln
    real(8) :: dlnorm(0:gst(gstID)%ntrunc)

    do jlat = 1, gst(gstID)%nj
       pleg(0,jlat) = sqrt(0.5d0)
       pleg(1,jlat) = sqrt(1.5d0)*gst(gstID)%rmu(jlat)
    enddo

    do jn = 0, gst(gstID)%ntrunc
       dln = 1.d0*real(jn,8)
       dlnorm(jn) = dsqrt((2.d0*dln + 1.d0)/2.d0)
    enddo

    do jn = 1, gst(gstID)%ntrunc-1
       dln = real(jn,8)
       dlfact1 = ((2.d0*dln+1.d0)/(dln+1.d0))*(dlnorm(jn+1)/dlnorm(jn))
       dlfact2 = (dln/(dln+1.d0))*(dlnorm(jn+1)/dlnorm(jn-1))
       do jlat = 1,gst(gstID)%nj
          pleg(jn+1,jlat) = dlfact1*gst(gstID)%rmu(jlat)*dble(pleg(jn,jlat))   &
                - dlfact2*dble(pleg(jn-1,jlat))
       enddo
    enddo

  END SUBROUTINE GST_ZLEGPOL

END MODULE globalSpectralTransform