!-------------------------------------- LICENCE BEGIN ------------------------------------
!Environment Canada - Atmospheric Science and Technology License/Disclaimer,
!                     version 3; Last Modified: May 7, 2008.
!This is free but copyrighted software; you can use/redistribute/modify it under the terms
!of the Environment Canada - Atmospheric Science and Technology License/Disclaimer
!version 3 or (at your option) any later version that should be found at:
!http://collaboration.cmc.ec.gc.ca/science/rpn.comm/license.html
!
!This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
!without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
!See the above mentioned License/Disclaimer for more details.
!You should have received a copy of the License/Disclaimer along with this software;
!if not, you can write to: EC-RPN COMM Group, 2121 TransCanada, suite 500, Dorval (Quebec),
!CANADA, H9P 1J3; or send e-mail to service.rpn@ec.gc.ca
!-------------------------------------- LICENCE END --------------------------------------
!

      SUBROUTINE SPA2GD 6,11
      use modstag, only : lstagwinds
#if defined (DOC)
*
***s/r SPA2GD  - Convert analysis variables to model state variables
*               ***Version of SPA2SP adapted only for NANALVAR=4
*               ***for new PtoT approach with localized Tb correlations
*               ***Created May 2008 by M. Buehner
*     .
* Purpose
*     .  To revert from the analysis variables to the model state variables
*     .  as defined in spectral space
*     .  IMPORTANT: before a call to SPA2SP, the fields SPVOR and SPDIV
*     .             may contain PSI and CHI. This will eventually become
*     .             the default option
*Author  : P. Gauthier *ARMA/AES  March 25, 1996
*     .
*Revision:
*     .    P. Gauthier *ARMA/AES  August 5, 1996
*     .         Redefine the balance relationship and reformulate the
*     .         analysis variable in terms of PSI and CHI instead of
*     .         vorticity and divergence.
*     . S. Pellerin *ARMA/AES Sept 97.
*                   - Control of the different model state of the 3Dvar
*                     through COMSTATE, COMSTATEC and COMSTNUM common
*                     blocks variables (comstate.cdk).
*     . M. Buehner November 97
*     .         Introduce transformation for new definition of control
*     .         vector that reduces background covariances to the identity
*     .         matrix (for NANALVAR=3)
*     . M. Buehner jul 98
*     .         Introduce direct use of grid-point (PSI,CHI)
*     .         background std-dev errors
*
*     . L. Fillion *ARMA/AES  20 jul 1998
*     .         New version with (T,lnps)u as analysis variables
*       C. Charette *ARMA/AES Nov 1998
*          - Changed LDIVBAL to LBALDIV
*       C. Charette *ARMA/AES 8 dec 1998
*          - Added new parameters to call bmass
*       C. Charette *ARMA/AES 8 dec 1999
*          - Define variable ILON in section 320
*       JM Belanger CMDA/SMC  Aug 2000
*                   . 32 bits conversion
*                    (MXMA8, replace 2.0 by REAL*8 in SQRT)
*       M. Buehner *ARMA/SMC April 2002
*          - Added option to leave analysis in unbalanced variables (LDOBAL)
*       P. Koclas  CMC Apr 2003
*          - changed loop nesting order and call to mxmaop insted of mxma8 for ibm conversion
*       M. Buehner *ARMA/SMC October 2004
*          - changed damplibg to 3D and to be after balance operator
#endif
C
      IMPLICIT NONE
#include "pardim.cdk"
#include "comdim.cdk"
#include "comlun.cdk"
#include "comcst.cdk"
#include "comgem.cdk"
#include "comleg.cdk"
#include "comcva.cdk"
#include "comsp.cdk"
#include "comspg.cdk"
#include "comgd0.cdk"
#include "comgd1.cdk"
#include "compstat.cdk"
#include "comcorr.cdk"
#include "comcse1.cdk"
#include "comstate.cdk"
      INTEGER ILEN
      INTEGER JN,JM,JK,ILA
      integer ji,jj,idum1,idum2,idum3,idum4
      real*8 zmin,zmax
!      REAL*8 ZSP(NKSDIM,2,0:NTRUNC),ZSP2(NKSDIM2,2,0:NTRUNC),SQ2
!      POINTER(PXZSP,ZSP), (PXZSP2,ZSP2)
      REAL*8 SQ2
      REAL*8 , ALLOCATABLE,DIMENSION(:,:,:) :: ZSP, ZSP2
      REAL*8 zp(NI,nflev,NJ)
      REAL*8 ZWINDOW(NJ)

      INTEGER ILENSP, ILENGD, IERR
     S     , ILON, JLEV, JLON, JLAT, JLA, IULOUT
     S     , JLATBIN, JLATMIN, JLATMAX, KLATPTOT(NLATBIN)
      REAL*8 Z1MNU2, ZSQRTNU2, ZCORIOLIS
      REAL*8 ZGDPSI(NIBEG:NIEND,NFLEV,NJBEG:NJEND)
     S     ,ZGDCHI(NIBEG:NIEND,NFLEV,NJBEG:NJEND)
      REAL*8 DLZSOMME, DLDSOMME, DLA2, DL1SA2, DLFACT
     S     ,DLNORMPSI, DLNORMCHI, ZCORIOLIS, ZPSB(NI,NJ)
      POINTER (PXGDPSI,ZGDPSI),(PXGDCHI,ZGDCHI)
      REAL*8 ZGD(NI,NKGDIM,NJ)
!
      INTEGER :: JN0,INS,JNS,NTRUNCHF
      INTEGER :: thdid,numthd,omp_get_thread_num,omp_get_num_threads
cjmb
      real*8 two
      data two /2.0D0/
*-----------------------------------------------------------------
C
C     1. Create local arrays for CHI and PSI if needed
C
 100  CONTINUE
C
C     .  1.1 Case where the analysis variable is X - Xb
C     .      (SPA2SP = Identity)
C
 110  CONTINUE
      IF(NANALVAR.EQ.0) THEN
         RETURN
      END IF
C
C     .  1.2 Memory allocation
C
      IF(NANALVAR.GE.2) THEN
         ILENGD = (NJEND-NJBEG+1)*NFLEV*(NIEND-NIBEG+1)
         CALL HPALLOC(PXGDPSI,MAX(1,ILENGD),IERR,8)
         CALL HPALLOC(PXGDCHI,MAX(1,ILENGD),IERR,8)
      END IF
C
      if(nlatbin.eq.1) then
        klatptot(1)=1
      elseif(nlatbin.eq.3) then
        klatptot(1)=1
        klatptot(2)=nj/2
        klatptot(3)=nj
      endif
C
C     3. Formulation using PSI and CHI
C
 300  CONTINUE
c

c   3.0 Multiply by EIGENCOR to add in vert corr (scale/rotate)
       SQ2=sqrt(two)
       zgd(:,:,:)=0.0d0
       TB_out(:,:,:)=0.0d0
       DO JLATBIN=1,NLATBIN
       NTRUNCHF=NTRUNC/2
!$OMP PARALLEL PRIVATE(thdid,numthd,JN,JM,JK,ILA,INS,JN0,JNS,zsp2,zsp)
       thdid=omp_get_thread_num()
       numthd=omp_get_num_threads()

       ALLOCATE(ZSP(NKSDIM,2,0:NTRUNC))
       ALLOCATE(ZSP2(NKSDIM2,2,0:NTRUNC))
       DO JN0 = thdid,NTRUNCHF,numthd
          INS=1
          IF(JN0 == (NTRUNC-JN0))INS=0
          DO 222 JNS=0,INS
             JN=(1-JNS)*JN0+JNS*(NTRUNC-JN0)
C
C*    Transfer Dx from global to working array
C
         DO JM = 0, JN
            ILA = NIND(JM) + JN - JM
            DO JK = 1, NKSDIM
               ZSP(JK,1,JM) = SPLAT(ILA,1,JK,JLATBIN)
               ZSP(JK,2,JM) = SPLAT(ILA,2,JK,JLATBIN)
            END DO
         END DO
C
C*    Compute (CORNS(NKSDIM,NKSDIM,JN) * ZSP)
C
            CALL MXMAOP(CORNS(1,1,JN,JLATBIN),1,NKSDIM2,ZSP(1,1,0),1,NKSDIM
     +        ,ZSP2(1,1,0),1,NKSDIM2,NKSDIM2,NKSDIM,2*(JN+1))
C
C*    Transfer from working array to global array
C
         ILA = NIND(0) +JN
         DO JK = 1, NKSDIM
            SP(ILA,1,JK) = ZSP2(JK,1,0)*SQ2
            SP(ILA,2,JK) = ZSP2(JK,2,0)*SQ2
         END DO
         DO JK = 1, NFLEV
            SPTB(ILA,1,JK) = ZSP2(JK+NKSDIM,1,0)*SQ2
            SPTB(ILA,2,JK) = ZSP2(JK+NKSDIM,2,0)*SQ2
         END DO

         DO JM = 1, JN
            ILA = NIND(JM) +JN - JM
            DO JK = 1, NKSDIM
               SP(ILA,1,JK) = ZSP2(JK,1,JM)
               SP(ILA,2,JK) = ZSP2(JK,2,JM)
            END DO
         END DO
         DO JM = 1, JN
            ILA = NIND(JM) +JN - JM
            DO JK = 1, NFLEV
               SPTB(ILA,1,JK) = ZSP2(JK+NKSDIM,1,JM)
               SPTB(ILA,2,JK) = ZSP2(JK+NKSDIM,2,JM)
            END DO
         END DO
C
         DO JK=1,NKSDIM
            DO ILA=1,NTRUNC+1
               SP(ILA,2,JK)=0.0
            ENDDO
         ENDDO
         DO JK=1,NFLEV
            DO ILA=1,NTRUNC+1
               SPTB(ILA,2,JK)=0.0
            ENDDO
         ENDDO
C
222    CONTINUE
       END DO
       DEALLOCATE(zsp)
       DEALLOCATE(zsp2)
!$OMP END PARALLEL
C
C     .  3.1 Transform PSI and CHI and the other variables
C     .      to physical space
C
 310  CONTINUE
      DLA2   = DBLE(RA)*DBLE(RA)
      DL1SA2 = 1.D0/DLA2
C
      CALL SPEREE(NKSDIM,SP,GD
     S     ,NLA,NIBEG,NIEND,NJBEG,NJEND,NKSDIM)
      CALL SPEREE(NFLEV,SPTB,TB0
     S     ,NLA,NIBEG,NIEND,NJBEG,NJEND,NFLEV)
C
      DO JLAT = 1, NJ
         ILON = NILON(JLAT)
         DO JLEV = 1, NFLEV
            DO JLON = 1, ILON
               ZGDPSI(JLON,JLEV,JLAT)= UT0(JLON,JLEV,JLAT)
               ZGDCHI(JLON,JLEV,JLAT)= VT0(JLON,JLEV,JLAT)
            END DO
         END DO
      END DO
C
C     .  3.3 Obtain the full mass field (TT, Ps)
C
      DO JLAT = 1, NJ
        ILON = NILON(JLAT)
        ZCORIOLIS = 2.*ROMEGA*RMU(JLAT)
        DO JLEV = 1, NFLEVPTOT
          DO JLON = 1, ILON
            zp(JLON,JLEV,JLAT) = ZCORIOLIS*ZGDPSI(JLON,JLEV,JLAT)
          END DO
        END DO
      ENDDO
C
      DO JLAT = 1, NJ
        DO JLON = 1, NI
          zpsb(jlon,jlat)=0.0
          do jlev=1,NFLEVPTOT
            zpsb(jlon,jlat)=zpsb(jlon,jlat) +
     +        PtoT(NFLEV+1,jlev,klatptot(jlatbin))*zp(jlon,jlev,jlat)
          enddo
        END DO
      END DO
C
      DO JLAT = 1, NJ
         ZCORIOLIS = 2.*ROMEGA*RMU(JLAT)
         ILON = NILON(JLAT)
         DO JLEV = 1, NFLEV
            DO JLON = 1, ILON
               TB0(JLON,JLEV,JLAT)= ZCORIOLIS*TB0(JLON,JLEV,JLAT)
            END DO
         END DO
      END DO
c
c Multiply by std dev and add balanced and unbalanced components
c
      CALL FGERR('M')
      DO JLAT = 1, NJ
        DO JLEV = 1, NFLEV
          ILON = NILON(JLAT)
          DO JLON = 1, ILON
            TB0(JLON,JLEV,JLAT)=
     +        TB0(JLON,JLEV,JLAT)*RGSIGTB(JLAT,JLEV)
            TT0(JLON,JLEV,JLAT)=
     +        TT0(JLON,JLEV,JLAT)+TB0(JLON,JLEV,JLAT)
          END DO
        END DO
      END DO
      DO JLAT = 1, NJ
        ILON = NILON(JLAT)
        DO JLON = 1, ILON
          ZPSB(JLON,JLAT)=
     +        ZPSB(JLON,JLAT)*RGSIGPSB(JLAT)
          gps0(JLON,1,JLAT)=
     +        gps0(JLON,1,JLAT)+zpsb(JLON,JLAT)
        END DO
      END DO
c
cpik
!$OMP PARALLEL DO PRIVATE(DLNORMPSI,DLNORMCHI,ILON,JLEV,JLAT,JLON)
      DO JLEV = 1, NFLEV
        DO JLAT = 1, NJ
            DLNORMPSI = RGSIGUU(JLAT,JLEV)
            DLNORMCHI = RGSIGVV(JLAT,JLEV)
            ILON = NILON(JLAT)
            DO JLON = 1, ILON
              ZGDPSI(JLON,JLEV,JLAT)
     S             = ZGDPSI(JLON,JLEV,JLAT)*DLNORMPSI
              ZGDCHI(JLON,JLEV,JLAT)
     S             = ZGDCHI(JLON,JLEV,JLAT)*DLNORMCHI
            END DO
          END DO
        END DO
!$OMP END PARALLEL DO
c
      IF(LBALDIV) THEN
        CALL DIVBAL(zgdpsi,zgdchi)
      ENDIF
C
C     .  3.4 Spectral transform all fields
C
 340  CONTINUE
        DO JLAT = 1, NJ
           ILON = NILON(JLAT)
           DO JLEV = 1, NFLEV
              DO JLON = 1, ILON
                 UT0(JLON,JLEV,JLAT) = ZGDPSI(JLON,JLEV,JLAT)
                 VT0(JLON,JLEV,JLAT) = ZGDCHI(JLON,JLEV,JLAT)
              END DO
           END DO
        END DO
C
C Apply 3D amplification factor after constructing full variables
C
        DO JLAT = 1, NJ
          DO JLEV = 1, NKGDIM
            DO JLON = 1, NI
              GD(JLON,JLEV,JLAT)=
     +          GD(JLON,JLEV,JLAT)*damplibg(JLON,JLEV,JLAT)
            END DO
          END DO
        END DO
        DO JLAT = 1, NJ
          DO JLEV = 1, NFLEV
            DO JLON = 1, NI
              TB0(JLON,JLEV,JLAT)=
     +          TB0(JLON,JLEV,JLAT)*damplibg(JLON,JLEV,JLAT)
            END DO
          END DO
        END DO
C
        CALL REESPE(NKSDIM,SP,GD
     S     ,NLA,NIBEG,NIEND,NJBEG,NJEND,NKSDIM)
C
C     .  3.5 Rederive the vorticity and divergence from PSI and CHI
C
!$OMP PARALLEL DO
        DO JLEV = 1, NFLEV
           DO JLA = 1, NLA
              SPVOR(JLA,1,JLEV)  = SPVOR(JLA,1,JLEV)*DL1SA2*RNNP1(JLA)
              SPVOR(JLA,2,JLEV)  = SPVOR(JLA,2,JLEV)*DL1SA2*RNNP1(JLA)
              SPDIV(JLA,1,JLEV)  = SPDIV(JLA,1,JLEV)*DL1SA2*RNNP1(JLA)
              SPDIV(JLA,2,JLEV)  = SPDIV(JLA,2,JLEV)*DL1SA2*RNNP1(JLA)
           END DO
        END DO
!$OMP END PARALLEL DO

c transform from VOR,DIV to winds in physical space
      if(lstagwinds) then
         write(nulout,*) 'USING STAGWINDS!'
         call stagwinds(nulout) ! Winds are put directly on GEM's staggered grid
      else
         call spgd
      end if
c
C COMBINE LATITUDE BINS TO OBTAIN SINGLE GLOBAL 3D STATE INCREMENT
c
      ZWINDOW(:)=0.0d0
      CALL SUWINLATBIN(ZWINDOW,JLATBIN)

      DO JLAT = 1,NJ
         ILON = NILON(JLAT)
         DO JLEV = 1, NKSDIM
            DO JLON = 1, ILON
               ZGD(JLON,JLEV,JLAT)= ZGD(JLON,JLEV,JLAT)+ZWINDOW(JLAT)*GD(JLON,JLEV,JLAT)
            END DO
         END DO
      END DO
      DO JLAT = 1,NJ
         DO JLEV = 1, NFLEV
            DO JLON = 1, NI
               TB_out(JLON,JLEV,JLAT)= TB_out(JLON,JLEV,JLAT)+ZWINDOW(JLAT)*TB0(JLON,JLEV,JLAT)
            END DO
         END DO
      END DO
C END LOOP ON JLATBIN
      ENDDO
c RESULTS LEFT IN GD!!!!
      DO JLAT = 1,NJ
         ILON = NILON(JLAT)
         DO JLEV = 1, NKSDIM
            DO JLON = 1, ILON
               GD(JLON,JLEV,JLAT)= ZGD(JLON,JLEV,JLAT)
            END DO
         END DO
      END DO
      do jk = 1,nflev
        do jj = 1,nj
          do ji = 1,ni
            zp(ji,jk,jj) = tt0(ji,jk,jj)
          enddo
        enddo
      enddo
      call maxmin(zp,ni,nj,nflev,zmin,zmax,
     &              idum1,idum2,idum3,idum4,'spa2gd      ',
     &              'T0')
C
C     9. Deallocate local arrays
C
 900  CONTINUE
      IF(NANALVAR.GE.2) THEN
         CALL HPDEALLC(PXGDPSI,IERR,1)
         CALL HPDEALLC(PXGDCHI,IERR,1)
      ENDIF
C
      write(nulout,*) 'end of spa2gd!!!'
      call flush(nulout)

      RETURN
      END