SUBROUTINE SUCORNS2 1,7
        IMPLICIT NONE
#if defined (DOC)
*
***s/r  SUCORNS2 - produce matrix that scales and rotates controls according to
*                  eigenmodes/values of CORNS to create new control vector that
*                  diagonalize the Jb term
*                - this matrix is phi*lambda^-0.5 where phi and lambda are the
*                  eigenvectors and eigenvalues of the background covariance matrix
*                - called from SUCOV when NANALVAR=4
*                - matrix used in SPA2GD, SPA2GDAD
*               ***Version of SUCORNS adapted only for NANALVAR=4    
*               ***for new PtoT approach with localized Tb correlations
*               ***Created May 2008 by M. Buehner 
*
*Author  : M. Buehner November 97
*Revision:
*          S. Pellerin *ARMA/SMC May 2000
*                   . Check for negative eigenvalues
*          JM Belanger CMDA/SMC  Aug 2000 
*                   . 32 bits conversion
*                    (MXMA8, DSYEV instead of MXMA, SSYEV)
*          M. Buehner ARMA/EC, May 2008
*                   . Application of localization using the
*                     5'th order function from Gaspari and Cohn.
*                   . Added calc and output of CORVERT
*                   . Introduction of Tb-related correlations
*          Y.J. Rochon ARQX/EC March 2009
*                   . sucorns2 can now also be used as sucorns.
*                   . Included decomposition by uncorrelated sets of
*                     correlated block matrices (forming smaller
*                     square matrices) to reduce computational time. (IBLOCK=1)
*                   . Replaced some DO loops by F90 statements
*                   . Removed attempt to read from NULBGST (which gave ikey=-12)
*                   . Call to MXMA8 with use of the eigenvalue square roots
*                     replaced to take advantage of 'diagonal' eigenvalue
*                     square roor matrix. More computational effeciency.
*                   . Second call to MXMA8 replaced by use of MATMUL also
*                     to increase computational speed.
*           Bin He   *ARMA/MRB*   NOv. 2011
*                   . Replaced MAMX8 with DGEMUL of ESSL. 
*
*
*Comments:
*
*       1. It is assumed that the order of variables in CORNS is:
*          3D analysis variables followed by 2D variables 
*          followed by extra 3D variables (e.g. Tb). 
*
*          Addendum: The block correlation mapping array 'icorel' has the 
*          ordering of the 2D variables at the end of the array, following 
*          the extra 3D variables. This difference with CORNS is taken into 
*          account in the code. (YJR)
*
*       2. The choice between (a) taking square roots of the entire undecomposed
*          correlation matrices (for each wavenumber) and (b) taking square 
*          roots of uncorrelated components of correlated matrix blocks is  
*          made by the value of IBLOCK set below (values 0 and 1 respectively). 
*
*          The mapping array 'icorel' set below identifies the correlated
*          variables. (YJR)
*
*       3. For NKSDIM2>NKSDIM, RVLOCPSITT=RVLOCPSI=RVLOCBALT is recommended
*          but is not enforced. 
*
*       4. For NKSDIM2>NKSDIM:
*
*          If IBLOCK=1, when the correlated blocks do not include blocks from
*          any extra 3D variables (e.g. Tb), the resulting square root of that
*          subset is made to be square and symmetric. Otherwise, it will be
*          non-square and non-symmetric (e.g. for (PP,Tb)). The different 
*          subsets are then assembled to form the final non-square square 
*          root matrices.
*
*          If IBLOCK=0, the entire square root matrix is non-square and
*          non-symmetric
*
*          For NKSDIM2=NKSDIM, the square root matrices are all square and
*          symmetric. (YJR)
*
*          Aside: Symmetry is imposed via the second call to MXMA8 (replaced
*          by MATMUL) for multiplication by the eigenvectors
*
*       5. Normalization of the (PP,Tb) correlation matrices prior to taking
*          the square roots was introduced to improve matrix conditionning 
*          (un-doing of the normalization is done afterwards). This improvement
*          was applied only for the IBLOCK=1 case. (YJR)
*
*
#endif
#include "pardim.cdk"
#include "comdim.cdk"
#include "comlun.cdk"
#include "comcorr.cdk"
#include "comstate.cdk"
#include "comcva.cdk"  
#include "comgem.cdk"
#include "comleg.cdk"
#include "comcst.cdk"
#include "comspg.cdk" 
#include "rpnstd.cdk"
*
c
      real*8, allocatable, dimension(:,:) :: eigen,eigen2,
     &                                    result,result2
      real*8, allocatable, dimension(:) :: zwork,eigenv,sqrtin,sqrtin2
C
      INTEGER IJ,II,II2,IN,IBLOCK,IBLOCK3D,IBLOCK2D,IB,IC,IJ1,ISQ
      INTEGER IC2,IB2,ID2,IB1,INF,INF2,ib3,ib4,ib5,ib11,iextra3d,isquare
      INTEGER IER,ILWORK,INFO,iulcorvert,IKEY,JLATBIN
      INTEGER KLATPTOT(NLATBIN)
      INTEGER icorel(NVSP+NVSAUX+10+NVSP2D,NVSP+NVSAUX+10+NVSP2D)
      INTEGER isum3d,isum3d2,isum2d
C
      REAL*8 ZFACT,ZFACT2,ZCORIOLIS,ZPSIPS(NFLEV)
      REAL*8 ZPSI(NFLEV,NFLEV),ZWINDOW(NJ),ZFACTTB(NJ,NFLEV),ZFACTPSB(NJ)
C           
      EXTERNAL DSYEV
C
C     Variables used during localization
C
      integer jk1,jk2
      real*8 ztlen,zlc,zcorr,zr,zprof(jpnflev),zps
C
      real*8 corvertt(NKSDIM2,NKSDIM2),corns2(NKSDIM2,NKSDIM2),
     &       corvert2(NKSDIM2,NKSDIM2),xnorm2(NKSDIM2),
     &       xnorm(NKSDIM2,0:ntrunc),xwork(10*nksdim2)
C
C     Additional parameters associated to application of Tb correlations
C     in CORNS
C
      real*8 ztt(nflev,nflev,ntrunc+1),ztpsi(nflev,nflev,ntrunc+1)
C
      write(NULOUT,*)'  ***************************************'
C
C     Set flag to indicate how square root is to be done.
C   
C         0 - Taking square root of entire CORNS at one time for each IN 
C         1 - Disassembling CORNS in uncorrelated sets of correlated blocks 
C             for taking square roots (and re-assembling afterwards).
C
      IBLOCK=1
C 
C     Initialize number of different block types.
C
      IBLOCK3D=NVSP+NVSAUX 
      IEXTRA3D=0
      if (NKSDIM2.gt.NKSDIM) THEN
         IEXTRA3D=(NKSDIM2-NKSDIM)/NFLEV
         IBLOCK3D=IBLOCK3D+IEXTRA3D
      END IF
      IBLOCK2D=NVSP2D
C
C     Initialize identification of null cross-correlation blocks
C
      icorel(:,:)=0
C         
C     To validate IBLOCK=1 in being able to replicate IBLOCK=0, uncomment  
C     the following record and comment out the remaining 5+2 'icorel' setting
C     records found afterwards.
C
c      icorel(1,1:IBLOCK3D+IBLOCK2D)=1
C
      do ii=1,IBLOCK3D+IBLOCK2D
         icorel(ii,ii)=1
      end do
C
C     Identify T' <-> Ps' cross-correlation block
C
      icorel(3,IBLOCK3D+1)=1
      icorel(IBLOCK3D+1,IBLOCK3D+1)=0 ! acccounts for later merging of blocks
C
C     Set work pressures
C
      zps = 101000. 0
      call calcpres(ZPROF,vhybinc,nflev,ZPS,rptopinc
     &             ,rprefinc,rcoefinc,1) 
      zprof(1:nflev)=log(zprof(1:nflev))
C
C     Set PtoT matrix identifiers. (relevant only for IEXTRA3D > 0)
C      
      if(nlatbin.eq.1) then
        klatptot(1)=1
      elseif(nlatbin.eq.3) then
        klatptot(1)=1
        klatptot(2)=nj/2
        klatptot(3)=nj
      endif
C
      DO JLATBIN=1,NLATBIN
C
C     Set up Tb <-> PSI cross-correlation matrix blocks if requested
C
      if (IEXTRA3D.GT.0) then
C
         WRITE(NULOUT,*) '  SUCORNS2 --- Set Tb <-> PSI correlations'
c
         icorel(1,iblock3d)=1
         icorel(iblock3d,iblock3d)=0 ! acccounts for later merging of blocks
c
         write(nulout,*) 'KLATPTOT=',jlatbin,klatptot(jlatbin)
         DO II=1,NFLEV
         DO II2=1,NFLEVPTOT
             write(622,*) ii,ii2,klatptot(jlatbin),
     &               ptot(ii,ii2,KLATPTOT(JLATBIN))
         enddo
         enddo
c     
c        Explicitly compute the balanced temperature and temperature-psi 
c        correlations
c
         DO IN=0,NTRUNC
           DO IJ=1,NFLEV
           DO II=1,NFLEV
              ZTPSI(II,IJ,IN+1)=0.0
              ZTT(II,IJ,IN+1)=0.0
           END DO
           END DO
           DO IJ=1,NFLEVPTOT
           DO II=1,NFLEV
              DO II2=1,NFLEVPTOT
                ZTPSI(II,IJ,IN+1)=ZTPSI(II,IJ,IN+1)
     &                           +PTOT(II,II2,klatptot(jlatbin))
     &                           *CORNS(II2,IJ,IN,jlatbin)
              ENDDO
           ENDDO
           ENDDO
           IF(NFLEVPTOT.lt.NFLEV) THEN
              DO IJ=(NFLEVPTOT+1),NFLEV
              DO II=1,NFLEV
                 ZTPSI(II,IJ,IN+1)=ZTPSI(II,NFLEVPTOT,IN+1)
              ENDDO
              ENDDO
           ENDIF
           DO IJ=1,NFLEV
           DO II=1,NFLEV
               DO II2=1,NFLEVPTOT
                 ZTT(II,IJ,IN+1)=ZTT(II,IJ,IN+1)
     &                          +ZTPSI(II,II2,IN+1)
     &                          *PTOT(IJ,II2,klatptot(jlatbin))
               ENDDO
           ENDDO
           ENDDO
         ENDDO
         write(620,*) ZTT
         write(621,*) ZTPSI
c
c        Fill in blocks for balance temperature
c
         do in=0,ntrunc
            DO IJ=1,NFLEV
            DO II=1,NFLEV
               CORNS(NKSDIM+II,NKSDIM+IJ,IN,jlatbin)=ZTT(II,IJ,IN+1)
               CORNS(       II,NKSDIM+IJ,IN,jlatbin)=ZTPSI(IJ,II,IN+1)
               CORNS(NKSDIM+II,       IJ,IN,jlatbin)=ZTPSI(II,IJ,IN+1)
            ENDDO
            ENDDO
         enddo
C
C        Save un-localized PSI correlations
C
         DO JK2 = 1, NFLEV
         DO JK1 = 1, NFLEV
            ZPSI(JK1,JK2) = 0.0
            DO IN = 0, NTRUNC
               ZPSI(JK1,JK2) = ZPSI(JK1,JK2)+((2*IN+1)*CORNS(JK1,JK2,IN,JLATBIN))
            ENDDO
         ENDDO
         ENDDO
C
      end if
C
      WRITE(NULOUT,*) '  SUCORNS2 --- Apply localization to CORRNS. Bin: ',jlatbin
C
C     Apply localization to CORRNS.
C     Loop over 3D variables
C
      DO IB=1,IBLOCK3D
C
C        Specify length scale (in units of ln(Pressure))
C        Note: In the 3D-Var v10.2.1+, ztlen can be level dependent 
C        (labelled as zlocprof).
C
         if (IB.EQ.1) then
            ztlen=RVLOCPSI
            if (jlatbin.eq.2) ztlen=3.0D0
         else if (IB.EQ.2) then
            ztlen=RVLOCCHI 
            if (jlatbin.eq.2) ztlen=3.0D0
         else if (IB.EQ.3) then
            ztlen=RVLOCUNBALT
            if (jlatbin.eq.2) ztlen=2.0D0
         else if (IB.EQ.4) then     
            ztlen=RVLOCLQ 
            if (jlatbin.eq.2) ztlen=2.0D0
         else if (IB.eq.IBLOCK3D-IEXTRA3D+1) then
            ztlen=RVLOCBALT
            if (jlatbin.eq.2) ztlen=2.0D0 
         else if (IB.le.IBLOCK3D-IEXTRA3D) then
            ztlen=RVLOCTR(IB-4)
         else
            write(nulout,*) '  SUCORNS2 warning: ZTLEN not found: ',IB
         end if
C
         write(nulout,*) '  SUCORNS2: ztlen for I= ',IB,' is ',ztlen
C
         if (ib.gt.iblock3d-iextra3d) then
            ij=(ib-1)*nflev+iblock2d
         else
            ij=(ib-1)*nflev
         end if
         if(ZTLEN.gt.0.001) then   
C
C          Apply 5'th order localization function (from Gaspari and Cohn)
C
           ZLC=ZTLEN/2.0
           do jk1=1,nflev
            do jk2=1,nflev
              ZR = abs(zprof(jk2) - zprof(jk1))
              if(ZR.le.ZLC) then
                zcorr=-0.250*(ZR/ZLC)**5+0.5*(ZR/ZLC)**4
     +                +0.625*(ZR/ZLC)**3-(5.0/3.0)*(ZR/ZLC)**2+1.0
              elseif(ZR.le.(2.0*ZLC)) then
                zcorr=(1.0/12.0)*(ZR/ZLC)**5-0.5*(ZR/ZLC)**4
     +                +0.625*(ZR/ZLC)**3+(5.0/3.0)*(ZR/ZLC)**2
     +                -5.0*(ZR/ZLC)+4.0-(2.0/3.0)*(ZLC/ZR)
              else
                zcorr= 0.0
              endif
              if(zcorr.lt.0.0) zcorr=0.0
              ij1=jk2+ij
              ii2=jk1+ij
              do in=0,ntrunc
                corns(ii2,ij1,IN,jlatbin)=
     +           corns(ii2,ij1,IN,jlatbin)*zcorr
              enddo
            enddo
           enddo
C
cc         if (ib.eq.3.or.ib.eq.iblock3d-iextra3d+1) then
C
C             Apply additional localization regarding the model lid level
C             for T' and Tb
C
cc            ZLC=RVLOCTTOP/2.0
cc            jk1=1 
cc            do jk2=1,nflev
cc               ZR = abs(zprof(jk2) - zprof(jk1))
cc               if(ZR.le.ZLC) then
cc                zcorr=-0.250*(ZR/ZLC)**5+0.5*(ZR/ZLC)**4
cc     +                   +0.625*(ZR/ZLC)**3-(5.0/3.0)*(ZR/ZLC)**2+1.0
cc              elseif(ZR.le.(2.0*ZLC)) then
cc                zcorr=(1.0/12.0)*(ZR/ZLC)**5-0.5*(ZR/ZLC)**4
cc     +                   +0.625*(ZR/ZLC)**3+(5.0/3.0)*(ZR/ZLC)**2
cc     +                   -5.0*(ZR/ZLC)+4.0-(2.0/3.0)*(ZLC/ZR)
cc              else
cc                zcorr= 0.0
cc              endif   
cc              if(zcorr.lt.0.0) zcorr=0.0
cc              ij1=jk2+ij
cc              ii2=jk1+ij
cc              do in=0,ntrunc
cc                 corns(ii2,ij1,IN,jlatbin)=
cc     +            corns(ii2,ij1,IN,jlatbin)*zcorr
cc              enddo
cc            enddo
cc         end if
C             
           if (ib.eq.iblock3d-iextra3d+1) then
C                
C            Apply localization to Tb <-> Psi cross-correlation
C
             ZLC=RVLOCPSITT/2.0 
             if (jlatbin.eq.2) ZLC=3.0D0/2.0  
             do jk1=1,nflev
               do jk2=1,nflev
                 ZR=abs(zprof(jk2) - zprof(jk1))
                 if(ZR.le.ZLC) then
                   zcorr=-0.250*(ZR/ZLC)**5+0.5*(ZR/ZLC)**4
     +                   +0.625*(ZR/ZLC)**3-(5.0/3.0)*(ZR/ZLC)**2+1.0
                 elseif(ZR.le.(2.0*ZLC)) then
                   zcorr=(1.0/12.0)*(ZR/ZLC)**5-0.5*(ZR/ZLC)**4  
     +                   +0.625*(ZR/ZLC)**3+(5.0/3.0)*(ZR/ZLC)**2
     +                   -5.0*(ZR/ZLC)+4.0-(2.0/3.0)*(ZLC/ZR)
                 else
                   zcorr= 0.0
                 endif
                 if(zcorr.lt.0.0) zcorr=0.0
                 ij1=jk2+ij
                 ii2=jk1+ij
                 do in=0,ntrunc
                   corns(jk1,ij1,IN,jlatbin)=
     +                corns(jk1,ij1,IN,jlatbin)*zcorr
                   corns(ii2,jk2,IN,jlatbin)=
     +                corns(ii2,jk2,IN,jlatbin)*zcorr
                 enddo
                enddo
              enddo
           end if
C
           if (ib.eq.3) then
C
C             Also apply localization to T' <-> ps' cross-correlation
C
              ZLC=ZTLEN/2.0
              jk1=nflev
              do jk2=1,nflev
                ZR=abs(zprof(jk2) - zprof(jk1))
                if(ZR.le.ZLC) then
                  zcorr=-0.250*(ZR/ZLC)**5+0.5*(ZR/ZLC)**4
     +                +0.625*(ZR/ZLC)**3-(5.0/3.0)*(ZR/ZLC)**2+1.0
                elseif(ZR.le.(2.0*ZLC)) then
                  zcorr=(1.0/12.0)*(ZR/ZLC)**5-0.5*(ZR/ZLC)**4
     +                +0.625*(ZR/ZLC)**3+(5.0/3.0)*(ZR/ZLC)**2
     +                -5.0*(ZR/ZLC)+4.0-(2.0/3.0)*(ZLC/ZR)
                else
                  zcorr= 0.0
                endif
                if(zcorr.lt.0.0) zcorr=0.0
                ij1=jk2+(ib-1)*nflev
                ii=(IBLOCK3D-IEXTRA3D)*nflev         
                ii2=ii+1
                do in=0,ntrunc
                   corns(ii2,ij1,IN,jlatbin)=
     +                  corns(ii2,ij1,IN,jlatbin)*zcorr
                   corns(ij1,ii2,IN,jlatbin)=
     +                  corns(ij1,ii2,IN,jlatbin)*zcorr
                end do
              end do
           end if
         endif     
C
      END DO
C
C     Identify normalization factors
C
      DO JK2 = 1, NKSDIM2
         xnorm(jk2,0:ntrunc)=sqrt(CORNS(JK2,JK2,0:ntrunc,jlatbin))
         where (xnorm(jk2,0:ntrunc).lt.1.E-15) xnorm(jk2,0:ntrunc)=1.0D0
      ENDDO
C
      if (.true.) then   ! Must be .true. when NKSDIM2>NKSDIM
C
C       Compute total vertical correlations (after localization)
C
        DO JK2 = 1, NKSDIM2
          DO JK1 = 1, NKSDIM2
            CORVERT(JK1,JK2) = 0.0
            DO IN = 0, NTRUNC
              CORVERT(JK1,JK2) = CORVERT(JK1,JK2)+((2*IN+1)
     &                          *CORNS(JK1,JK2,IN,jlatbin))
            ENDDO
          ENDDO
        ENDDO
C
        write(701,*) corvert
        write(702,*) zpsi
        if(.true.) then
C
          DO JK2 = 1, NKSDIM2
             xnorm2(jk2)=sqrt(corvert(jk2,jk2))
             if (xnorm2(jk2).lt.1.E-15) xnorm2(jk2)=1.0D0
          ENDDO
C
          iulcorvert = 0
          ierr = fnom(iulcorvert,'corvert.fst','RND',0)
          ierr = fstouv(iulcorvert,'RND')
C
          ini = nksdim2
          inj = nksdim2
          ink = 1
          ip1 = jlatbin 
          ip2 = ntrunc
          ip3=0
          ig1=0
          ig2=0
          ig3=0
          ig3=0
          inpas=0
          ideet=0
          idateo=0
          inbits=16
          clnomvar= 'ZV'
          cletiket= 'CORVERT'
          idatyp  = 5
C
          ierr = vfstecr
(corvert, xnorm, -inbits, iulcorvert, idateo
     &         , ideet,inpas, ini, inj, ink, ip1, ip2, ip3, cltypvar,
     &         clnomvar,cletiket,clgrtyp,ig1, ig2, ig3, ig4, idatyp,
     &         .true.)
cc          ierr = vfstlir(corvert,iulcorvert,INI,INJ,INK
cc     S       ,idateo,cletiket,ip1,ip2,ip3,cltypvar,clnomvar)
C
          if(.false.) then
C
             DO JK2 = 1, NKSDIM2
                corvertt(jk2,1:nksdim2)=corvert(jk2,1:nksdim2)
     &            /xnorm2(jk2)/xnorm2(1:nksdim2)
             END DO
C
             ierr = vfstecr
(corvertt, xnorm, -inbits, iulcorvert, idateo
     &         , ideet,inpas, ini, inj, ink, ip1, ip2, ip3, cltypvar,
     &         clnomvar,'CORVERTN',clgrtyp,ig1, ig2, ig3, ig4, idatyp,
     &         .true.)
C 
             clnomvar='ZZ'
             DO IN = 0, NTRUNC
               do ij=1,nksdim2 
                  corns2(ij,1:nksdim2)=corns(ij,1:nksdim2,in,jlatbin)
     &                /(xnorm(ij,in)*xnorm(1:nksdim2,in))
               end do 
C 
               ip2=in
               ierr = vfstecr
(corns2,xwork, -inbits,iulcorvert,idateo, 
     &           ideet,inpas, ini, inj, ink, ip1, ip2, ip3, cltypvar, 
     &           clnomvar,'CORNS   ',clgrtyp,ig1, ig2, ig3, ig4, idatyp, 
     &           .true.) 
             end do
C
          end if
C
          ierr = fstfrm(iulcorvert)
          ierr = fclos(iulcorvert)
        endif
      end if
C
      IF (IEXTRA3D.GT.0) THEN
C
C       Modify RGSIGTB to obtain correct sigma_Tb
C  
        ZWINDOW(:)=0.0d0
        CALL SUWINLATBIN(ZWINDOW,JLATBIN)
        do jk1=1,nflev
           zfact=corvert(jk1+nksdim,jk1+nksdim)
           do ij=1,nj
              zcoriolis = abs(2.*romega*rmu(ij))
              if(zfact.gt.0.0.and.zcoriolis.ne.0.0) then
                 zfact2=1/(zfact*zcoriolis*zcoriolis)
              else  
                 zfact2=0.0
              endif
              write(801,*) jk1,ij,zwindow(ij),zfact2
              zfacttb(ij,jk1)=zfacttb(ij,jk1)+(zwindow(ij)*zwindow(ij)*zfact2)
           enddo
        enddo
C 
C       Modify RGSIGPSB to obtain correct sigma_PSb
C
        do ij=1,nj
          do jk2=1,nflevptot
             zpsips(jk2)=0.0d0
             do jk1=1,nflevptot
                zpsips(jk2)=zpsips(jk2)+ptot(NFLEV+1,jk1,klatptot(jlatbin))*zpsi(jk1,jk2)
             enddo
          enddo  
          zfact=0.0d0
          do jk1=1,nflevptot
             zfact=zfact+ptot(NFLEV+1,jk1,klatptot(jlatbin))*zpsips(jk1)
          enddo
          zcoriolis = abs(2.*romega*rmu(ij))
          if (zfact.gt.0.0.and.zcoriolis.ne.0.0) then
              zfact2=1/(zfact*zcoriolis*zcoriolis)
          else
              zfact2=0.0 
          endif
          write(802,*) ij,zwindow(ij),zfact2
          zfactpsb(ij)=zfactpsb(ij)+(zwindow(ij)*zwindow(ij)*zfact2)
        enddo
C
      end if
C
      end do ! end loop on jlatbin
C
      IF (IEXTRA3D.GT.0) THEN
C
C        Modify RGSIGTB and RGSIGPSB to obtain correct 
C        sigma_Tb and sigma_Psb
C
         do ij=1,nj
            if(zfactpsb(ij).gt.0.0) then
               rgsigpsb(ij)=rgsigpsb(ij)*sqrt(zfactpsb(ij))
            else
              rgsigpsb(ij)=0.0
            endif
            do jk1=1,nflev
               if(zfacttb(ij,jk1).gt.0.0) then
                  rgsigtb(ij,jk1)=rgsigtb(ij,jk1)*sqrt(zfacttb(ij,jk1))
               else   
                  rgsigtb(ij,jk1)=0.0
               endif
            enddo
         enddo
      end if
C
      WRITE(NULOUT,*)'  SUCORNS2 --- Calculate CORNS^(0.5)'
C
C     Calculate CORNS^(0.5) for each total wave number
C
      DO JLATBIN=1,NLATBIN
C
      IF (IBLOCK.EQ.0) THEN
C
C     Memory allocation for taking the square root.
C
      if (jlatbin.eq.1) then
C
      ILWORK=2*4*NKSDIM2
      allocate(zwork(2*4*NKSDIM2)) 
      allocate(result(NKSDIM2,NKSDIM2))
      allocate(result2(NKSDIM2,NKSDIM))
      allocate(eigenv(NKSDIM2))
      allocate(eigen(NKSDIM2,NKSDIM2))
      allocate(sqrtin(NKSDIM2))
      allocate(eigen2(NKSDIM2,NKSDIM2))
      allocate(sqrtin2(NKSDIM2))
C
      end if
C
C     Decompose complete matrices
C
      DO IN=0,NTRUNC
C
        EIGEN(1:NKSDIM2,1:NKSDIM2)=CORNS(1:NKSDIM2,1:NKSDIM2,IN,jlatbin)
C
C       1.  CALCULATE EIGENVALUES AND EIGENVECTORS.
C
        CALL DSYEV('V','U',NKSDIM2, EIGEN,NKSDIM2, EIGENV,
     +             ZWORK, ILWORK, INFO )
C
        SQRTIN(:)=0.0D0
        SQRTIN2(:)=0.0D0 
        DO II=1,NKSDIM2
          if(EIGENV(II).ge.1.0e-25) then
            SQRTIN(II)=SQRT(EIGENV(II))
          else
            SQRTIN(II)=0.0D0
          endif
        END DO
C
C       Reverse the order of E-values and vectors
C
        DO IJ=1,NKSDIM2
           isq=NKSDIM2-IJ+1
           SQRTIN2(IJ)=SQRTIN(isq)
           EIGEN2(1:NKSDIM2,IJ)=EIGEN(1:NKSDIM2,ISQ)
        end do
C
C       2. CALCULATE THE PRODUCT AND STORE IT BACK IN CORNS
C
C       Note that the following is to give RESULT2(NKSDIM2,NKSDIM)
C
        IF (IEXTRA3D.GT.0) RESULT2(1:NKSDIM2,NKSDIM+1:NKSDIM2)=0.0D0
        DO II=1,NKSDIM
           RESULT2(1:NKSDIM2,II)=EIGEN2(1:NKSDIM2,II)*SQRTIN2(II)
        END DO
C
        if (IEXTRA3D.EQ.0) then
C
C          There are different solutions possible for the square root matrix.
C          The second call to MXMA8 below is to produce a symmetric square
C          root matrix which is the convention when producing square roots
C          of covariance matrices.
C
c           CALL MXMA8(RESULT2,1,NKSDIM2,  EIGEN2,NKSDIM2,1,  RESULT,1,
c     +          NKSDIM2,NKSDIM2,NKSDIM2,NKSDIM2)
c           CORNS(1:NKSDIM2,1:NKSDIM2,IN,jlatbin)=RESULT(1:NKSDIM2,1:NKSDIM2)
c
c           Call to MXMA8 replaced to improve computational speed.
c
            CORNS(1:NKSDIM2,1:NKSDIM2,IN,jlatbin)=MATMUL(RESULT2,EIGEN2)
        else
C
C          Square root matrix is set as non-square and non-symmetric
C
           CORNS(1:NKSDIM2,NKSDIM+1:NKSDIM2,IN,JLATBIN)=0.0D0
           CORNS(1:NKSDIM2,1:NKSDIM,IN,jlatbin)=RESULT2(1:NKSDIM2,1:NKSDIM)
        end if
C
      enddo
C
      if (jlatbin.eq.nlatbin) then
C
      deallocate(zwork)
      deallocate(result)
      deallocate(result2)
      deallocate(eigenv)
      deallocate(eigen)
      deallocate(sqrtin)
      deallocate(eigen2)
      deallocate(sqrtin2)
C
      end if
C
      ELSE
C
C     Decompose by sets of correlated blocks
C
      DO IB=1,IBLOCK3D
C
      if (icorel(ib,ib).eq.0) cycle
C
C     Identify number of principal diagonal sub-blocks to form the final block.
C     Non-diagonal sub-block contributions are identified through the
C     diagonal sub-block indices.
C     
      isum3d=sum(ICOREL(ib,ib:IBLOCK3D))
      isum3d2=0
      if (IEXTRA3D.ge.1.and.ib.lt.IBLOCK3D) 
     &   isum3d2=sum(ICOREL(ib,max(ib+1,IBLOCK3D-IEXTRA3D+1):IBLOCK3D))
      isum2d=sum(ICOREL(ib,IBLOCK3D+1:IBLOCK3D+IBLOCK2D)) 
      INF=NFLEV*isum3d+isum2d
      INF2=INF
C
      isquare=1    ! Square root matrix to be square and symmetric
      if (ib.le.IBLOCK3D-IEXTRA3D.and.isum3d2.gt.0) then
         INF2=NFLEV*(isum3d-isum3d2)+isum2d
         isquare=0 ! Square root matrix to be non-square and non-symmetric
      end if
C
C     Memory allocation for taking the square root.
C
      ILWORK=2*4*INF   
      allocate(zwork(2*4*INF))
      allocate(result(INF,INF))
      allocate(result2(INF,INF))
      allocate(eigenv(INF))
      allocate(eigen(INF,INF))
      allocate(sqrtin(INF))
      allocate(eigen2(INF,INF))
      allocate(sqrtin2(INF))
      if (ib.gt.iblock3d-iextra3d) then
         IC=(IB-1)*NFLEV+iblock2d
      else
         IC=(IB-1)*NFLEV
      end if
C
C     Decompose for each spectral coefficient
C
      DO IN=0,NTRUNC
C
C       1. Extract desired block matrix
C
        EIGEN(1:NFLEV,1:NFLEV)=CORNS(IC+1:IC+NFLEV,IC+1:IC+NFLEV,IN,jlatbin)
        if (isum3d2.gt.0) then   
C
C          Apply normalization for improved matrix conditionning
C
           do ij=1,nflev
             EIGEN(ij,1:NFLEV)=EIGEN(ij,1:NFLEV)
     &           /xnorm(ic+ij,in)
     &           /xnorm(IC+1:IC+NFLEV,in)
           end do
        end if
C
C       Add correlation contributions associated to 3D analysis fields
C
        id2=nflev
        if (isum3d-isum3d2.gt.1) then
           do ib2=ib+1,iblock3d-iextra3d
              if (icorel(ib,ib2).eq.1) then
C
C                Fill in diagonal block
C
                 ic2=(ib2-1)*nflev
                 EIGEN(ID2+1:ID2+NFLEV,ID2+1:ID2+NFLEV)=
     &              CORNS(IC2+1:IC2+NFLEV,IC2+1:IC2+NFLEV,IN,jlatbin)
C   
C                Fill in non-diagonal blocks
C
                 DO IJ=1,NFLEV
                 ib4=(ib-1)*nflev
                 ib5=0
                 IJ1=IJ+ID2
                 IB1=IJ+IC2
                 do ib3=ib,ib2-1
                    if (icorel(ib,ib3).eq.1) then
                      EIGEN(IB5+1:IB5+NFLEV,IJ1)=
     &                  CORNS(IB4+1:IB4+NFLEV,IB1,IN,jlatbin)
                      EIGEN(IJ1,IB5+1:IB5+NFLEV)=EIGEN(IB5+1:IB5+NFLEV,IJ1)
                      ib5=ib5+nflev
                    end if
                    ib4=ib4+nflev
                 end do
                 END DO
C
                 id2=id2+nflev
              end if
           end do
        end if 
C
C       Add correlation contributions associated to 2D fields
C       
        if (isum2d.gt.0) then
           id2=id2+1
           ic2=(iblock3d-iextra3d)*nflev
C
           do ib2=iblock3d+1,iblock3d+iblock2d
              ic2=ic2+1
              if (icorel(ib,ib2).eq.1) then
C
C                Fill in diagonal block
C
                 EIGEN(ID2,ID2)=CORNS(IC2,IC2,IN,jlatbin)
C
C                Fill in non-diagonal blocks
C
                 ib4=(ib-1)*nflev
                 ib5=0
                 do ib3=ib,iblock3d
                    if (icorel(ib,ib3).eq.1) then
                       EIGEN(IB5+1:IB5+NFLEV,ID2)=
     &                   CORNS(IB4+1:IB4+NFLEV,IC2,IN,jlatbin)
                       EIGEN(ID2,IB5+1:IB5+NFLEV)=EIGEN(IB5+1:IB5+NFLEV,ID2)
                       ib5=ib5+nflev
                     end if  
                     ib4=ib4+nflev
                 END DO
C
                 if (ib2.gt.iblock3d+1) then
                 do ib3=iblock3d+1,ib2-1
                    if (icorel(ib,ib3).eq.1) then
                       EIGEN(IB5+1,ID2)=CORNS(IB4+1,IC2,IN,jlatbin)
                       EIGEN(ID2,IB5+1)=EIGEN(IB5+1,ID2)
                       ib5=ib5+1
                     end if
                     ib4=ib4+1
                 END DO
                 end if
C
                 id2=id2+1
              end if
           end do
        id2=id2-1
        end if
C
C       Add correlation contributions associated to other correlated 3D fields
C
        if (isum3d2.gt.0) then
           do ib2=iblock3d-iextra3d+1,iblock3d
              if (icorel(ib,ib2).eq.1) then
C
C                Fill in diagonal block
C
                 ic2=(ib2-1)*nflev+iblock2d
                 EIGEN(ID2+1:ID2+NFLEV,ID2+1:ID2+NFLEV)=
     &              CORNS(IC2+1:IC2+NFLEV,IC2+1:IC2+NFLEV,IN,jlatbin)
C 
C                Apply normalization for improved matrix conditionning
C          
                 do ij=1,nflev
                    EIGEN(ID2+1:ID2+NFLEV,ID2+ij)=
     &                EIGEN(ID2+1:ID2+NFLEV,ID2+ij)
     &                /(xnorm(IC2+1:IC2+NFLEV,in)*xnorm(IC2+ij,in))
                 end do
C
C                Fill in non-diagonal blocks
C
                 DO IJ=1,NFLEV
                 ib4=(ib-1)*nflev
                 ib5=0
                 IJ1=IJ+ID2
                 IB1=IJ+IC2
                 do ib3=ib,ib2-1
                    if (icorel(ib,ib3).eq.1) then
                      EIGEN(IB5+1:IB5+NFLEV,IJ1)=
     &                  CORNS(IB4+1:IB4+NFLEV,IB1,IN,jlatbin)
                      EIGEN(IJ1,IB5+1:IB5+NFLEV)=EIGEN(IB5+1:IB5+NFLEV,IJ1)
C  
C                     Apply normalization for improved matrix conditionning
C
                      EIGEN(IB5+1:IB5+NFLEV,IJ1)=
     &                     EIGEN(IB5+1:IB5+NFLEV,IJ1)
     &                      /xnorm(IB1,in)
     &                      /xnorm(IB4+1:IB4+NFLEV,in)
                      EIGEN(IJ1,IB5+1:IB5+NFLEV)=                      
     &                     EIGEN(IJ1,IB5+1:IB5+NFLEV)
     &                      /xnorm(IB1,in)
     &                      /xnorm(IB4+1:IB4+NFLEV,in)
C
                      ib5=ib5+nflev
                    end if
                    ib4=ib4+nflev
                 end do
                 END DO
C
                 id2=id2+nflev
              end if
           end do
        end if
C
C       2.  CALCULATE EIGENVALUES AND EIGENVECTORS.
C
        CALL DSYEV('V','U',INF, EIGEN, INF, EIGENV,
     +             ZWORK, ILWORK, INFO )
C
        SQRTIN(:)=0.0D0
        SQRTIN2(:)=0.0D0 
        DO II=1,INF
          if(EIGENV(II).ge.1.0E-25) then
            SQRTIN(II)=SQRT(EIGENV(II))
          else
            SQRTIN(II)=0.0D0
          endif
        END DO
C
C       Reverse the order of E-values and vectors
C
        DO IJ=1,INF
           isq=INF-IJ+1
           SQRTIN2(IJ)=SQRTIN(isq)
           EIGEN2(1:INF,IJ)=EIGEN(1:INF,ISQ)
        end do
C
C       3. CALCULATE THE PRODUCT AND STORE IT BACK IN CORNS
C
C       Note that the following is to give RESULT2(INF,INF2)
C
        IF (INF2.LT.INF) RESULT2(1:INF,INF2+1:INF)=0.0D0
        DO II=1,INF2
           RESULT2(1:INF,II)=EIGEN2(1:INF,II)*SQRTIN2(II)
        END DO
C
        IF (ISQUARE.EQ.1) then    ! INF2=INF
C
C          There are different solutions possible for the square root matrix.
C          The following multiplication is to produce a symmetric square
C          root matrix which is the convention when producing square roots
C          of covariance matrices.
C
c           CALL MXMA8(RESULT2,1,INF,  EIGEN2,INF,1,  RESULT,1,
c     +          INF,INF,INF,INF)
C
C          MXMA8 replaced by faster MATMUL 
C
           RESULT=MATMUL(RESULT2,EIGEN2)
C
        ELSE   ! INF2 < INF
C          
C          Square root matrix is set as non-square and non-symmetric
C
           RESULT(1:INF,INF2+1:INF)=0.0D0
           RESULT(1:INF,1:INF2)=RESULT2(1:INF,1:INF2)
        END IF
C
C       4. Transfer square root to CORNS
C
        CORNS(IC+1:IC+NFLEV,IC+1:IC+NFLEV,IN,jlatbin)=
     &       RESULT(1:NFLEV,1:NFLEV)
        if (isum3d2.gt.0) then   ! Undo earlier applied normalization 
           do ij=1,nflev
             CORNS(IC+ij,IC+1:IC+NFLEV,IN,jlatbin)=
     &           CORNS(IC+ij,IC+1:IC+NFLEV,IN,jlatbin)
     &           *xnorm(ic+ij,in)
           end do
        end if
C
C       Set contributions associated to 3D analysis fields
C
        id2=nflev
        if (isum3d-isum3d2.gt.1) then
           do ib2=ib+1,iblock3d-iextra3d
              if (icorel(ib,ib2).eq.1) then
C
C                Fill in diagonal block
C
                 ic2=(ib2-1)*nflev
                 CORNS(IC2+1:IC2+NFLEV,IC2+1:IC2+NFLEV,IN,jlatbin)=
     &                 RESULT(ID2+1:ID2+NFLEV,ID2+1:ID2+NFLEV)
C
C                Fill in non-diagonal blocks
C
                 DO IJ=1,NFLEV
                 ib4=(ib-1)*nflev
                 ib5=0 
                 IJ1=IJ+ID2   
                 IB1=IJ+IC2
                 do ib3=ib,ib2-1
                    if (icorel(ib,ib3).eq.1) then
                       CORNS(IB1,IB4+1:IB4+NFLEV,IN,jlatbin)=
     &                     RESULT(IJ1,IB5+1:IB5+NFLEV)
                       CORNS(IB4+1:IB4+NFLEV,IB1,IN,jlatbin)=
     &                     RESULT(IB5+1:IB5+NFLEV,IJ1) 
                       ib5=ib5+nflev
                    end if
                    ib4=ib4+nflev
                 end do
                 END DO   
C
                 id2=id2+nflev
              end if
           end do
        end if
C
C       Set contributions associated to 2D fields
C
        if (isum2d.gt.0) then
           id2=id2+1
           ic2=(iblock3d-iextra3d)*nflev
C
           do ib2=iblock3d+1,iblock3d+iblock2d
              ic2=ic2+1
              if (icorel(ib,ib2).eq.1) then
C
C                Fill in diagonal block
C
                 CORNS(IC2,IC2,IN,jlatbin)=RESULT(ID2,ID2)
C
C                Fill in non-diagonal blocks
C
                 ib4=(ib-1)*nflev
                 ib5=0
                 do ib3=ib,iblock3d
                    if (icorel(ib,ib3).eq.1) then
                       CORNS(IC2,IB4+1:IB4+NFLEV,IN,jlatbin)=
     &                     RESULT(ID2,IB5+1:IB5+NFLEV)
                       CORNS(IB4+1:IB4+NFLEV,IC2,IN,jlatbin)=
     &                     RESULT(IB5+1:IB5+NFLEV,ID2)
                       ib5=ib5+nflev
                     end if
                     ib4=ib4+nflev
                 END DO
C
                 if (ib2.gt.iblock3d+1) then
                 do ib3=iblock3d+1,ib2-1
                    if (icorel(ib,ib3).eq.1) then
                       CORNS(IC2,IB4+1,IN,jlatbin)=RESULT(ID2,IB5+1)
                       CORNS(IB4+1,IC2,IN,jlatbin)=RESULT(IB5+1,ID2)
                       ib5=ib5+1
                     end if
                     ib4=ib4+1
                 END DO
                 end if
C
                 id2=id2+1
              end if  
           end do
           id2=id2-1
        end if
C
C       Set contributions associated to other correlated 3D fields
C       
        if (isum3d2.gt.0) then
           do ib2=iblock3d-iextra3d+1,iblock3d
              if (icorel(ib,ib2).eq.1) then
C
C                Fill in diagonal block
C
                 ic2=(ib2-1)*nflev+iblock2d
                 CORNS(IC2+1:IC2+NFLEV,IC2+1:IC2+NFLEV,IN,jlatbin)=0.0D0
C
C                Fill in non-diagonal blocks
C
                 DO IJ=1,NFLEV
                 ib4=(ib-1)*nflev
                 ib5=0
                 IJ1=IJ+ID2
                 IB1=IJ+IC2
                 do ib3=ib,ib2-1
                    if (icorel(ib,ib3).eq.1) then
                       CORNS(IB1,IB4+1:IB4+NFLEV,IN,jlatbin)=
     &                     RESULT(IJ1,IB5+1:IB5+NFLEV)
                       CORNS(IB4+1:IB4+NFLEV,IB1,IN,jlatbin)=
     &                     RESULT(IB5+1:IB5+NFLEV,IJ1)
C
C                      Undo normalization
C
                       CORNS(IB1,IB4+1:IB4+NFLEV,IN,jlatbin)=
     &                     CORNS(IB1,IB4+1:IB4+NFLEV,IN,jlatbin)
     &                      *xnorm(IB1,in)
                       CORNS(IB4+1:IB4+NFLEV,IB1,IN,jlatbin)=
     &                     CORNS(IB4+1:IB4+NFLEV,IB1,IN,jlatbin)
     &                      *xnorm(IB1,in)
C
                       ib5=ib5+nflev
                    end if
                    ib4=ib4+nflev
                 end do
                 END DO
C
                 id2=id2+nflev
              end if
           end do
        end if
C
      enddo
C
      deallocate(zwork)
      deallocate(result)
      deallocate(result2)
      deallocate(eigenv)
      deallocate(eigen)
      deallocate(sqrtin)
      deallocate(eigen2)
      deallocate(sqrtin2)
C
      enddo
C
C     Go through 1x1 blocks (assume no cross-correlation between 
C     remaining 2D variables)
C
C
      ic=(iblock3d-iextra3d)*nflev
      DO IB=1,IBLOCK2D
         IF (ICOREL(IBLOCK3D+IB,IBLOCK3D+IB).eq.0) THEN
C           Square root component already computed
         ELSE
            II2=IC+IB
            CORNS(II2,II2,0:NTRUNC,jlatbin)=
     &        sqrt(CORNS(II2,II2,0:NTRUNC,jlatbin))
         END IF
      END DO
C
      END IF
C
      if (.false.) then
C
C         Write square root and re-constructed correlation matrices
C         for diagnostic purpose only.
C
          iulcorvert=0
          ierr = fnom(iulcorvert,'sqrtroot.fst','RND',0)
          ierr = fstouv(iulcorvert,'RND')
C 
          ini = nksdim2
          inj = nksdim2
          ink = 1
          ip3=0
          ip1 = jlatbin
          ig1=0
          ig2=0
          ig3=0
          ig3=0
          inpas=0
          ideet=0
          idateo=0
          inbits=16
          clnomvar='ZZ'
          cletiket= 'SQRTROOT'
          idatyp  = 5
C
          write(nulout,*) '  SUCORNS2: Writing square root matrices'
C
          CORVERT2(:,:) = 0.0D0
          do in=0,ntrunc
C
             corvert(1:nksdim2,1:nksdim2)=
     &                 corns(1:nksdim2,1:nksdim2,in,jlatbin)
C
             corvertt=transpose(corvert)
             
             CALL DGEMUL(corvert,NKSDIM2,'N',corvertt,NKSDIM2,'N',corns2,
     +             NKSDIM2,NKSDIM2,NKSDIM2,NKSDIM2)  
!             CALL MXMA8(corvert,1,NKSDIM2,corvertt,1,NKSDIM2,corns2,1,
!     +           NKSDIM2,NKSDIM2,NKSDIM2,NKSDIM2)
C
             ip2=in
             ierr = vfstecr
(corvert,xwork, -inbits,iulcorvert,idateo,
     &         ideet,inpas, ini, inj, ink, ip1, ip2, ip3, cltypvar,
     &         clnomvar,cletiket,clgrtyp,ig1, ig2, ig3, ig4, idatyp,
     &         .true.)
C
C
             CORVERT2(:,:) = CORVERT2(:,:)+((2*IN+1)*CORNS2(:,:))
             do ij=1,nksdim2
                corns2(ij,1:nksdim2)=corns2(ij,1:nksdim2)
     &                /(xnorm(ij,in)*xnorm(1:nksdim2,in))
             end do
C
             ierr = vfstecr
(corns2,xwork, -inbits,iulcorvert,idateo,
     &         ideet,inpas, ini, inj, ink, ip1, ip2, ip3, cltypvar,
     &         clnomvar,'CORNS2  ',clgrtyp,ig1, ig2, ig3, ig4, idatyp,
     &         .true.)
C
          end do
C
          ip2=0
          clnomvar='ZV'
          ierr = vfstecr
(corvert2,xwork, -inbits,iulcorvert,idateo,  
     &         ideet,inpas, ini, inj, ink, ip1, ip2, ip3, cltypvar,   
     &         clnomvar,'CORVERT2',clgrtyp,ig1, ig2, ig3, ig4, idatyp,
     &         .true.)
C
          DO JK2 = 1, NKSDIM2
             corvert2(jk2,1:nksdim2)=corvert2(jk2,1:nksdim2)
     &          /xnorm2(jk2)/xnorm2(1:nksdim2)
          END DO
C
          ierr = vfstecr
(corvert2, xwork, -inbits, iulcorvert, idateo
     &         , ideet,inpas, ini, inj, ink, ip1, ip2, ip3, cltypvar, 
     &         clnomvar,'CORVERTN2',clgrtyp,ig1, ig2, ig3, ig4, idatyp,
     &         .true.)
C
          ierr = fstfrm(iulcorvert)
          ierr = fclos(iulcorvert)
C
      end if
C
      END DO  ! End of loop on JLATBIN
C
      RETURN
      END