!--------------------------------------- LICENCE BEGIN -----------------------------------
!Environment Canada - Atmospheric Science and Technology License/Disclaimer,
!                     version 3; Last Modified: May 7, 2008.
!This is free but copyrighted software; you can use/redistribute/modify it under the terms
!of the Environment Canada - Atmospheric Science and Technology License/Disclaimer
!version 3 or (at your option) any later version that should be found at:
!http://collaboration.cmc.ec.gc.ca/science/rpn.comm/license.html
!
!This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
!without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
!See the above mentioned License/Disclaimer for more details.
!You should have received a copy of the License/Disclaimer along with this software;
!if not, you can write to: EC-RPN COMM Group, 2121 TransCanada, suite 500, Dorval (Quebec),
!CANADA, H9P 1J3; or send e-mail to service.rpn@ec.gc.ca
!-------------------------------------- LICENCE END --------------------------------------

!--------------------------------------------------------------------------
! MODULE bMatrixHI (Background-error Covariance Matrix estimated
!                       using lagged forecast differences or ensemble 
!                       members and based on horizontally homogeneous/isotropic
!                       correlations.  prefix="bhi")
!
! Purpose: Performs transformation from control vector to analysis increment 
!          using the spatially localized ensemble covariance matrix
!
! Subroutines:
!    bhi_setup (public)
!    bhi_BSqrt (public)
!    bhi_BSqrtAd (public)
!    cain
!    cainAd
!
! Dependencies:
!    globalSpectralTransform
!--------------------------------------------------------------------------

MODULE BmatrixHI 1,9
  use mpivar_mod
  use MathPhysConstants_mod
  use earthConstants_mod
  use gridStateVector_mod
  use globalSpectralTransform
  use gaussGrid_mod
  use horizontalCoord_mod
  use verticalCoord_mod
  use varNameList_mod
  implicit none
  save
  private

  ! public procedures
  public :: bhi_Setup,bhi_BSqrt,bhi_BSqrtAd,bhi_Finalize,bhi_expandToMPIglobal,bhi_reduceToMPIlocal
  public :: bhi_getScaleFactor


  logical             :: initialized = .false.
  integer             :: nj_l,ni_l
  integer             :: nlev_M,nlev_T,nlev_T_even,nkgdim,nkgdim2,nkgdimSqrt
  integer             :: ntrunc,nla_mpiglobal,nla_mpilocal
  integer             :: cvDim_mpilocal,cvDim_mpiglobal
  logical             :: squareSqrt
  integer             :: gstID, gstID2
  integer             :: nlev_bdl
  type(struct_vco),pointer :: vco_anl
  logical             :: is_staggered

  real(8),allocatable :: tantheta(:,:)
  real(8),allocatable :: PtoT(:,:,:)

  real(8),pointer     :: rgsig(:,:)
  real(8),pointer     :: rgsiguu(:,:),rgsigvv(:,:),rgsigtt(:,:),rgsigtb(:,:),rgsigq(:,:)
  real(8),pointer     :: rgsigps(:),rgsigpsb(:)
  real(8),allocatable :: tgstdbg(:,:)

  real(8),allocatable :: corns(:,:,:)
  real(8),allocatable :: rstddev(:,:)

  ! originally from common blocks and possibly from the namelist:
  integer,parameter   :: maxNumLevels=200
  real(8)             :: scaleFactor(maxNumLevels)
  real(8)             :: scaleFactorLQ(maxNumLevels)
  logical             :: scaleTG
  real(8)             :: rcscltg(1)=100000.d0
  real(8)             :: rfacthum=1.0d0
  real(8)             :: rlimsuptg=3.0d0
  logical             :: llimtg=.true.
  integer             :: nulbgst=0
  integer             :: nLevPtoT
  real(8)             :: rvlocbalt   = 6.0d0
  real(8)             :: rvlocpsi    = 6.0d0
  real(8)             :: rvlocchi    = 6.0d0
  real(8)             :: rvlocpsitt  = 6.0d0
  real(8)             :: rvlocunbalt = 4.0d0
  real(8)             :: rvloclq     = 4.0d0
  real(8)             :: rlimlv_bdl  = 85000.0d0
  integer             :: numModeZero  ! number of eigenmodes to set to zero

  ! this should come from state vector object
  integer             :: numvar3d
  integer             :: numvar2d
  integer             :: nspositVO 
  integer             :: nspositDI 
  integer             :: nspositTT 
  integer             :: nspositQ
  integer             :: nspositPS 
  integer             :: nspositTG

  real(8), pointer    :: pressureProfile_M(:),pressureProfile_T(:)

  integer             :: mymBeg,mymEnd,mymSkip,mymCount
  integer             :: mynBeg,mynEnd,mynSkip,mynCount
  integer             :: maxMyNla
  integer             :: myLatBeg,myLatEnd
  integer             :: myLonBeg,myLonEnd
  integer, pointer    :: ilaList_mpiglobal(:)
  integer, pointer    :: ilaList_mpilocal(:)

  integer,external    :: get_max_rss


CONTAINS


  SUBROUTINE BHI_setup(hco_in,vco_in,CVDIM_OUT) 1,17
    implicit none

    type(struct_hco),pointer :: hco_in
    type(struct_vco),pointer :: vco_in
    integer                  :: cvDim_out

    integer :: jlev, mpiMode, nulnam, ierr, fnom, fclos, fstouv, fstfrm
    integer :: jm, jn, status, latPerPE, lonPerPE, Vcode_anl
    logical :: llfound, lExists
    real(8) :: zps
    character(len=8) :: bFileName = './bgcov'

    NAMELIST /NAMBHI/ntrunc,scaleFactor,scaleFactorLQ,scaleTG,numModeZero,squareSqrt

    call tmg_start(15,'BHI_SETUP')

    vco_anl => vco_in
    nLev_M = vco_anl%nlev_M
    nLev_T = vco_anl%nlev_T
    ! need an even number of levels for spectral transform (gstID2)
    if(mod(nLev_T,2).ne.0) then
      nLev_T_even = nLev_T+1
    else
      nLev_T_even = nLev_T
    endif
    if(mpi_myid.eq.0) write(*,*) 'BHI_setup: nLev_M, nLev_T, nLev_T_even=',nLev_M, nLev_T, nLev_T_even

    status = vgd_get(vco_anl%vgrid,key='ig_1 - vertical coord code',value=Vcode_anl)
    if(Vcode_anl .eq. 5001) then
      is_staggered = .false.
    elseif(Vcode_anl .eq. 5002) then
      is_staggered = .true.
    else
      write(*,*) 'Vcode_anl = ',Vcode_anl
      call abort3d('bmatrixHI: unknown vertical coordinate type!')
    endif
    if(mpi_myid.eq.0) write(*,*) 'bmatrixHI: vertical coord is_staggered = ',is_staggered


    ! default values for namelist variables
    ntrunc = 108
    scaleFactor(:) = 1.0d0
    scaleFactorLQ(:) = 1.0d0
    scaleTG = .true.
    numModeZero = 0
    squareSqrt = .false.

    nulnam = 0
    ierr = fnom(nulnam,'./flnml','FTN+SEQ+R/O',0)
    read(nulnam,nml=nambhi,iostat=ierr)
    if(ierr.ne.0) call abort3d('bhi_setup: Error reading namelist')
    if(mpi_myid.eq.0) write(*,nml=nambhi)
    ierr = fclos(nulnam)

    do jlev = 1, max(nLev_M,nLev_T)
      if(scaleFactor(jlev).gt.0.0d0) then 
        scaleFactor(jlev) = sqrt(scaleFactor(jlev))
      else
        scaleFactor(jlev) = 0.0d0
      endif
    enddo

    if (sum(scaleFactor(1:max(nLev_M,nLev_T))).eq.0.0d0 ) then
      if(mpi_myid.eq.0) write(*,*) 'bmatrixHI: scaleFactor=0, skipping rest of setup'
      cvdim_out = 0
      initialized = .true.
      return
    end if

    do jlev = 1, max(nLev_M,nLev_T)
      if(scaleFactorLQ(jlev).gt.0.0d0) then 
        scaleFactorLQ(jlev) = sqrt(scaleFactorLQ(jlev))
      else
        scaleFactorLQ(jlev) = 0.0d0
      endif
    enddo

    numvar3d = 4
    numvar2d = 2

    nLevPtot = nLev_M-1 ! ignore streamfunction at hyb=1, since highly correlated with next level
    nspositVO = 1
    nspositDI = 1*nLev_M+1
    nspositTT = 2*nLev_M+1
    nspositQ  = 2*nLev_M+1*nLev_T+1
    nspositPS = 2*nLev_M+2*nLev_T+1
    nspositTG = 2*nLev_M+2*nLev_T+2
    nkgdim = nLev_M*2 + nLev_T*2 + numvar2d
    nkgdim2 = nkgdim + nLev_T
    if(squareSqrt) then
      nkgdimSqrt = nkgdim2
    else
      nkgdimSqrt = nkgdim
    endif
    nla_mpiglobal = (ntrunc+1)*(ntrunc+2)/2
    
    ni_l = hco_in%ni
    nj_l = hco_in%nj

    mpiMode = 4
    gstID  = gst_setup(ni_l,nj_l,ntrunc,mpiMode,nkgdim)
    gstID2 = gst_setup(ni_l,nj_l,ntrunc,mpiMode,nlev_T_even)
    if(mpi_myid.eq.0) write(*,*) 'BHI:returned value of gstID =',gstID
    if(mpi_myid.eq.0) write(*,*) 'BHI:returned value of gstID2=',gstID2

    call mpivar_setup_latbands(nj_l,latPerPE,myLatBeg,myLatEnd)
    call mpivar_setup_lonbands(ni_l,lonPerPE,myLonBeg,myLonEnd)

    call mpivar_setup_m(ntrunc,mymBeg,mymEnd,mymSkip,mymCount)
    call mpivar_setup_n(ntrunc,mynBeg,mynEnd,mynSkip,mynCount)

    call gst_ilaList_mpiglobal(ilaList_mpiglobal,nla_mpilocal,maxMyNla,gstID,mymBeg,mymEnd,mymSkip,mynBeg,mynEnd,mynSkip)
    call gst_ilaList_mpilocal(ilaList_mpilocal,gstID,mymBeg,mymEnd,mymSkip,mynBeg,mynEnd,mynSkip)

    ! compute mpilocal control vector size
    cvDim_mpilocal = 0
    do jm = mymBeg, mymEnd, mymSkip
      do jn = mynBeg, mynEnd, mynSkip
        if(jm.le.jn) then
          if(jm.eq.0) then
            ! only real component for jm=0
            cvDim_mpilocal = cvDim_mpilocal + 1*nkgdimSqrt
          else
            ! both real and imaginary components for jm>0
            cvDim_mpilocal = cvDim_mpilocal + 2*nkgdimSqrt
          endif
        endif
      enddo
    enddo
    cvDim_out = cvDim_mpilocal

    ! also compute mpiglobal control vector dimension
    call rpn_comm_allreduce(cvDim_mpilocal,cvDim_mpiglobal,1,"mpi_integer","mpi_sum","GRID",ierr)

    allocate(PtoT(nlev_T+1,nlev_M,nj_l))
    allocate(tantheta(nlev_M,nj_l))
    allocate(rgsig(nj_l,nkgdim))
    allocate(tgstdbg(ni_l,nj_l))
    rgsiguu => rgsig(1:nj_l,nspositVO:nspositVO+nlev_M-1)
    rgsigvv => rgsig(1:nj_l,nspositDI:nspositDI+nlev_M-1)
    rgsigtt => rgsig(1:nj_l,nspositTT:nspositTT+nlev_T-1)
    rgsigq  => rgsig(1:nj_l,nspositQ :nspositQ +nlev_T-1)
    rgsigps => rgsig(1:nj_l,nspositPS)
    allocate(rgsigtb(nj_l,nlev_T))
    allocate(rgsigpsb(nj_l))
    allocate(corns(nkgdim2,nkgdim2,0:ntrunc))
    allocate(rstddev(nkgdim2,0:ntrunc))

    zps = 101000.D0
    status = vgd_levels( vco_anl%vgrid, ip1_list=vco_anl%ip1_M, levels=pressureProfile_M, &
                         sfc_field=zps, in_log=.false.)
    status = vgd_levels( vco_anl%vgrid, ip1_list=vco_anl%ip1_T, levels=pressureProfile_T, &
                         sfc_field=zps, in_log=.false.)

    llfound = .false.
    nlev_bdl = 0
    do jlev = 1, nlev_M
      if(.not.llfound .and. (pressureProfile_M(jlev) .ge. rlimlv_bdl  )) then
        nlev_bdl = jlev
        llfound = .true.
      endif
    enddo

    inquire(file=bFileName,exist=lExists)
    IF ( lexists )then
      ierr = fnom(nulbgst,bFileName,'RND+OLD+R/O',0)
      if ( ierr .eq. 0 ) then
        ierr =  fstouv(nulbgst,'RND+OLD')
      else
        call abort3d('BHI_setup:NO BACKGROUND STAT FILE!!')
      endif
    endif

    call BHI_rdspPtoT

    call BHI_readcorns2

    call BHI_sutg

    call BHI_rdspstd_newfmt

    call BHI_scalestd

    call BHI_sucorns2

    ierr = fstfrm(nulbgst)
    ierr = fclos(nulbgst)

    if(mpi_myid.eq.0) write(*,*) 'END OF BHI_SETUP'

    initialized = .true.

    call tmg_stop(15)

  END SUBROUTINE BHI_setup



  subroutine bhi_getScaleFactor(scaleFactor_out) 2
    implicit none
    real(8) :: scaleFactor_out(:)
    integer :: jlev

    do jlev = 1, max(nLev_M,nLev_T)
      scaleFactor_out(jlev) = scaleFactor(jlev)
    enddo

  end subroutine bhi_getScaleFactor



  SUBROUTINE BHI_scalestd 1
    implicit none

    integer :: jlev, jlon, jlat, shift_level

    if(is_staggered) then
      shift_level = 1
    else
      shift_level = 0
    endif

    do jlev = 1, nlev_M
      do jlat = 1, nj_l
        rgsiguu(jlat,jlev) = scaleFactor(jlev+shift_level)*rgsiguu(jlat,jlev)
        rgsigvv(jlat,jlev) = scaleFactor(jlev+shift_level)*rgsigvv(jlat,jlev)
      enddo
    enddo
    do jlev = 1, nlev_T
      do jlat = 1, nj_l
        rgsigtt(jlat,jlev) = scaleFactor(jlev)*rgsigtt(jlat,jlev)
        rgsigq(jlat,jlev)  = scaleFactorLQ(jlev)*scaleFactor(jlev)*rgsigq(jlat,jlev)
        rgsigtb(jlat,jlev) = scaleFactor(jlev)*rgsigtb(jlat,jlev)
      enddo
    enddo
    do jlat = 1, nj_l
      rgsigpsb(jlat) = scaleFactor(max(nLev_M,nLev_T))*rgsigpsb(jlat)
      rgsigps(jlat)  = scaleFactor(max(nLev_M,nLev_T))*rgsigps(jlat)
    enddo
    ! User has the option to not scale down the STDDEV of TG (because underestimated in Benkf)
    if(scaleTG) then
    do jlat = 1, nj_l
      do jlon = 1, ni_l
        tgstdbg(jlon,jlat) = scaleFactor(max(nLev_M,nLev_T))*tgstdbg(jlon,jlat)
      enddo
    enddo
    endif

  END SUBROUTINE BHI_scalestd



  SUBROUTINE BHI_SUCORNS2 1,12
    implicit none

    real(8) :: eigenval(nkgdim2), eigenvec(nkgdim2,nkgdim2), result(nkgdim2,nkgdim2)
    real(8) :: eigenvalsqrt(nkgdim2), eigenvec2(nkgdim2,nkgdim2), eigenvalsqrt2(nkgdim2)

    integer :: jlat,jn,jk1,jk2,jk3,jr
    integer :: ilwork,info,klatPtoT
    integer :: iulcorvert, ikey, nsize

    real(8) :: zwork(2*4*nkgdim2)
    real(8) :: ztt(nlev_T,nlev_T,(ntrunc+1)),ztpsi(nlev_T,nlev_M,(ntrunc+1))
    real(8) :: ztlen,zcorr,zr,zpres1,zpres2
    real(8) :: zfact,zfact2,zcoriolis,zpsips(nLevPtoT)
    real(8) :: zpsi(nlev_M,nlev_M),zfacttb(nj_l,nlev_T),zfactpsb(nj_l)
    real(8) :: corvert(nkgdim2,nkgdim2)
    real(8),allocatable :: corns_temp(:,:,:)
    logical :: lldebug

    ! standard file variables
    integer :: ini,inj,ink, inpas, inbits, idatyp, ideet 
    integer :: ip1,ip2,ip3,ig1,ig2,ig3,ig4,iswa,ilength,idltf
    integer :: iubc,iextr1,iextr2,iextr3,ierr,ipak,ipas,ntrials
    integer :: iliste(100),idate(100),idimax,infon,iheures,idateo
    character(len=2)  :: cltypvar
    character(len=1)  :: clgrtyp
    character(len=4)  :: clnomvar
    character(len=12) :: cletiket
    integer :: vfstlir,vfstecr,fstprm,fstinf
    integer :: fnom,fstouv,fstfrm,fclos

    lldebug = .false.

    iulcorvert = 0
    if(mpi_myid==0) then
      ierr = fnom(iulcorvert,'corvert_modular.fst','RND',0)
      ierr = fstouv(iulcorvert,'RND')
    endif

    klatPtoT = 1
    zfactpsb(:) = 0.0d0
    zfacttb(:,:) = 0.0d0

    if(lldebug) then
      do jk1 = 1, nlev_T
        do jk2 = 1, nlevPtoT
          write(622,*) jk1,jk2,klatPtoT,PtoT(jk1,jk2,klatPtoT)
        enddo
      enddo
    endif

    ! explicitly compute the balanced temperature and temperature-psi correlations

    do jn = 0, ntrunc

      ztpsi(:,:,jn+1) = 0.0d0
      ztt(:,:,jn+1) = 0.0d0
      do jk1 = 1, nlevPtoT
        do jk2 = 1, nlev_T
          do jk3 = 1, nlevPtoT
            ztpsi(jk2,jk1,jn+1) = ztpsi(jk2,jk1,jn+1)+PtoT(jk2,jk3,klatPtoT)*corns(jk3,jk1,jn)
          enddo
        enddo
      enddo
      if(nlevPtoT.lt.nlev_M) then
        do jk1 = (nlevPtoT+1), nlev_M
          do jk2 = 1, nlev_T
            ztpsi(jk2,jk1,jn+1) = ztpsi(jk2,nlevPtoT,jn+1)
          enddo
        enddo
      endif
      do jk1 = 1, nlev_T
        do jk2 = 1, nlev_T
          do jk3 = 1, nlevPtoT
            ztt(jk2,jk1,jn+1) = ztt(jk2,jk1,jn+1)+ztpsi(jk2,jk3,jn+1)*PtoT(jk1,jk3,klatPtoT)
          enddo
        enddo
      enddo
    enddo

    if(lldebug) then
      write(620,*) ztt
      write(621,*) ztpsi
    endif

    ! fill in blocks for balance temperature

    do jn = 0, ntrunc
      do jk1 = 1, nlev_T
        do jk2 = 1, nlev_T
          corns(nkgdim+jk2,nkgdim+jk1,jn) = ztt(jk2,jk1,jn+1)
        enddo
      enddo
      do jk1 = 1, nlev_M
        do jk2 = 1, nlev_T
          corns(       jk1,nkgdim+jk2,jn) = ztpsi(jk2,jk1,jn+1)
          corns(nkgdim+jk2,       jk1,jn) = ztpsi(jk2,jk1,jn+1)
        enddo
      enddo
    enddo

    ! Save un-localized PSI correlations
    do jk2 = 1, nlev_M
      do jk1 = 1, nlev_M
        zpsi(jk1,jk2) = 0.0d0
        do jn = 0, ntrunc
          zpsi(jk1,jk2) = zpsi(jk1,jk2)+((2*jn+1)*corns(jk1,jk2,jn))
        enddo
      enddo
    enddo

    ! Apply vertical localization to corrns

    ! unbalanced temperature
    ztlen = rvlocunbalt
    if(ztlen.gt.0.0d0) then
      ! calculate 5'th order function (from Gaspari and Cohn)
      do jk1 = 1, nlev_T
        zpres1 = log(pressureProfile_T(jk1))
        do jk2 = 1, nlev_T
          zpres2 = log(pressureProfile_T(jk2))
          zr = abs(zpres2 - zpres1)
          zcorr = gasparicohn(ztlen,zr)
          do jn = 0, ntrunc
            corns(jk1+2*nlev_M,jk2+2*nlev_M,jn)  =   &
                 corns(jk1+2*nlev_M,jk2+2*nlev_M,jn)*zcorr
          enddo
        enddo
      enddo
    endif

    ! balanced temperature
    ztlen = rvlocbalt
    if(ztlen.gt.0.0d0) then
      ! calculate 5'th order function (from Gaspari and Cohn)
      do jk1 = 1, nlev_T
        zpres1 = log(pressureProfile_T(jk1))
        do jk2 = 1, nlev_T
          zpres2 = log(pressureProfile_T(jk2))
          zr = abs(zpres2 - zpres1)
          zcorr = gasparicohn(ztlen,zr)
          do jn = 0, ntrunc
            corns(jk1+nkgdim,jk2+nkgdim,jn)  =        &
                 corns(jk1+nkgdim,jk2+nkgdim,jn)*zcorr
          enddo
        enddo
      enddo
    endif

    ! streamfunction 
    ztlen = rvlocpsi    ! specify length scale (in units of ln(Pressure))
    if(ztlen.gt.0.0d0) then
      ! calculate 5'th order function (from Gaspari and Cohn)
      do jk1 = 1, nlev_M
        zpres1 = log(pressureProfile_M(jk1))
        do jk2 = 1, nlev_M
          zpres2 = log(pressureProfile_M(jk2))
          zr = abs(zpres2 - zpres1)
          zcorr = gasparicohn(ztlen,zr)
          do jn = 0, ntrunc
            corns(jk1,jk2,jn) = corns(jk1,jk2,jn)*zcorr
          enddo
        enddo
      enddo
    endif

    ! temp-psi cross-correlations
    ztlen = rvlocpsitt    ! specify length scale (in units of ln(Pressure))
    if(ztlen.gt.0.0d0) then
      ! calculate 5'th order function (from Gaspari and Cohn)
      do jk1 = 1, nlev_M
        zpres1 = log(pressureProfile_M(jk1))
        do jk2 = 1, nlev_T
          zpres2 = log(pressureProfile_T(jk2))
          zr = abs(zpres2 - zpres1)
          zcorr = gasparicohn(ztlen,zr)
          do jn = 0, ntrunc
            corns(jk1,jk2+nkgdim,jn) = corns(jk1,jk2+nkgdim,jn)*zcorr
            corns(jk2+nkgdim,jk1,jn) = corns(jk2+nkgdim,jk1,jn)*zcorr
          enddo
        enddo
      enddo
    endif

    ! velocity potential (unbalanced)
    ztlen = rvlocchi    ! specify length scale (in units of ln(Pressure))
    if(ztlen.gt.0.0d0) then
      ! calculate 5'th order function (from Gaspari and Cohn)
      do jk1 = 1, nlev_M
        zpres1 = log(pressureProfile_M(jk1))
        do jk2 = 1, nlev_M
          zpres2 = log(pressureProfile_M(jk2))
          zr = abs(zpres2 - zpres1)
          zcorr = gasparicohn(ztlen,zr)
          do jn = 0, ntrunc
            corns(jk1+nlev_M,jk2+nlev_M,jn) = corns(jk1+nlev_M,jk2+nlev_M,jn)*zcorr
          enddo
        enddo
      enddo
    endif

    ! cross-correlation t'-ps'
    if(.true.) then
    ztlen = rvlocunbalt    ! specify length scale (in units of ln(Pressure))
    if(ztlen.gt.0.0d0) then
      ! calculate 5'th order function (from Gaspari and Cohn)
      zpres1 = log(pressureProfile_T(nlev_T))
      do jk2 = 1, nlev_T
        zpres2 = log(pressureProfile_T(jk2))
        zr = abs(zpres2 - zpres1)
        zcorr = gasparicohn(ztlen,zr)
        do jn = 0, ntrunc
          corns(1+2*nlev_M+2*nlev_T,jk2+2*nlev_M,jn)  =       &
               corns(1+2*nlev_M+2*nlev_T,jk2+2*nlev_M,jn)*zcorr
          corns(jk2+2*nlev_M,1+2*nlev_M+2*nlev_T,jn)  =       &
               corns(jk2+2*nlev_M,1+2*nlev_M+2*nlev_T,jn)*zcorr
        enddo
      enddo
    endif
    endif

    ! humidity
    ztlen = rvloclq    ! specify length scale (in units of ln(Pressure))
    if(ztlen.gt.0.0d0) then
      ! calculate 5'th order function (from Gaspari and Cohn)
      do jk1 = 1, nlev_T
        zpres1 = log(pressureProfile_T(jk1))
        do jk2 = 1, nlev_T
          zpres2 = log(pressureProfile_T(jk2))
          zr = abs(zpres2 - zpres1)
          zcorr = gasparicohn(ztlen,zr)
          do jn = 0, ntrunc
            corns(jk1+2*nlev_M+nlev_T,jk2+2*nlev_M+nlev_T,jn)  =       &
                 corns(jk1+2*nlev_M+nlev_T,jk2+2*nlev_M+nlev_T,jn)*zcorr
          enddo
        enddo
      enddo
    endif

    ! compute total vertical correlations (including for balanced temperature)
    if(.true.) then
      do jk2 = 1, nkgdim2
        do jk1 = 1, nkgdim2
          corvert(jk1,jk2) = 0.0d0
          do jn = 0, ntrunc
            corvert(jk1,jk2) = corvert(jk1,jk2)+((2*jn+1)*corns(jk1,jk2,jn))
          enddo
        enddo
      enddo

      if(lldebug) then
        write(701,*) corvert
        write(702,*) zpsi
      endif

      if(mpi_myid == 0) then
        ikey = fstinf(NULBGST,ini,inj,ink,-1,'CORRNS',-1,0,-1,' ','ZZ')
        ierr = fstprm(ikey,idateo,ideet,inpas,ini,inj,ink, inbits        &
             ,idatyp,ip1,ip2,ip3,cltypvar,clnomvar,cletiket,clgrtyp      &
             ,ig1,ig2,ig3,ig4,iswa,ilength,idltf,iubc,iextr1,iextr2      &
             ,iextr3)

        ini = nkgdim2
        inj = nkgdim2
        ink = 1
        ip1 = 0
        ip2 = ntrunc
        ip3 = 0
        clnomvar = 'ZV'
        cletiket = 'CORVERT'
        idatyp = 5

        ierr = vfstecr(corvert, corvert, -inbits, iulcorvert, idateo    &
             , ideet,inpas, ini, inj, ink, ip1, ip2, ip3, cltypvar,     &
             clnomvar,cletiket,clgrtyp,ig1, ig2, ig3, ig4, idatyp,      &
             .true.)

      endif

      ! Modify RGSIGTB to obtain correct sigma_Tb
      do jk1 = 1, nlev_T
        zfact = corvert(jk1+nkgdim,jk1+nkgdim)
        do jlat = 1, nj_l
          zcoriolis = abs(2.d0*romega*gst_getrmu(jlat,gstID))
          if(zfact.gt.0.0d0.and.zcoriolis.ne.0.0d0) then
            zfact2 = 1.0d0/(zfact*zcoriolis*zcoriolis)
          else 
            zfact2 = 0.0d0
          endif
          zfacttb(jlat,jk1) = zfacttb(jlat,jk1)+zfact2
        enddo
      enddo

      ! Modify RGSIGPSB to obtain correct sigma_PSb
      do jlat = 1, nj_l
        do jk2 = 1, nlevPtoT
          zpsips(jk2) = 0.0d0
          do jk1 = 1, nlevPtoT
            zpsips(jk2) = zpsips(jk2)+PtoT(nlev_T+1,jk1,klatPtoT)*zpsi(jk1,jk2)
          enddo
        enddo
        zfact = 0.0d0
        do jk1 = 1, nlevPtoT
          zfact = zfact+PtoT(nlev_T+1,jk1,klatPtoT)*zpsips(jk1)
        enddo
        zcoriolis = abs(2.d0*romega*gst_getrmu(jlat,gstID))
        if(zfact.gt.0.0d0.and.zcoriolis.ne.0.0d0) then
          zfact2 = 1.0d0/(zfact*zcoriolis*zcoriolis)
        else 
          zfact2 = 0.0d0
        endif
        zfactpsb(jlat) = zfactpsb(jlat)+zfact2
      enddo
    endif

    ! Modify RGSIGTB and RGSIGPSB to obtain correct sigma_Tb and sigma_Psb
    do jlat = 1, nj_l
      if(zfactpsb(jlat).gt.0.0d0) then
        rgsigpsb(jlat) = rgsigpsb(jlat)*sqrt(zfactpsb(jlat))
      else
        rgsigpsb(jlat) = 0.0d0
      endif          
      do jk1 = 1, nlev_T
        if(zfacttb(jlat,jk1).gt.0.0d0) then
          rgsigtb(jlat,jk1) = rgsigtb(jlat,jk1)*sqrt(zfacttb(jlat,jk1))
        else
          rgsigtb(jlat,jk1) = 0.0d0
        endif
      enddo
    enddo

    ! compute square-root of corns for each total wavenumber
    allocate(corns_temp(nkgdim2,nkgdim2,0:ntrunc))
    corns_temp(:,:,:)=0.0d0
    do jn = mpi_myid, ntrunc, mpi_nprocs

      do jk1 = 1, nkgdim2
         do jk2 = 1, nkgdim2
            eigenvec(jk2,jk1) = corns(jk2,jk1,jn)
         enddo
      enddo

      ! CALCULATE EIGENVALUES AND EIGENVECTORS.
      ilwork = 4*nkgdim2*2
      if(squareSqrt) then
        call dsyev('V','U',nkgdim2,eigenvec,nkgdim2,eigenval,zwork,ilwork,info)
      else
        ! use old version of dsyev for backward compatibility
        call dsyev2('V','U',nkgdim2,eigenvec,nkgdim2,eigenval,zwork,ilwork,info)
      endif
      if(info.ne.0) then
        write(*,*) 'bhi_sucorns2: non-zero value of info =',info,' returned by dsyev for wavenumber ',jn
        call abort3d('BHI_SUCORNS')
      endif

      ! set selected number of eigenmodes to zero
      if(numModeZero.gt.0) then
        write(*,*) 'bhi_sucorns2: setting ',numModeZero,' eigenvalues to zero for wavenumber n=',jn
        write(*,*) 'bhi_sucorns2: original eigenvalues=',eigenval(:)
        do jk1 = 1, numModeZero
          eigenval(jk1) = 0.0d0
        enddo
        write(*,*) 'bhi_sucorns2: modified eigenvalues=',eigenval(:)
      endif

      do jk1 = 1, nkgdim2
        if(eigenval(jk1).lt.1.0d-15) then
          eigenvalsqrt(jk1) = 0.0d0
        else
          eigenvalsqrt(jk1) = sqrt(eigenval(jk1))
        endif
      enddo
 
      ! Reverse the order of E-Values if old formulation (for compatibility)
      if(.not.squareSqrt) then
        do jk1 = 1, nkgdim2
          eigenvalsqrt2(jk1) = eigenvalsqrt(nkgdim2-jk1+1)
          do jk2 = 1, nkgdim2
            eigenvec2(jk2,jk1) = eigenvec(jk2,nkgdim2-jk1+1)
          enddo
        enddo
        eigenvalsqrt(:) = eigenvalsqrt2(:)
        eigenvec(:,:) = eigenvec2(:,:)
      endif

      ! compute E * lambda^1/2
      result(:,:) = 0.0d0
      do jk1 = 1, nkgdimSqrt
         do jk2 = 1, nkgdim2
            result(jk2,jk1) = eigenvec(jk2,jk1)*eigenvalsqrt(jk1)
         enddo
      enddo

      ! compute (E * lambda^1/2) * E^T if new formulation
      if(squareSqrt) then
        do jk1 = 1, nkgdim2
          do jk2 = 1, nkgdim2
            do jk3 = 1, nkgdim2
              corns_temp(jk2,jk1,jn) = corns_temp(jk2,jk1,jn) + result(jk2,jk3)*eigenvec(jk1,jk3)
            enddo
          enddo
        enddo
      else
        corns_temp(:,:,jn) = result(:,:)
      endif

      !if(jn.eq.30) then
      !  write(200,*) corns(:,:,jn)
      !  write(201,*) corns_temp(:,:,jn)
      !  write(202,*) eigenval(:)
      !  write(203,*) eigenvec(:,:)
      !  call flush(200)
      !  call flush(201)
      !  call flush(202)
      !  call flush(203)
      !endif

    enddo ! jn

    nsize = nkgdim2*nkgdim2*(ntrunc+1)
    call rpn_comm_allreduce(corns_temp,corns,nsize,"mpi_double_precision","mpi_sum","GRID",ierr)
    deallocate(corns_temp)

    if(mpi_myid==0) then
      ierr = fstfrm(iulcorvert)
      ierr = fclos(iulcorvert)
    endif

  END SUBROUTINE BHI_SUCORNS2



  FUNCTION GASPARICOHN(ztlen,zr) 7

    real(8)  :: gasparicohn
    real(8)  :: ztlen,zr,zlc

    zlc = ztlen/2.0d0
    if(zr.le.zlc) then
      gasparicohn = -0.250d0*(zr/zlc)**5+0.5d0*(zr/zlc)**4             &
                  +0.625d0*(zr/zlc)**3-(5.0d0/3.0d0)*(zr/zlc)**2+1.0d0
    elseif(zr.le.(2.0d0*zlc)) then
      gasparicohn = (1.0d0/12.0d0)*(zr/zlc)**5-0.5d0*(zr/zlc)**4         &
                  +0.625d0*(zr/zlc)**3+(5.0d0/3.0d0)*(zr/zlc)**2       &
                  -5.0d0*(zr/zlc)+4.0d0-(2.0d0/3.0d0)*(zlc/zr)
    else
      gasparicohn = 0.0d0
    endif
    if(gasparicohn.lt.0.0d0) gasparicohn = 0.0d0

  END FUNCTION GASPARICOHN



  SUBROUTINE BHI_CALCCORR(zgd,pcscl,klev) 1,3
    implicit none
    integer :: klev
    real(8) :: zgd(myLonBeg:myLonEnd,klev,myLatBeg:myLatEnd)
    real(8) :: pcscl(klev)

    integer :: jlev, jlat, jlon
    real(8) :: zr, dlfac, dltemp, dln, dlcsurn, dlc, dlcorr

    ! parameters that define the correlation function
    integer :: ntoar = 3
    real(8) :: dlalpha = 0.2d0
    integer :: kcorrtyp = 1

    dlfac   = 1.d0/(1.d0+dlalpha)
    dln     = 1.d0*real(ntoar,8)
    dltemp  = (3.d0*(1.d0 + dlalpha))/(1.d0 + dlalpha/(dln*dln))
    dltemp  = dsqrt(dltemp)

    if (kcorrtyp.eq.1) then
      ! Gaussian correlation
      do  jlev = 1, klev
        dlc = 1.d0/dble(pcscl(jlev))
        dlc = 0.5d0*dlc*dlc
        do  jlat = myLatBeg, myLatEnd
          zr = ra * acos(gst_getRmu(jlat,gstID))
          dlcorr = dexp(-(zr**2)*dlc)
          do  jlon = myLonBeg, myLonEnd
            zgd(jlon,jlev,jlat) = dlcorr
          enddo
        enddo
      enddo
    elseif (kcorrtyp.eq.2) then
      ! Autoregressive (SOAR) correlation
      do jlev = 1, klev
        dlc = dltemp/dble(pcscl(jlev))
        dlcsurn = dlc/dln
        do jlat = myLatBeg, myLatEnd
          zr = ra * acos(gst_getRmu(jlat,gstID))
          dlcorr = (1.d0 + dlc*zr + zr*dlc*zr*dlc/3.d0)*dexp(-zr*dlc)    &
            + dlalpha*(1.d0 + dlcsurn*zr + zr*dlcsurn*zr*dlcsurn/3.d0)*dexp(-zr*dlcsurn)
          dlcorr = dlcorr*dlfac
          do jlon = myLonBeg, myLonEnd
            zgd(jlon,jlev,jlat) = dlcorr
          enddo
        enddo
      enddo
    else
      call abort3d('CALCCORR- Undefined correlation type')
    endif

  END SUBROUTINE BHI_calcCorr



  SUBROUTINE BHI_SUTG 1,9
    implicit none

    logical :: llpb
    integer :: ikey, jlat, jlon, jla, ezgprm, igdgid, ezqkdef
    integer :: jn, jm, ila_mpilocal, ila_mpiglobal, inlev, itggid, inmxlev, iset, nsize
    integer :: ezdefset, vezsint
    integer :: ip1style,ip1kind
    integer :: koutmpg
    real(8), allocatable :: dltg(:,:), tgstdbg_tmp(:,:)
    real(8) :: cortgg(nla_mpiglobal,2)
    real(8) :: rcscltg_vec(nlev_T_even)
    real(8) :: zabs, zpole, dlfac, dlcorr
    real(8) :: zsp_mpilocal(maxMyNla,2,nlev_T_even)
    real(8) :: zgd(myLonBeg:myLonEnd,nlev_T_even,myLatBeg:myLatEnd)
    real(8) :: zsp_mpiglobal(nla_mpiglobal,2,1)

    real(8),allocatable :: my_zsp_mpiglobal(:,:,:)
    ! standard file variables
    integer :: ini,inj,ink, inpas, inbits, idatyp, ideet
    integer :: ip1,ip2,ip3,ig1,ig2,ig3,ig4,iswa,ilength,idltf
    integer :: iubc,iextr1,iextr2,iextr3,ierr,ipak,ipas,ntrials
    integer :: iliste(100),idate(100),idimax,infon,iheures,idateo
    character(len=2)  :: cltypvar
    character(len=1)  :: clgrtyp
    character(len=4)  :: clnomvar
    character(len=12) :: cletiket
    integer :: vfstlir,vfstecr,fstprm,fstinf

    clnomvar = 'TG'
    idateo = -1
    inmxlev = 1
    ntrials = 1

    call getfldprm2(IP1, IP2, IP3, INLEV, CLETIKET, CLTYPVAR, ITGGID,  &
         clnomvar, idateo, inmxlev, nulbgst, ip1style, ip1kind,  &
         ntrials, koutmpg)

    ierr = ezgprm(itggid,CLGRTYP,INI,INJ,IG1,IG2,IG3,IG4)
    allocate(dltg(ini,inj))

    !write(*,*)'reading TG variances'

    ikey = vfstlir(dltg,koutmpg,ini,inj,ink,idateo,cletiket,ip1,   &
           ip2, ip3, cltypvar, clnomvar)

    if(clgrtyp == 'G' .and. ni_l == ini .and. nj_l == inj .and. ig1 == 0  &
          .and. ig2 ==0 .and. ig3 == 0 .and.ig4 == 0) then

      do jlat = 1, nj_l
        do jlon = 1,ni_l
          tgstdbg(jlon,jlat) = dltg(jlon,nj_l-jlat+1)
        enddo
      enddo

    elseif(clgrtyp == 'G' .and. ni_l == ini .and. nj_l == inj .and. ig1 ==   &
            0 .and. ig2 ==1 .and. ig3 == 0 .and.ig4 == 0) then
      do jlat = 1, nj_l
        do jlon = 1,ni_l
          tgstdbg(jlon,jlat) = dltg(jlon,jlat)
        enddo
      enddo

    else

       allocate(tgstdbg_tmp(ni_l,nj_l))
       ! Interpolate to a gaussian grid stored from North to South(IG2=1)
       ! First, from 'G' to 'G'
       igdgid = ezqkdef(ni_l, nj_l, 'G', 0, 0, 0, 0 ,0)
       iset = ezdefset(igdgid,itggid)
       ierr = vezsint(tgstdbg_tmp,dltg,ni_l,nj_l,1,ini,inj,1)
       ! Then, inverse the vector directly
       do jlat = 1, nj_l
          do jlon = 1,ni_l
             tgstdbg(jlon,jlat) = tgstdbg_tmp(jlon,nj_l-jlat+1)
          enddo
       enddo
       deallocate(tgstdbg_tmp)

    endif

    ! If specified in namelist, do not accept tg errors of more than value specified in namelist
    if ( llimtg ) then
       where ( tgstdbg > rlimsuptg) tgstdbg = rlimsuptg
    endif

    zgd(:,:,:) = 0.0d0
    zsp_mpilocal(:,:,:) = 0.0d0
    allocate(my_zsp_mpiglobal(nla_mpiglobal,2,1)) 
    my_zsp_mpiglobal(:,:,:) = 0.0d0

    do jla = 1, nla_mpiglobal
       cortgg(jla,1) = 0.0d0
       cortgg(jla,2) = 0.0d0
    enddo

    ! 4.2  Compute correlations in physical space
    rcscltg_vec(:) = rcscltg(1)
    call BHI_calccorr(zgd,rcscltg_vec,nlev_T_even)

    ! 4.3  Bring back the result in spectral space
    call gst_setID(gstID2)
    call gst_reespe4(zsp_mpilocal,zgd)

    ! and make the result mpiglobal
    do jm = mymBeg, mymEnd, mymSkip
      do jn = mynBeg, mynEnd, mynSkip
        if(jm.le.jn) then
          ila_mpiglobal = gst_getNIND(jm,gstID) + jn - jm
          ila_mpilocal  = ilaList_mpilocal(ila_mpiglobal)
          my_zsp_mpiglobal(ila_mpiglobal,:,1) = zsp_mpilocal(ila_mpilocal,:,1)
        endif
      enddo
    enddo
    nsize = 2*nla_mpiglobal
    call rpn_comm_allreduce(my_zsp_mpiglobal(:,:,1),zsp_mpiglobal(:,:,1),nsize,"mpi_double_precision","mpi_sum","GRID",ierr)
    deallocate(my_zsp_mpiglobal) 
    ! 4.4  Check positiveness
    llpb = .false.
    do jla = 1, ntrunc+1
      zabs = abs(zsp_mpiglobal(jla,1,1))
      llpb = llpb.or.((zsp_mpiglobal(jla,1,1).lt.0.).and.(zabs.gt.epsilon(zabs)))
    enddo
    if(llpb) then
      call abort3d(' AUTOCORRELATION  NEGATIVES')
    endif
    do jla = 1, ntrunc+1
      zsp_mpiglobal(jla,1,1) = abs(zsp_mpiglobal(jla,1,1))
    enddo

    zpole = 0.d0
    do  jla = 1, ntrunc+1
      jn = jla-1
      zpole = zpole + zsp_mpiglobal(jla,1,1)*sqrt((2.d0*jn+1.d0)/2.d0)
    enddo
    if(zpole.le.0.d0) then
      call abort3d('POLE VALUE NEGATIVE IN SUTG')
    endif
    do jla = 1, ntrunc+1
      zsp_mpiglobal(jla,1,1) = zsp_mpiglobal(jla,1,1)/zpole
      zsp_mpiglobal(jla,2,1) = zsp_mpiglobal(jla,2,1)/zpole
    enddo

    !  4.5  Correlation
    do jm = 0, ntrunc
      do jn = jm, ntrunc
        jla = gst_getNIND(jm,gstID) + jn - jm
        dlfac = 0.5d0/dsqrt((2*jn+1.d0)/2.d0)
        cortgg(jla,1) = dlfac * zsp_mpiglobal(jn+1,1,1)
        cortgg(jla,2) = dlfac * zsp_mpiglobal(jn+1,1,1)
      enddo
    enddo

    ! 5. For zonal modes : set to zero the imaginary part and set the correct factor 1.0 for the real part
    do jla = 1, ntrunc + 1
      cortgg(jla,1) = 0.5d0*cortgg(jla,1)
      cortgg(jla,2) = 0.0d0
    enddo

    ! 6. Result in corns array
    do jn = 0, ntrunc
      ila_mpiglobal = jn + 1
      corns(nspositTG,nspositTG,jn) = 2.d0*cortgg(ila_mpiglobal,1)
    enddo

    deallocate(dltg)
    !write(*,*)'DONE in SUTG'

  END SUBROUTINE BHI_sutg



  SUBROUTINE BHI_convol 1,4
    implicit none

    real(8) dlfact2,dlc,dsummed
    real(8) dtlen,zr,dlfact
    integer ilen,jn,jlat,jk
    real(8) zlegi(0:ntrunc, nj_l),zleg(0:ntrunc, nj_l),zsp(0:ntrunc,nkgdim),zgr(nj_l,nkgdim)
    real(8) dlwti(nj_l),zrmu(nj_l)

    integer inracp
    real(8) zpg(nj_l),zsia(nj_l),zrad(nj_l),zpgssin2(nj_l)
    real(8) zsinm1(nj_l),zsinm2(nj_l),zsin2(nj_l),zsinlat(nj_l)
    real(8) dlfact1, dln
    real(8) dlnorm(0:ntrunc)

    real(8)         :: RPORVO   = 6000.D3
    real(8)         :: RPORDI   = 6000.D3
    real(8)         :: RPORTT   = 3000.D3
    real(8)         :: RPORQ    = 3000.D3
    real(8)         :: RPORPS   = 3000.D3

    do jlat = 1, nj_l
       dlwti(jlat) = gst_getrwt(jlat,gstID)
       zrmu(jlat)  = gst_getrmu(jlat,gstID)
    end do

    do jlat = 1, nj_l
       zleg(0,jlat) = sqrt(0.5d0)
       zleg(1,jlat) = sqrt(1.5d0)*zrmu(jlat)
    enddo
    do jn = 0, ntrunc
       dln = 1.d0*real(jn,8)
       dlnorm(jn) = dsqrt((2.d0*dln + 1.d0)/2.d0)
    enddo

    do jn = 1, ntrunc-1
       dln = real(jn,8)
       dlfact1 = ((2.d0*dln+1.d0)/(dln+1.d0))*(dlnorm(jn+1)/dlnorm(jn))
       dlfact2 = (dln/(dln+1.d0))*(dlnorm(jn+1)/dlnorm(jn-1))
       do jlat = 1, nj_l
          zleg(jn+1,jlat) = dlfact1*zrmu(jlat)*zleg(jn,jlat) - dlfact2*zleg(jn-1,jlat)
       enddo
    enddo

    do jlat = 1, nj_l
       do jn = 0, ntrunc
         zlegi(jn,jlat) = zleg(jn,jlat)
       enddo
    enddo

!     1.2 CONVERT THE CORRELATIONS IN SPECTRAL SPACE INTO SPECTRAL
!         COEFFICIENTS OF THE CORRELATION FUNCTION AND FUNCTION TO BE
!         SELF-CONVOLVED
    do jn = 0, ntrunc
      dlfact = ((2.0d0*jn+1)/2.0d0)**0.25d0
      dlfact2 = ((2.0d0*JN +1.0d0)/2.0d0)**(0.25d0)
      do jk = 1, nkgdim
        zsp(jn,jk) = rstddev(jk,jn)*dlfact*dlfact2
      enddo
    enddo

    ! Transform to physical space
    call zleginv(zgr,zsp,zlegi,dlwti,ntrunc,nj_l,nkgdim,nj_l,nkgdim,ntrunc)

    ! Truncate in horizontal extent with Gaussian window
    do jk = 1, nkgdim
      if (jk.ge.nspositVO.and.jk.lt.nspositVO+nlev_M) then
        dtlen = rporvo
      elseif (jk.ge.nspositDI.and.jk.lt.nspositDI+nlev_M) then
        dtlen = rpordi
      elseif (jk.ge.nspositTT.and.jk.lt.nspositTT+nlev_T) then
        dtlen = rportt
      elseif (jk.ge.nspositQ.and.jk.lt.nspositQ+nlev_T) then
        dtlen = rporq
      elseif (jk.eq.nspositPS) then
        dtlen = rporps
      endif

      if(dtlen.gt.0.0d0) then
        dlc = 1.d0/dble(dtlen)
        dlc = 0.5d0*dlc*dlc
        do jlat = 1, nj_l
          zr = ra * acos(zrmu(jlat))
          dlfact = dexp(-(zr**2)*dlc)
          zgr(jlat,jk) = dlfact*zgr(jlat,jk)
        enddo
      endif

      !write(*,*) 'zeroing length (km)=',jk,dtlen/1000.0
    enddo

    ! Transform back to spectral space
    call zlegdir(zgr,zsp,zlegi,dlwti,ntrunc,nj_l,nkgdim,nj_l,nkgdim,ntrunc)

    ! Convert back to correlations
    do jk = 1, nkgdim
      do jn = 0, ntrunc
         zsp(jn,jk) = zsp(jn,jk)*(2.0d0/(2.0d0*jn+1.0))**(0.25d0)
      enddo
    enddo

    ! PUT BACK INTO RSTDDEV
    do jn = 0, ntrunc
      do jk = 1, nkgdim
         rstddev(jk,jn) = zsp(jn,jk)
      enddo
    enddo
 
    ! Re-normalize to ensure correlations
    do jk = 1, nkgdim
      dsummed = 0.d0
      do jn = 0, ntrunc
        dsummed = dsummed+ dble(rstddev(jk,jn)**2)*sqrt(((2.d0*jn)+1.d0)/2.d0)
      enddo
      dsummed = sqrt(dsummed)
      do jn = 0, ntrunc
        if(dsummed.gt.1.d-30) rstddev(jk,jn) = rstddev(jk,jn)/dsummed
      enddo
    enddo

    !     CONVERT THE SPECTRAL COEFFICIENTS OF THE CORRELATION FUNCTION
    !     .  BACK INTO CORRELATIONS OF SPECTRAL COMPONENTS
    do jn = 0, ntrunc
      dlfact = sqrt(0.5d0)*(1.0d0/((2.0d0*jn+1)/2.0d0))**0.25d0
      do jk = 1, nkgdim
        rstddev(jk,jn) = rstddev(jk,jn)*dlfact
      enddo
    enddo

  END SUBROUTINE BHI_convol



  SUBROUTINE BHI_setCrossCorr(kn) 1
    implicit none

    integer :: kn, jblock1, inbrblock, jblock2
    integer :: jk1, jk2, nlev_all(numvar3d), levOffset(numvar3d+1)

    inbrblock = numvar3d
    nlev_all(1) = nLev_M
    nlev_all(2) = nLev_M
    nlev_all(3) = nLev_T
    nlev_all(4) = nLev_T
    levOffset(1) = 0
    levOffset(2) = 1*nLev_M
    levOffset(3) = 2*nLev_M
    levOffset(4) = 2*nLev_M+1*nLev_T
    levOffset(5) = 2*nLev_M+2*nLev_T

    ! Set cross-variable correlations to 0 ...
    do jblock1 = 1, inbrblock
      do jblock2 = 1, inbrblock
        if (jblock1.ne.jblock2) then
          do jk2 = 1, nlev_all(jblock2)
            do jk1 = 1, nlev_all(jblock1)
              corns(jk1 + levOffset(jblock1),jk2 + levOffset(jblock2),kn) = 0.0d0
            enddo
          enddo
        endif
      enddo
    enddo

    ! ... but T'ln(ps') correlations
    do jk2 = 1, nkgdim
      do jk1 = levOffset(5)+1, levOffset(5)+numvar2d
        if ((jk1.ne.nspositPS.or.jk2.lt.nspositTT.or.    &
             jk2.ge.(nspositTT+nlev_T)).and.(jk1.ne.jk2)) then
          corns(jk1,jk2,kn) = 0.0d0
        endif
      enddo
    enddo

    do jk2 = levOffset(5)+1, levOffset(5)+numvar2d
      do jk1 = 1, nkgdim
        if ((jk2.ne.nspositPS.or.jk1.lt.nspositTT.or.   &
             jk1.ge.(nspositTT+nlev_T)) .and.(jk1.ne.jk2)) then
          corns(jk1,jk2,kn) = 0.0d0
        endif
      enddo
    enddo

  END SUBROUTINE BHI_setCrossCorr



  SUBROUTINE BHI_READCORNS2 1,8
    implicit none

    integer :: kip1
    integer :: jn, istdkey,icornskey
    integer :: iksdim,jcol,jrow,jblock,jlevo,jlevi
    real(8) :: zwork
    real(8), allocatable, dimension(:) :: zstdsrc
    real(8), allocatable, dimension(:,:) :: zcornssrc

    ! standard file variables
    integer :: ini,inj,ink, inpas, inbits, idatyp, ideet
    integer :: ip1,ip2,ip3,ig1,ig2,ig3,ig4,iswa,ilength,idltf
    integer :: iubc,iextr1,iextr2,iextr3,ierr,ipak,ipas
    integer :: iliste(100),idate(100),idimax,infon,iheures,idateo
    character(len=2)  :: cltypvar
    character(len=1)  :: clgrtyp
    character(len=4)  :: clnomvar
    character(len=12) :: cletiket
    integer :: vfstlir,vfstecr,fstprm,fstinf

    iksdim = 2*nlev_M+2*nlev_T+1    ! assume 4 3d variables and 1 2d variable (TG not included)
    allocate(zcornssrc(iksdim,iksdim))
    allocate(zstdsrc(iksdim))

    kip1 = -1

    do jn = 0, ntrunc

      ! Looking for FST record parameters..

      idateo = -1
      cletiket = 'RSTDDEV'
      ip1 = kip1
      ip2 = jn
      ip3 = -1
      cltypvar = 'X'
      clnomvar = 'SS'

      istdkey = vfstlir(ZSTDSRC,nulbgst,INI,INJ,INK,idateo,cletiket,ip1,ip2,ip3,cltypvar,clnomvar)

      if(istdkey .lt.0 ) then
        call abort3d('READCORNS2: Problem with background stat file')
      endif

      if (ini .ne. iksdim) then
        call abort3d('READCORNS2: BG stat levels inconsitencies')
      endif

      ! Looking for FST record parameters..

      idateo = -1
      cletiket = 'CORRNS'
      ip1 = kip1
      IP2 = JN
      ip3 = -1
      cltypvar = 'X'
      clnomvar = 'ZZ'
      icornskey = vfstlir(ZCORNSSRC,nulbgst,INI,INJ,INK,idateo,cletiket,ip1,ip2,ip3,cltypvar,clnomvar)

      if(icornskey .lt.0 ) then
        call abort3d('READCORNS2: Problem with background stat file')
      endif

      if (ini .ne. iksdim .or. inj .ne. iksdim) then
        call abort3d('READCORNS2: BG stat levels inconsitencies')
      endif

      do jcol = 1, nkgdim2
        rstddev(jcol,jn) = 0.0d0
        do jrow = 1, nkgdim2
          corns(jrow,jcol,jn) = 0.0d0
        enddo
      enddo

      do jcol = 1, iksdim
        do jrow = 1, iksdim
          corns(jrow,jcol,jn) = zcornssrc(jrow,jcol)
        enddo
      enddo

      ! Set cross-variable correlations to zero except between T' and ln(ps')
      call BHI_setcrosscorr(jn)

      do jrow = 1, iksdim
        rstddev(jrow,jn) = zstdsrc(jrow)
      enddo

    enddo

    ! Apply convolution to RSTDDEV correlations

    call BHI_convol

    do jn = 0, ntrunc

      ! Re-build of correlation matrix: factorization of corns with convoluted RSTDDEV
      do jcol = 1, nkgdim
        do jrow = 1, nkgdim
          corns(jrow,jcol,jn) = rstddev(jrow,jn) * corns(jrow,jcol,jn)* rstddev(jcol,jn)
        enddo
      enddo

    enddo

    deallocate(zcornssrc)
    deallocate(zstdsrc)

    !write(*,*) 'Done in READCORNS2'
  END SUBROUTINE BHI_READCORNS2



  SUBROUTINE BHI_RDSPSTD 1,8
    implicit none

    integer, parameter  :: inbrvar3d=5
    integer, parameter  :: inbrvar2d=2
    integer :: jvar,jn,inix,injx,inkx
    integer :: ikey, jlevo, jlat,firstn,lastn
    real(8) :: zsp(0:ntrunc,max(nlev_M,nlev_T)),zspbuf(max(nlev_M,nlev_T)),zwork
    real(8) :: zleg(0:ntrunc,nj_l),zgr(nj_l,max(nlev_M,nlev_T)),zgsig(1,nj_l,max(nlev_M,nlev_T)),zstddev(nkgdim2,nj_l)
    character(len=4) :: varName3d(inbrvar3d),varName2d(inbrvar2d)

    ! standard file variables
    integer :: ini,inj,ink, inpas, inbits, idatyp, ideet  
    integer :: ip1,ip2,ip3,ig1,ig2,ig3,ig4,iswa,ilength,idltf
    integer :: iubc,iextr1,iextr2,iextr3,ipak,ipas
    integer :: iliste(100),idate(100),idimax,infon,iheures,idateo,nlev_MT
    character(len=1)  :: clgrtyp
    character(len=2)  :: cltypvar
    character(len=4)  :: clnomvar
    character(len=12) :: cletiket
    integer :: vfstlir,vfstecr,fstprm,fstinf

    data varName3d/'PP  ','UC  ','UT  ','LQ  ','TB  '/
    data varName2d/'UP  ','PB  '/

    call gst_setID(gstID)
    call gst_zlegpol(zleg)

    rgsig(:,:) = 0.0d0
    rgsigtb(:,:) = 0.0d0
    rgsigpsb(:) = 0.0d0

!   2. Reading the data

    idate(1) = -1
    ip1      = -1
    ip2      = -1
    ip3      = -1

    cletiket = 'SPSTDDEV'
    cltypvar = 'X'

    do jvar = 1, inbrvar3d
      clnomvar = varName3d(jvar)
      if(vnl_varTypeFromVarName(clnomvar).eq.'MM') then
        nlev_MT = nlev_M
      else
        nlev_MT = nlev_T
      endif
      firstn = -1
      do jn = 0, ntrunc
        ip2 = jn
        ikey = fstinf(nulbgst,inix,injx,inkx,idate(1),cletiket,ip1,ip2,ip3,cltypvar,clnomvar)

        if(ikey .ge.0 ) then
          ikey = vfstlir(zspbuf(1:nlev_MT),nulbgst,ini,inj,ink,idate(1),cletiket,ip1,ip2,ip3,cltypvar,clnomvar)
        else
          if(firstn.eq.-1) firstn = jn
          lastn = jn
          zspbuf(:) = 0.0d0
        endif

        if (ini .ne. nlev_MT) then
          call abort3d('RDSPSTD: BG stat levels inconsitencies')
        endif

        do jlevo = 1, nlev_MT
          zsp(jn,jlevo) = zspbuf(jlevo)
        enddo
      enddo
      if(mpi_myid.eq.0.and.firstn.ne.-1) then
        write(*,*) 'WARNING: CANNOT FIND SPSTD FOR ',clnomvar, &
                     ' AT N BETWEEN ',firstn,' AND ',lastn,', SETTING TO ZERO!!!'
      endif

      call zleginv2(zgr(:,1:nlev_MT),zsp(:,1:nlev_MT),zleg,ntrunc,nj_l,nlev_MT,nj_l,nlev_MT,ntrunc)

      if(clnomvar .eq. 'PP') then
        do jlat = 1, nj_l
          do jlevo = 1, nlev_M
            rgsiguu(jlat,jlevo) = zgr(jlat,jlevo)
          enddo
        enddo
      elseif(clnomvar .eq. 'UC' .or. clnomvar .eq. 'CC') then
        do jlat = 1, nj_l
          do jlevo = 1, nlev_M
            rgsigvv(jlat,jlevo) = zgr(jlat,jlevo)
          enddo
        enddo
      elseif(clnomvar .eq. 'UT') then
        do jlat = 1, nj_l
          do jlevo = 1, nlev_T
            rgsigtt(jlat,jlevo) = zgr(jlat,jlevo)
          enddo
        enddo
      elseif(clnomvar .eq. 'TB') then
        do jlat = 1, nj_l
          do jlevo = 1, nlev_T
            rgsigtb(jlat,jlevo) = zgr(jlat,jlevo)
          enddo
        enddo
      elseif(clnomvar .eq. 'LQ') then
        do jlat = 1, nj_l
          do jlevo = 1, nlev_T
            rgsigq(jlat,jlevo) = zgr(jlat,jlevo)*rfacthum
          enddo
        enddo
      endif

    enddo

    nlev_MT = 1
    do jvar = 1, inbrvar2d
      clnomvar = varName2d(jvar)
      firstn = -1
      do jn = 0, ntrunc
        ip2 = jn
        ikey = fstinf(nulbgst,inix,injx,inkx,idate(1),cletiket,ip1,ip2,ip3,cltypvar,clnomvar)

        if(ikey .ge.0 ) then
          ikey = vfstlir(zspbuf,nulbgst,ini,inj,ink,idate(1),cletiket,ip1,ip2,ip3,cltypvar,clnomvar)
        else
          if(firstn.eq.-1) firstn = jn
          lastn = jn
          zspbuf(:) = 0.0d0
        endif

        zsp(jn,1) = zspbuf(1)

      enddo
      if(mpi_myid.eq.0.and.firstn.ne.-1) then
        write(*,*) 'WARNING: CANNOT FIND SPSTD FOR ',clnomvar, &
                     ' AT N BETWEEN ',firstn,' AND ',lastn,', SETTING TO ZERO!!!'

      endif

      call zleginv2(zgr(:,1:nlev_MT),zsp(:,1:nlev_MT),zleg,ntrunc,nj_l,1,nj_l,nlev_MT,ntrunc)

      if(clnomvar .eq. 'UP') then
        do jlat = 1, nj_l
          rgsigps(jlat) = zgr(jlat,1)*100.0d0
        enddo
      endif
      if(clnomvar .eq. 'PB') then
        do jlat = 1, nj_l
          rgsigpsb(jlat) = zgr(jlat,1)*100.0d0
        enddo
      endif

    enddo

  END SUBROUTINE BHI_RDSPSTD



  SUBROUTINE BHI_RDSPSTD_NEWFMT 1,12
    implicit none

    integer, parameter  :: inbrvar3d=5
    integer, parameter  :: inbrvar2d=2
    integer :: jvar,jn,inix,injx,inkx,ntrunc_file
    integer :: ikey,jlevo,jlat
    real(8) :: zsp(0:ntrunc,max(nlev_M,nlev_T)),zspbuf(0:ntrunc),zwork
    real(8) :: zleg(0:ntrunc,nj_l),zgr(nj_l,max(nlev_M,nlev_T)),zgsig(1,nj_l,max(nlev_M,nlev_T)),zstddev(nkgdim2,nj_l)
    character(len=4) :: varName3d(inbrvar3d),varName2d(inbrvar2d)

    ! standard file variables
    integer :: ini,inj,ink, inpas, inbits, idatyp, ideet
    integer :: ip1,ip2,ip3,ig1,ig2,ig3,ig4,iswa,ilength,idltf
    integer :: iubc,iextr1,iextr2,iextr3,ipak,ipas
    integer :: iliste(100),idate(100),idimax,infon,iheures,idateo,nlev_MT
    character(len=1)  :: clgrtyp
    character(len=2)  :: cltypvar
    character(len=4)  :: clnomvar
    character(len=12) :: cletiket
    integer :: vfstlir,vfstecr,fstprm,fstinf

    data varName3d/'PP  ','UC  ','UT  ','LQ  ','TB  '/
    data varName2d/'UP  ','PB  '/

    call gst_setID(gstID)
    call gst_zlegpol(zleg)

    rgsig(:,:) = 0.0d0
    rgsigtb(:,:) = 0.0d0
    rgsigpsb(:) = 0.0d0

!   2. Reading the data

    idate(1) = -1
    ip2      = -1
    ip3      = -1

    cletiket = 'SPSTDDEV'
    cltypvar = 'X'

    ! check if file is old format
    ip1 = -1
    clnomvar = varName3d(1)
    ikey = fstinf(nulbgst,inix,injx,inkx,idate(1),cletiket,ip1,ip2,ip3,cltypvar,clnomvar)
    write(*,*) 'ini,inj,ink=',inix,injx,inkx
    if(inix.gt.1) then
      write(*,*) 'BHI_RDSPSTD_NEWFMT: ini>1, SPSTDDEV is in old format, calling BHI_RDSPSTD...'
      call bhi_rdspstd
      return
    endif

    !write(*,*) 'Reading 3D variables'
    do jvar = 1, inbrvar3d
      clnomvar = varName3d(jvar)
      if(vnl_varTypeFromVarName(clnomvar).eq.'MM') then
        nlev_MT = nlev_M
      else
        nlev_MT = nlev_T
      endif
      !write(*,*)'Reading ',clnomvar
      do jlevo = 1, nlev_MT
        if(vnl_varTypeFromVarName(clnomvar).eq.'MM') then
          ip1 = vco_anl%ip1_M(jlevo)
        else
          ip1 = vco_anl%ip1_T(jlevo)
        endif
        ikey = fstinf(nulbgst,inix,ntrunc_file,inkx,idate(1),cletiket,ip1,ip2,ip3,cltypvar,clnomvar)
        ntrunc_file = ntrunc_file-1

        if(ntrunc_file.gt.ntrunc) call abort3d('RDSPSTD_NEWFMT: ntrunc in file > ntrunc for analysis!')

        if(ikey .ge.0 ) then
          ikey = vfstlir(zspbuf(0:ntrunc_file),nulbgst,ini,inj,ink,idate(1),cletiket,ip1,ip2,ip3,cltypvar,clnomvar)
        else
          write(*,*) 'RDSPSTD_NEWFMT: ',jvar,clnomvar,nlev_MT,jlevo,ikey,ntrunc,ntrunc_file
          call abort3d('RDSPSTD_NEWFMT: SPSTDDEV record not found')
        endif

        zsp(:,jlevo) = 0.0d0
        do jn = 0, ntrunc_file
          zsp(jn,jlevo) = zspbuf(jn)
        enddo
      enddo

      call zleginv2(zgr(:,1:nlev_MT),zsp(:,1:nlev_MT),zleg,ntrunc,nj_l,nlev_MT,nj_l,nlev_MT,ntrunc)

      if(clnomvar .eq. 'PP') then
        do jlat = 1, nj_l
          do jlevo = 1, nlev_M
            rgsiguu(jlat,jlevo) = zgr(jlat,jlevo)
          enddo
        enddo
      elseif(clnomvar .eq. 'UC' .or. clnomvar .eq. 'CC') then
        do jlat = 1, nj_l
          do jlevo = 1, nlev_M
            rgsigvv(jlat,jlevo) = zgr(jlat,jlevo)
          enddo
        enddo
      elseif(clnomvar .eq. 'UT') then
        do jlat = 1, nj_l
          do jlevo = 1, nlev_T
            rgsigtt(jlat,jlevo) = zgr(jlat,jlevo)
          enddo
        enddo
      elseif(clnomvar .eq. 'TB') then
        do jlat = 1, nj_l
          do jlevo = 1, nlev_T
            rgsigtb(jlat,jlevo) = zgr(jlat,jlevo)
          enddo
        enddo
      elseif(clnomvar .eq. 'LQ') then
        do jlat = 1, nj_l
          do jlevo = 1, nlev_T
            rgsigq(jlat,jlevo) = zgr(jlat,jlevo)*rfacthum
          enddo
        enddo
      endif

    enddo

    nlev_MT = 1
    do jvar = 1, inbrvar2d
      clnomvar = varName2d(jvar)
      ip1 = -1
      ikey = fstinf(nulbgst,inix,ntrunc_file,inkx,idate(1),cletiket,ip1,ip2,ip3,cltypvar,clnomvar)
      ntrunc_file = ntrunc_file-1

      if(ikey .ge.0 ) then
        ikey = vfstlir(zspbuf(0:ntrunc_file),nulbgst,ini,inj,ink,idate(1),cletiket,ip1,ip2,ip3,cltypvar,clnomvar)
      else
        write(*,*) 'RDSPSTD_NEWFMT: ',jvar,clnomvar,nlev_MT,jlevo,ikey,ntrunc,ntrunc_file
        call abort3d('RDSPSTD_NEWFMT: SPSTDDEV record not found')
      endif

      zsp(:,1) = 0.0d0
      do jn = 0, ntrunc_file
        zsp(jn,1) = zspbuf(jn)
      enddo

      call zleginv2(zgr(:,1:nlev_MT),zsp(:,1:nlev_MT),zleg,ntrunc,nj_l,1,nj_l,nlev_MT,ntrunc)

      if(clnomvar .eq. 'UP') then
        do jlat = 1, nj_l
          rgsigps(jlat) = zgr(jlat,1)*100.0d0
        enddo
      endif
      if(clnomvar .eq. 'PB') then
        do jlat = 1, nj_l
          rgsigpsb(jlat) = zgr(jlat,1)*100.0d0
        enddo
      endif

    enddo

  END SUBROUTINE BHI_RDSPSTD_NEWFMT



  SUBROUTINE BHI_RDSTD2D,6
    implicit none

    integer, parameter  :: inbrvar3d=5
    integer, parameter  :: inbrvar2d=2
    integer :: jvar, ikey, jlevo, jlat, nlev_MT
    real(8) :: zwork
    real(8) :: zgr(nj_l,max(nlev_M,nlev_T))
    character(len=4) :: varName3d(inbrvar3d),varName2d(inbrvar2d)

    ! standard file variables
    integer :: ini,inj,ink,ip1,ip2,ip3
    integer :: idate
    character(len=2)  :: cltypvar
    character(len=4)  :: clnomvar
    character(len=12) :: cletiket
    integer :: vfstlir,vfstecr,fstinf

    data varName3d/'PP  ','UC  ','UT  ','LQ  ','TT  '/
    data varName2d/'UP  ','P0  '/

    rgsig(:,:) = 0.0d0
    rgsigtb(:,:) = 0.0d0
    rgsigpsb(:) = 0.0d0

!   2. Reading the data

    idate    = -1
    ip1      = -1
    ip2      = -1
    ip3      = -1

    cletiket = 'STDDEV'
    cltypvar = 'E'

    write(*,*) 'Reading 3D variables'
    do jvar = 1, inbrvar3d
      clnomvar = varName3d(jvar)
      write(*,*)'Reading ',clnomvar
      if(vnl_varTypeFromVarName(clnomvar).eq.'MM') then
        nlev_MT = nlev_M
      else
        nlev_MT = nlev_T
      endif

      ikey = fstinf(nulbgst,ini,inj,ink,idate,cletiket,ip1,ip2,ip3,cltypvar,clnomvar)

      if (ink .ne. nlev_MT .or. inj .ne. nj_l) then
        write(*,*) 'RDSTD2D: ikey, varname, ink, nlev, ini, nj=',ikey,clnomvar,ink,nlev_MT,inj,nj_l
        call abort3d('RDSTD2D: BG stat levels or latitudes inconsitencies')
      endif

      if(ikey .ge.0 ) then
        ikey = vfstlir(zgr(:,1:nlev_MT),nulbgst,ini,inj,ink,idate,cletiket,ip1,ip2,ip3,cltypvar,clnomvar)
      else
        write(*,*) 'RDSTD2D: variable name=',clnomvar
        call abort3d('RDSTD2D: variable not found')
      endif

      if(clnomvar .eq. 'PP') then
        do jlat = 1, nj_l
          do jlevo = 1, nlev_M
            rgsiguu(jlat,jlevo) = zgr(nj_l-jlat+1,jlevo)
          enddo
        enddo
      elseif(clnomvar .eq. 'UC' .or. clnomvar .eq. 'CC') then
        do jlat = 1, nj_l
          do jlevo = 1, nlev_M
            rgsigvv(jlat,jlevo) = zgr(nj_l-jlat+1,jlevo)
          enddo
        enddo
      elseif(clnomvar .eq. 'UT') then
        do jlat = 1, nj_l
          do jlevo = 1, nlev_T
            rgsigtt(jlat,jlevo) = zgr(nj_l-jlat+1,jlevo)
          enddo
        enddo
      elseif(clnomvar .eq. 'TB' .or. clnomvar .eq. 'TT') then
        do jlat = 1, nj_l
          do jlevo = 1, nlev_T
            rgsigtb(jlat,jlevo) = zgr(nj_l-jlat+1,jlevo)
          enddo
        enddo
      elseif(clnomvar .eq. 'LQ') then
        do jlat = 1, nj_l
          do jlevo = 1, nlev_T
            rgsigq(jlat,jlevo) = zgr(nj_l-jlat+1,jlevo)*rfacthum
          enddo
        enddo
      endif

    enddo

    write(*,*) 'Reading 2D variables'
    do jvar = 1, inbrvar2d
      clnomvar = varName2d(jvar)
      write(*,*)'Reading ',clnomvar
      ikey = fstinf(nulbgst,ini,inj,ink,idate,cletiket,ip1,ip2,ip3,cltypvar,clnomvar)

      if(ikey .ge.0 ) then
        ikey = vfstlir(zgr,nulbgst,ini,inj,ink,idate,cletiket,ip1,ip2,ip3,cltypvar,clnomvar)
      else
        write(*,*) 'RDSTD2D: variable name=',clnomvar
        call abort3d('RDSTD2D: variable not found')
      endif

      if(clnomvar .eq. 'UP') then
        do jlat = 1, nj_l
          rgsigps(jlat) = zgr(nj_l-jlat+1,1)*100.0d0
        enddo
      endif
      if(clnomvar .eq. 'PB' .or. clnomvar .eq. 'P0') then
        do jlat = 1, nj_l
          rgsigpsb(jlat) = zgr(nj_l-jlat+1,1)*100.0d0
        enddo
      endif

    enddo

    write(*,*)'DONE in RDSTD2D'

  END SUBROUTINE BHI_RDSTD2D



  SUBROUTINE BHI_RDSPPTOT 1,6
    IMPLICIT NONE

    integer :: jn, jk1, jk2, ikey, ilen,jlat,jcol,inix,injx,inkx
    real(8) :: zsptheta(0:ntrunc,nlev_M)
    real(8) :: zgrtheta(nj_l,nlev_M)
    real(8) :: zleg(0:ntrunc,nj_l)
    real(8) :: zwork
    real(8) :: zPtoTsrc(nlev_T+1,nlev_M)
    real(8) :: zspPtoT(0:ntrunc,nlev_T+1,nlev_M)
    real(8) :: zgrPtoT(nj_l,nlev_T+1,nlev_M)
    real(8) :: ztheta(nlev_M)
    real(8) :: zPtoTecr(nlev_T+1,nlev_M,nj_l)
    
    ! standard file variables
    integer :: ini,inj,ink, inpas, inbits, idatyp, ideet
    integer :: ip1,ip2,ip3,ig1,ig2,ig3,ig4,iswa,ilength,idltf
    integer :: iubc,iextr1,iextr2,iextr3,ierr,ipak,ipas
    integer :: iliste(100),idate(100),idimax,infon,iheures,idateo
    character(len=2)  :: cltypvar
    character(len=1)  :: clgrtyp
    character(len=4)  :: clnomvar
    character(len=12) :: cletiket
    integer :: vfstlir,vfstecr,fstprm,fstinf
    
    call gst_setID(gstID)
    call gst_zlegpol(zleg)

    ip1 = -1
    ip3 = -1
    idateo = -1
    cletiket = 'SP_THETA'
    cltypvar = 'X'
    clnomvar = 'ZZ'

    ! read spectral coefficients for theta

    do jn = 0, ntrunc
      ip2 = jn
      ikey = fstinf(nulbgst,inix,injx,inkx,idateo,cletiket,ip1,ip2,ip3,cltypvar,clnomvar)

      if(ikey .ge.0 ) then
        ikey = vfstlir(ztheta,nulbgst,ini,inj,ink,idateo,cletiket,ip1,ip2,ip3,cltypvar,clnomvar)
      else
        if(mpi_myid.eq.0) write(*,*) 'WARNING: CANNOT FIND THETA FOR ',jn,' SETTING TO ZERO!!!'
        ztheta(:) = 0.0d0
      endif

      do jk1 = 1, nlev_M
        zsptheta(jn,jk1) = ztheta(jk1)
      enddo

    enddo

    ! converting theta in physical space

    call zleginv2(zgrtheta,zsptheta,zleg,ntrunc,nj_l,nlev_M,nj_l,nlev_M,ntrunc)

    do jlat = 1, nj_l
      do jk1 = 1, nlev_M
        tantheta(jk1,jlat) = tan(zgrtheta(jlat,jk1))
      end do
    end do

    ip1 = -1
    ip2 = -1
    ip3 = -1
    idateo = -1
    cletiket = 'SP_PtoT'
    cltypvar = 'X'
    clnomvar = 'ZZ'

    ! read of spectral coefficients for P to T operator

    do jn = 0, ntrunc
      ip2 = jn
      ikey = fstinf(nulbgst,inix,injx,inkx,idateo,cletiket,ip1,ip2,ip3,cltypvar,clnomvar)

      if(ikey .ge.0 ) then
        ikey = vfstlir(zPtoTsrc,nulbgst,ini,inj,ink,idateo,cletiket,ip1,ip2,ip3,cltypvar,clnomvar)
      else
        if(mpi_myid.eq.0) write(*,*) 'WARNING: CANNOT FIND P_to_T FOR ',jn,' SETTING TO ZERO!!!'
        zPtoTsrc(:,:) = 0.0d0
      endif

      do jk2 = 1, nlev_M
        do jk1 = 1, nlev_T+1
          zspPtoT(jn,jk1,jk2) = zPtoTsrc(jk1,jk2)
        enddo
      enddo

    enddo

    ilen = nlev_M*(nlev_T+1)
    call zleginv2(zgrPtoT,zspPtoT,zleg,ntrunc,nj_l,ilen,nj_l,ilen,ntrunc)

    do jlat = 1, nj_l
      do jk2 = 1, nlev_M
        do jk1 = 1, nlev_T+1
          PtoT(jk1,jk2,jlat) = zgrPtoT(jlat,jk1,jk2)
        end do
      end do
    enddo

  END SUBROUTINE BHI_RDSPPTOT



  SUBROUTINE BHI_bSqrt(controlVector_in,statevector) 1,4
    implicit none

    real(8)   :: controlVector_in(cvDim_mpilocal)
    type(struct_gsv) :: statevector
    real(8),allocatable :: gd_out(:,:,:)
    real(8)   :: hiControlVector(nla_mpilocal,2,nkgdimSqrt)
    integer   :: jvar, ilev1, ilev2

    if(mpi_myid.eq.0) write(*,*) 'bhi_bsqrt: starting'
    if(mpi_myid.eq.0) write(*,*) 'Memory Used: ',get_max_rss()/1024,'Mb'

    if(.not. initialized) then
      if(mpi_myid.eq.0) write(*,*) 'bMatrixHI not initialized'
      return
    endif

    allocate(gd_out(myLonBeg:myLonEnd,nkgdim,myLatBeg:myLatEnd))

    call bhi_cain(controlVector_in,hiControlVector)

    call bhi_spa2gd(hiControlVector,gd_out)

    call copyToStatevector(statevector,gd_out)

    deallocate(gd_out)

    !    Conversion of wind images to physical winds
    call uvwi2uv(statevector)

    if(mpi_myid.eq.0) write(*,*) 'Memory Used: ',get_max_rss()/1024,'Mb'
    if(mpi_myid.eq.0) write(*,*) 'bhi_bsqrt: done'

  END SUBROUTINE BHI_bSqrt



  SUBROUTINE BHI_bSqrtAd(statevector,controlVector_out) 1,4
    implicit none

    real(8)   :: controlVector_out(cvDim_mpilocal)
    type(struct_gsv) :: statevector
    real(8), allocatable :: gd_in(:,:,:)
    real(8)   :: hiControlVector(nla_mpilocal,2,nkgdimSqrt)
    integer   :: jvar, ilev1, ilev2

    if(.not. initialized) then
      if(mpi_myid.eq.0) write(*,*) 'bMatrixHI not initialized'
      return
    endif

    if(mpi_myid.eq.0) write(*,*) 'bhi_bsqrtad: starting'
    if(mpi_myid.eq.0) write(*,*) 'Memory Used: ',get_max_rss()/1024,'Mb'

    !- Conversion of physical winds to wind images
    call uvwi2uv(statevector) ! INOUT

    allocate(gd_in(myLonBeg:myLonEnd,nkgdim,myLatBeg:myLatEnd))

    call copyFromStatevector(statevector,gd_in)

    call bhi_spa2gdad(gd_in,hiControlVector)

    call bhi_cainad(hiControlVector,controlVector_out)

    deallocate(gd_in)

    if(mpi_myid.eq.0) write(*,*) 'Memory Used: ',get_max_rss()/1024,'Mb'
    if(mpi_myid.eq.0) write(*,*) 'bhi_bsqrtad: done'

  END SUBROUTINE BHI_bSqrtAd



  SUBROUTINE copyToStatevector(statevector,gd) 1,5
    implicit none
    type(struct_gsv) :: statevector
    real(8) :: gd(myLonBeg:myLonEnd,nkgdim,myLatBeg:myLatEnd)
    integer :: jlon, jlev, jlev2, jlat, jvar, ilev1, ilev2
    real(8), pointer :: field(:,:,:)

    do jvar = 1, vnl_numvarmax 
      if(gsv_varExist(vnl_varNameList(jvar))) then
        field => gsv_getField3D(statevector,vnl_varNameList(jvar))
        if(vnl_varNameList(jvar).eq.'UU  ') then
          ilev1 = nspositVO
        elseif(vnl_varNameList(jvar).eq.'VV  ') then
          ilev1 = nspositDI
        elseif(vnl_varNameList(jvar).eq.'TT  ') then
          ilev1 = nspositTT
        elseif(vnl_varNameList(jvar).eq.'HU  ') then
          ilev1 = nspositQ
        elseif(vnl_varNameList(jvar).eq.'P0  ') then
          ilev1 = nspositPS
        elseif(vnl_varNameList(jvar).eq.'TG  ') then
          ilev1 = nspositTG
        else
          call abort3d('bmatrixhi_mod: copyToStatevector: No covariances available for variable:' // vnl_varNameList(jvar))
        endif
        ilev2 = ilev1 - 1 + gsv_getNumLev(statevector,vnl_vartypeFromVarname(vnl_varNameList(jvar)))
!!!$OMP PARALLEL DO PRIVATE(jlat,jlev,jlev2,jlon)
        do jlat = myLatBeg, myLatEnd
          do jlev = ilev1, ilev2
            jlev2 = jlev-ilev1+1
            do jlon = myLonBeg, myLonEnd
              field(jlon,jlev2,jlat) = gd(jlon,jlev,jlat)
            enddo
          enddo
        enddo
!!!$OMP END PARALLEL DO
      endif
    enddo

  END SUBROUTINE copyToStatevector



  SUBROUTINE copyFromStatevector(statevector,gd) 1,5
    implicit none
    type(struct_gsv) :: statevector
    real(8)          :: gd(myLonBeg:myLonEnd,nkgdim,myLatBeg:myLatEnd)
    integer :: jlon, jlev, jlev2, jlat, jvar, ilev1, ilev2
    real(8), pointer :: field(:,:,:)

    do jvar = 1, vnl_numvarmax 
      if(gsv_varExist(vnl_varNameList(jvar))) then
        field => gsv_getField3D(statevector,vnl_varNameList(jvar))
        if(vnl_varNameList(jvar).eq.'UU  ') then
          ilev1 = nspositVO
        elseif(vnl_varNameList(jvar).eq.'VV  ') then
          ilev1 = nspositDI
        elseif(vnl_varNameList(jvar).eq.'TT  ') then
          ilev1 = nspositTT
        elseif(vnl_varNameList(jvar).eq.'HU  ') then
          ilev1 = nspositQ
        elseif(vnl_varNameList(jvar).eq.'P0  ') then
          ilev1 = nspositPS
        elseif(vnl_varNameList(jvar).eq.'TG  ') then
          ilev1 = nspositTG
        else
          call abort3d('bmatrixhi_mod: copyFromStatevector: No covariances available for variable:' // vnl_varNameList(jvar))
        endif
        ilev2 = ilev1 - 1 + gsv_getNumLev(statevector,vnl_vartypeFromVarname(vnl_varNameList(jvar)))
!!!$OMP PARALLEL DO PRIVATE(jlat,jlev,jlev2,jlon)
        do jlat = myLatBeg, myLatEnd
          do jlev = ilev1, ilev2
            jlev2 = jlev-ilev1+1
            do jlon = myLonBeg, myLonEnd
              gd(jlon,jlev,jlat) = field(jlon,jlev2,jlat)
            enddo
          enddo
        enddo
!!!$OMP END PARALLEL DO
      endif
    enddo

  END SUBROUTINE copyFromStatevector



  SUBROUTINE BHI_reduceToMPILocal(cv_mpilocal,cv_mpiglobal,cvDim_mpilocal_out) 1,1
    implicit none
    real(8) :: cv_mpilocal(cvDim_mpilocal)
    real(8) :: cv_mpiglobal(cvDim_mpiglobal)
    integer :: cvDim_mpilocal_out

    integer :: jlev,jn,jm,ila_mpilocal,ila_mpiglobal,jdim_mpilocal,jdim_mpiglobal

    cvDim_mpilocal_out = cvDim_mpilocal

    jdim_mpilocal = 0
    do jlev = 1, nkgdimSqrt
      do jm = mymBeg, mymEnd, mymSkip
        do jn = mynBeg, mynEnd, mynSkip
          if(jm.le.jn) then

            ila_mpiglobal = gst_getNIND(jm,gstID) + jn - jm

            ! figure out index into global control vector
            if(jm.eq.0) then
              ! for jm=0 only real part
              jdim_mpiglobal = ila_mpiglobal
            else
              ! for jm>0 both real and imaginary part
              jdim_mpiglobal = 2*ila_mpiglobal-1 - (ntrunc+1)
            endif
            ! add offset for level
            jdim_mpiglobal = jdim_mpiglobal + (jlev-1) * (ntrunc+1)*(ntrunc+1)

            ! index into local control vector computer as in cain
            if(jm.eq.0) then
              ! only real component for jm=0
              jdim_mpilocal = jdim_mpilocal + 1
              cv_mpilocal(jdim_mpilocal) = cv_mpiglobal(jdim_mpiglobal)
            else
              ! both real and imaginary components for jm>0
              jdim_mpilocal = jdim_mpilocal + 1
              cv_mpilocal(jdim_mpilocal) = cv_mpiglobal(jdim_mpiglobal)
              jdim_mpilocal = jdim_mpilocal + 1
              cv_mpilocal(jdim_mpilocal) = cv_mpiglobal(jdim_mpiglobal+1)
            endif

            if(jdim_mpilocal.gt.cvDim_mpilocal)   &
              write(*,*) 'ERROR: jdim,cvDim,mpilocal=',jdim_mpilocal,cvDim_mpilocal,jlev,jn,jm
            if(jdim_mpiglobal.gt.cvDim_mpiglobal)   &
              write(*,*) 'ERROR: jdim,cvDim,mpiglobal=',jdim_mpiglobal,cvDim_mpiglobal,jlev,jn,jm

          endif
        enddo
      enddo
    enddo

  END SUBROUTINE BHI_reduceToMPILocal



  SUBROUTINE BHI_expandToMPIGlobal(cv_mpilocal,cv_mpiglobal,cvDim_mpiglobal_out) 1,1
    implicit none
    real(8) :: cv_mpilocal(cvDim_mpilocal)
    real(8) :: cv_mpiglobal(cvDim_mpiglobal)
    integer :: cvDim_mpiglobal_out

    real(8), allocatable :: cv_maxmpilocal(:)
    real(8), pointer :: cv_allmaxmpilocal(:,:) => null()
    integer, allocatable :: allnBeg(:),allnEnd(:),allnSkip(:)
    integer, allocatable :: allmBeg(:),allmEnd(:),allmSkip(:)
    integer :: jlev, jn, jm, jproc, ila_mpiglobal, jdim_mpilocal, jdim_mpiglobal, ierr, cvDim_maxmpilocal

    cvDim_mpiglobal_out = cvDim_mpiglobal

    ! gather all local control vectors onto mpi task 0
    call rpn_comm_allreduce(cvDim_mpilocal,cvDim_maxmpilocal,1,"mpi_integer","mpi_max","GRID",ierr)

    allocate(cv_maxmpilocal(cvDim_maxmpilocal))
    if(mpi_myid.eq.0) allocate(cv_allmaxmpilocal(cvDim_maxmpilocal,mpi_nprocs))

    cv_maxmpilocal(:) = 0.0d0
    cv_maxmpilocal(1:cvDim_mpilocal) = cv_mpilocal(1:cvDim_mpilocal)

    call tmg_start(59,'BHI_COMM')
    call rpn_comm_gather(cv_maxmpilocal,    cvDim_maxmpilocal, "mpi_double_precision",  &
                         cv_allmaxmpilocal, cvDim_maxmpilocal, "mpi_double_precision", 0, "GRID", ierr )
    call tmg_stop(59)

    deallocate(cv_maxmpilocal)

    allocate(allnBeg(mpi_nprocs))
    call rpn_comm_allgather(mynBeg,1,"mpi_integer",       &
                            allnBeg,1,"mpi_integer","GRID",ierr)
    allocate(allnEnd(mpi_nprocs))
    call rpn_comm_allgather(mynEnd,1,"mpi_integer",       &
                            allnEnd,1,"mpi_integer","GRID",ierr)
    allocate(allnSkip(mpi_nprocs))
    call rpn_comm_allgather(mynSkip,1,"mpi_integer",       &
                            allnSkip,1,"mpi_integer","GRID",ierr)

    allocate(allmBeg(mpi_nprocs))
    call rpn_comm_allgather(mymBeg,1,"mpi_integer",       &
                            allmBeg,1,"mpi_integer","GRID",ierr)
    allocate(allmEnd(mpi_nprocs))
    call rpn_comm_allgather(mymEnd,1,"mpi_integer",       &
                            allmEnd,1,"mpi_integer","GRID",ierr)
    allocate(allmSkip(mpi_nprocs))
    call rpn_comm_allgather(mymSkip,1,"mpi_integer",       &
                            allmSkip,1,"mpi_integer","GRID",ierr)

    ! reorganize gathered mpilocal control vectors into the mpiglobal control vector
    if(mpi_myid.eq.0) then
      cv_mpiglobal(:) = 0.0d0

!$OMP PARALLEL DO PRIVATE(jproc,jdim_mpilocal,jlev,jm,jn,ila_mpiglobal,jdim_mpiglobal)
      do jproc = 0, (mpi_nprocs-1)
        jdim_mpilocal = 0

        do jlev = 1, nkgdimSqrt
          do jm = allmBeg(jproc+1), allmEnd(jproc+1), allmSkip(jproc+1)
            do jn = allnBeg(jproc+1), allnEnd(jproc+1), allnSkip(jproc+1)
              if(jm.le.jn) then

                ila_mpiglobal = gst_getNIND(jm,gstID) + jn - jm

                ! figure out index into global control vector
                if(jm.eq.0) then
                  ! for jm=0 only real part
                  jdim_mpiglobal = ila_mpiglobal
                else
                  ! for jm>0 both real and imaginary part
                  jdim_mpiglobal = 2*ila_mpiglobal-1 - (ntrunc+1)
                endif
                ! add offset for level
                jdim_mpiglobal = jdim_mpiglobal + (jlev-1) * (ntrunc+1)*(ntrunc+1)

                ! index into local control vector
                if(jm.eq.0) then
                  ! only real component for jm=0
                  jdim_mpilocal = jdim_mpilocal + 1
                  cv_mpiglobal(jdim_mpiglobal) = cv_allmaxmpilocal(jdim_mpilocal,jproc+1)
                else
                  ! both real and imaginary components for jm>0
                  jdim_mpilocal = jdim_mpilocal + 1
                  cv_mpiglobal(jdim_mpiglobal) = cv_allmaxmpilocal(jdim_mpilocal,jproc+1)
                  jdim_mpilocal = jdim_mpilocal + 1
                  cv_mpiglobal(jdim_mpiglobal+1) = cv_allmaxmpilocal(jdim_mpilocal,jproc+1)
                endif

                if(jdim_mpiglobal.gt.cvDim_mpiglobal)   &
                  write(*,*) 'ERROR: jdim,cvDim,mpiglobal=',jdim_mpiglobal,cvDim_mpiglobal,jlev,jn,jm

              endif
            enddo
          enddo
        enddo
      enddo ! jproc
!$OMP END PARALLEL DO

    endif ! myid .eq. 0 

    deallocate(allnBeg)
    deallocate(allnEnd)
    deallocate(allnSkip)
    deallocate(allmBeg)
    deallocate(allmEnd)
    deallocate(allmSkip)
    if(mpi_myid.eq.0) deallocate(cv_allmaxmpilocal)

  end SUBROUTINE BHI_expandToMPIGlobal



  SUBROUTINE BHI_cain(controlVector_in,hiControlVector_out) 1,1
    implicit none

    real(8) :: controlVector_in(cvDim_mpilocal)
    real(8) :: hiControlVector_out(nla_mpilocal,2,nkgdimSqrt)

    integer :: jdim, jlev, jm, jn, ila_mpilocal, ila_mpiglobal

    jdim = 0
    hiControlVector_out(:,:,:) = 0.0d0
    do jlev = 1, nkgdimSqrt
      do jm = mymBeg, mymEnd, mymSkip
        do jn = mynBeg, mynEnd, mynSkip
          if(jm.le.jn) then
            ila_mpiglobal = gst_getNIND(jm,gstID) + jn - jm
            ila_mpilocal  = ilaList_mpilocal(ila_mpiglobal)
            if(jm.eq.0) then
              ! only real component for jm=0
              jdim = jdim + 1
              hiControlVector_out(ila_mpilocal,1,jlev) = controlVector_in(jdim)
            else
              ! both real and imaginary components for jm>0
              jdim = jdim + 1
              hiControlVector_out(ila_mpilocal,1,jlev) = controlVector_in(jdim)
              jdim = jdim + 1
              hiControlVector_out(ila_mpilocal,2,jlev) = controlVector_in(jdim)
            endif
          endif
        enddo
      enddo
    enddo

  end SUBROUTINE BHI_cain



  SUBROUTINE BHI_cainAd(hiControlVector_in,controlVector_out) 1,1
    IMPLICIT NONE

    real(8) :: controlVector_out(cvDim_mpilocal)
    real(8) :: hiControlVector_in(nla_mpilocal,2,nkgdimSqrt)

    integer :: jdim, jlev, jm, jn, ila_mpilocal, ila_mpiglobal

    jdim = 0
    do jlev = 1, nkgdimSqrt
      do jm = mymBeg, mymEnd, mymSkip
        do jn = mynBeg, mynEnd, mynSkip
          if(jm.le.jn) then
            ila_mpiglobal = gst_getNIND(jm,gstID) + jn - jm
            ila_mpilocal  = ilaList_mpilocal(ila_mpiglobal)
            if(jm.eq.0) then
              ! only real component for jm=0
              jdim = jdim + 1
              controlVector_out(jdim) = controlVector_out(jdim) + hiControlVector_in(ila_mpilocal,1,jlev)
            else
              ! both real and imaginary components for jm>0
              jdim = jdim + 1
              controlVector_out(jdim) = controlVector_out(jdim) + hiControlVector_in(ila_mpilocal,1,jlev)*2.0d0
              jdim = jdim + 1
              controlVector_out(jdim) = controlVector_out(jdim) + hiControlVector_in(ila_mpilocal,2,jlev)*2.0d0
            endif
          endif
        enddo
      enddo
    enddo

  END SUBROUTINE BHI_cainAd



  SUBROUTINE BHI_SPA2GD(hiControlVector_in,gd_out) 1,16
    IMPLICIT NONE

    real(8) :: hiControlVector_in(nla_mpilocal,2,nkgdimSqrt)
    real(8) :: gd_out(myLonBeg:myLonEnd,nkgdim,myLatBeg:myLatEnd)

    real(8) :: sptb(maxMyNla,2,nlev_T_even),sp(maxMyNla,2,nkgdim)
    real(8) :: tb0(myLonBeg:myLonEnd,nlev_T_even,myLatBeg:myLatEnd)

    integer :: jn,jm,ila_mpilocal,ila_mpiglobal,icount
    real(8) :: sq2, zp
    real(8) , allocatable :: zsp(:,:,:), zsp2(:,:,:)
    integer :: ilon, jlev, jlon, jlat, jla_mpilocal, klatPtoT
    real(8), pointer :: zgdpsi(:,:,:),zgdchi(:,:,:)
    real(8), target  :: gd(myLonBeg:myLonEnd,nkgdim,myLatBeg:myLatEnd)
    real(8) :: dla2, dl1sa2, zcoriolis, zpsb(myLonBeg:myLonEnd,myLatBeg:myLatEnd)

    klatPtoT = 1

    call tmg_start(53,'BHI_SPA2GD1')

    ! maybe not needed:
    sp(:,:,:) = 0.0d0
    sptb(:,:,:) = 0.0d0

    sq2 = sqrt(2.0d0)
    allocate(zsp(nkgdimSqrt,2,mymCount))
    allocate(zsp2(nkgdim2,2,mymCount))
!$OMP PARALLEL DO PRIVATE(jn,jm,jlev,ila_mpiglobal,ila_mpilocal,zsp2,zsp,icount)
    do jn = mynBeg, mynEnd, mynSkip

      icount = 0
      do jm = mymBeg, mymEnd, mymSkip
        if(jm.le.jn) then
          icount = icount+1
          ila_mpiglobal = gst_getNIND(jm,gstID) + jn - jm
          ila_mpilocal = ilaList_mpilocal(ila_mpiglobal)
          do jlev = 1, nkgdimSqrt
            zsp(jlev,1,icount) = hiControlVector_in(ila_mpilocal,1,jlev)
            zsp(jlev,2,icount) = hiControlVector_in(ila_mpilocal,2,jlev)
          enddo
        endif
      enddo

      if(icount.gt.0) then

        !CALL DGEMUL(corns(1,1,jn),nkgdim2,'N',zsp(1,1,1),nkgdimSqrt,'N',zsp2(1,1,1),nkgdim2,nkgdim2,nkgdimSqrt,2*icount)
        CALL DGEMM('N','N',nkgdim2,2*icount,nkgdimSqrt,1.0d0,corns(1,1,jn),nkgdim2,zsp(1,1,1),nkgdimSqrt,0.0d0,zsp2(1,1,1),nkgdim2)

        icount = 0
        do jm = mymBeg, mymEnd, mymSkip
          if(jm.le.jn) then
            icount = icount+1
            ila_mpiglobal = gst_getNIND(jm,gstID) + jn - jm
            ila_mpilocal = ilaList_mpilocal(ila_mpiglobal)
            do jlev = 1, nkgdim
              sp(ila_mpilocal,1,jlev) = zsp2(jlev,1,icount)
              sp(ila_mpilocal,2,jlev) = zsp2(jlev,2,icount)
            enddo
            do jlev = 1, nlev_T
              sptb(ila_mpilocal,1,jlev) = zsp2(jlev+nkgdim,1,icount)
              sptb(ila_mpilocal,2,jlev) = zsp2(jlev+nkgdim,2,icount)
            enddo
          endif
        enddo

      endif

      ! make adjustments for jm=0
      if(mymBeg.eq.0) then

        ila_mpiglobal = gst_getNind(0,gstID) + jn
        ila_mpilocal = ilaList_mpilocal(ila_mpiglobal)

        do jlev = 1, nkgdim
          sp(ila_mpilocal,1,jlev) = sp(ila_mpilocal,1,jlev)*sq2
          sp(ila_mpilocal,2,jlev) = 0.0d0
        enddo
        do jlev = 1, nlev_T
          sptb(ila_mpilocal,1,jlev) = sptb(ila_mpilocal,1,jlev)*sq2
          sptb(ila_mpilocal,2,jlev) = 0.0d0
        enddo

      endif

    enddo
!$OMP END PARALLEL DO
    deallocate(zsp)
    deallocate(zsp2)
    call tmg_stop(53)

!$OMP PARALLEL DO PRIVATE(JLAT,JLEV,JLON)
    do jlat = myLatBeg, myLatEnd
      do jlev = 1, nkgdim
        do jlon = myLonBeg, myLonEnd
          gd(jlon,jlev,jlat) = 0.0d0
        enddo
      enddo
      do jlev = 1, nlev_T_even
        do jlon = myLonBeg, myLonEnd
          tb0(jlon,jlev,jlat) = 0.0d0
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    call tmg_start(55,'BHI_SPEREE') 
    call gst_setID(gstID)
    call gst_speree4(sp,gd)
    call gst_setID(gstID2)
    call gst_speree4(sptb,tb0)
    call tmg_stop(55) 

    call tmg_start(54,'BHI_SPA2GD2')
!$OMP PARALLEL DO PRIVATE(jlat,zcoriolis,jlev,jlon,zp)
    do jlat = myLatBeg, myLatEnd
      zcoriolis = 2.d0*romega*gst_getRmu(jlat,gstID)
      do jlon = myLonBeg, myLonEnd
        zpsb(jlon,jlat) = 0.0d0
        do jlev = 1, nlevPtoT
         zp = zcoriolis*gd(jlon,nspositVO+jlev-1,jlat)
         zpsb(jlon,jlat) = zpsb(jlon,jlat) + PtoT(nlev_T+1,jlev,klatPtoT)*zp
        enddo
      enddo

      do jlev = 1, nlev_T
        do jlon = myLonBeg, myLonEnd
          tb0(jlon,jlev,jlat) = zcoriolis*tb0(jlon,jlev,jlat)
        enddo
      enddo

      do jlev = 1, nkgdim
        do jlon = myLonBeg, myLonEnd
          if(jlev.ne.nspositTG) then
            gd(jlon,jlev,jlat) = gd(jlon,jlev,jlat)*rgsig(jlat,jlev)
          else
            gd(jlon,jlev,jlat) = gd(jlon,jlev,jlat)*tgstdbg(jlon,jlat)
          endif
        enddo
      enddo

      do jlev = 1, nlev_T
        do jlon = myLonBeg, myLonEnd
          tb0(jlon,jlev,jlat) = tb0(jlon,jlev,jlat)*rgsigtb(jlat,jlev)
          gd(jlon,nspositTT+jlev-1,jlat) = gd(jlon,nspositTT+jlev-1,jlat)+tb0(jlon,jlev,jlat)
        enddo
      enddo
      do jlon = myLonBeg, myLonEnd
        zpsb(jlon,jlat) = zpsb(jlon,jlat)*rgsigpsb(jlat)
        gd(jlon,nspositPS,jlat) = gd(jlon,nspositPS,jlat)+zpsb(jlon,jlat)
      enddo
    enddo  ! jlat
!$OMP END PARALLEL DO
    call tmg_stop(54)

    zgdpsi(myLonBeg:,1:,myLatBeg:) => gd(myLonBeg:myLonEnd,nspositVO:(nspositVO+nlev_M-1),myLatBeg:myLatEnd)
    zgdchi(myLonBeg:,1:,myLatBeg:) => gd(myLonBeg:myLonEnd,nspositDI:(nspositDI+nlev_M-1),myLatBeg:myLatEnd)
!$OMP PARALLEL DO PRIVATE(jlat,jlev,jlon)
    do jlat = myLatBeg, myLatEnd
      do jlev = nlev_bdl, nlev_M
        do jlon = myLonBeg, myLonEnd
          zgdchi(jlon,jlev,jlat) = zgdchi(jlon,jlev,jlat) - tantheta(jlev,jlat)*zgdpsi(jlon,jlev,jlat)
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    sp(:,:,:) = 0.0d0

    call tmg_start(56,'BHI_REESPE') 
    call gst_setID(gstID)
    call gst_reespe4(sp,gd)
    call tmg_stop(56) 

    dla2   = ra*ra
    dl1sa2 = 1.d0/dla2
!$OMP PARALLEL DO PRIVATE(JLEV,JLA_MPILOCAL,ILA_MPIGLOBAL)
    do jlev = 1, nlev_M
      do jla_mpilocal = 1, nla_mpilocal
        ila_mpiglobal = ilaList_mpiglobal(jla_mpilocal)
        sp(jla_mpilocal,1,nspositVO+jlev-1) = sp(jla_mpilocal,1,nspositVO+jlev-1)*dl1sa2*gst_getrnnp1(ila_mpiglobal,gstID)
        sp(jla_mpilocal,2,nspositVO+jlev-1) = sp(jla_mpilocal,2,nspositVO+jlev-1)*dl1sa2*gst_getrnnp1(ila_mpiglobal,gstID)
        sp(jla_mpilocal,1,nspositDI+jlev-1) = sp(jla_mpilocal,1,nspositDI+jlev-1)*dl1sa2*gst_getrnnp1(ila_mpiglobal,gstID)
        sp(jla_mpilocal,2,nspositDI+jlev-1) = sp(jla_mpilocal,2,nspositDI+jlev-1)*dl1sa2*gst_getrnnp1(ila_mpiglobal,gstID)
      enddo
    enddo
!$OMP END PARALLEL DO

!$OMP PARALLEL DO PRIVATE(JLAT,JLEV,JLON)
    do jlat = myLatBeg, myLatEnd
      do jlev = 1, nkgdim
        do jlon = myLonBeg, myLonEnd
          gd(jlon,jlev,jlat) = 0.0d0
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    call tmg_start(57,'BHI_SPGD_SPGDA')
    call gst_setID(gstID)
    call gst_spgd4(sp,gd,nlev_M)
    call tmg_stop(57)

!$OMP PARALLEL DO PRIVATE(JLAT,JLEV,JLON)
    do jlat = myLatBeg, myLatEnd
      do jlev = 1, nkgdim
        do jlon = myLonBeg, myLonEnd
          gd_out(jlon,jlev,jlat) = gd(jlon,jlev,jlat)
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

  END SUBROUTINE BHI_SPA2GD



  SUBROUTINE BHI_SPA2GDAD(gd_in,hiControlVector_out) 1,16
    implicit none

    real(8) :: hiControlVector_out(nla_mpilocal,2,nkgdimSqrt)
    real(8) :: gd_in(myLonBeg:myLonEnd,nkgdim,myLatBeg:myLatEnd)

    real(8) :: sptb(maxMyNla,2,nlev_T_even)
    real(8) :: sp(maxMyNla,2,nkgdim)
    real(8) :: tb0(myLonBeg:myLonEnd,nlev_T_even,myLatBeg:myLatEnd)

    integer :: jn, jm, ila_mpilocal, ila_mpiglobal, icount
    real(8) :: sq2, zp
    real(8) ,allocatable :: zsp(:,:,:), zsp2(:,:,:)

    integer :: ilon, jlev, jlon, jlat, jla_mpilocal, klatPtoT
    real(8) :: dl1sa2, dla2, zcoriolis, zpsb(myLonBeg:myLonEnd,myLatBeg:myLatEnd)
    real(8),pointer :: zgdpsi(:,:,:) ,zgdchi(:,:,:)
    real(8), target :: gd(myLonBeg:myLonEnd,nkgdim,myLatBeg:myLatEnd)

    klatPtoT = 1

!$OMP PARALLEL DO PRIVATE(JLAT,JLEV,JLON)
    do jlat = myLatBeg, myLatEnd
      do jlev = 1, nkgdim
        do jlon = myLonBeg, myLonEnd
          gd(jlon,jlev,jlat) = gd_in(jlon,jlev,jlat)
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    call tmg_start(57,'BHI_SPGD_SPGDA')
    call gst_setID(gstID)
    call gst_spgda4(sp,gd,nlev_M)
    call tmg_stop(57)

    dla2   = ra*ra
    dl1sa2 = 1.d0/dla2
!$OMP PARALLEL DO PRIVATE(JLEV,JLA_MPILOCAL,ILA_MPIGLOBAL)
    do jlev = 1, nlev_M
      do jla_mpilocal = 1, nla_mpilocal
        ila_mpiglobal = ilaList_mpiglobal(jla_mpilocal)
        sp(jla_mpilocal,1,nspositVO+jlev-1) = sp(jla_mpilocal,1,nspositVO+jlev-1)*dl1sa2*gst_getrnnp1(ila_mpiglobal,gstID)
        sp(jla_mpilocal,2,nspositVO+jlev-1) = sp(jla_mpilocal,2,nspositVO+jlev-1)*dl1sa2*gst_getrnnp1(ila_mpiglobal,gstID)
        sp(jla_mpilocal,1,nspositDI+jlev-1) = sp(jla_mpilocal,1,nspositDI+jlev-1)*dl1sa2*gst_getrnnp1(ila_mpiglobal,gstID)
        sp(jla_mpilocal,2,nspositDI+jlev-1) = sp(jla_mpilocal,2,nspositDI+jlev-1)*dl1sa2*gst_getrnnp1(ila_mpiglobal,gstID)
      enddo
    enddo
!$OMP END PARALLEL DO

    call tmg_start(55,'BHI_SPEREE') 
    call gst_setID(gstID)
    call gst_speree4(sp,gd)
    call tmg_stop(55) 

    zgdpsi(myLonBeg:,1:,myLatBeg:) => gd(myLonBeg:myLonEnd,nspositVO:(nspositVO+nlev_M-1),myLatBeg:myLatEnd)
    zgdchi(myLonBeg:,1:,myLatBeg:) => gd(myLonBeg:myLonEnd,nspositDI:(nspositDI+nlev_M-1),myLatBeg:myLatEnd)
!$OMP PARALLEL DO PRIVATE(jlat,jlev,jlon)
    do jlat = myLatBeg, myLatEnd
      do jlev = nlev_bdl, nlev_M
        do jlon = myLonBeg, myLonEnd
          zgdpsi(jlon,jlev,jlat) = zgdpsi(jlon,jlev,jlat) - tantheta(jlev,jlat)*zgdchi(jlon,jlev,jlat)
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO

    call tmg_start(54,'BHI_SPA2GD2')
!$OMP PARALLEL DO PRIVATE(jlat,zcoriolis,jlev,jlon,zp)
    do jlat = myLatBeg, myLatEnd
      zcoriolis = 2.d0*romega*gst_getRMU(jlat,gstID)
      do jlev = 1, nlev_T
        do jlon = myLonBeg, myLonEnd
          tb0(jlon,jlev,jlat) = gd(jlon,nspositTT+jlev-1,jlat)
          tb0(jlon,jlev,jlat) = tb0(jlon,jlev,jlat)*rgsigtb(jlat,jlev)
        enddo
      enddo
      do jlon = myLonBeg, myLonEnd
        zpsb(jlon,jlat) = gd(jlon,nspositPS,jlat)
        zpsb(jlon,jlat) = zpsb(jlon,jlat)*rgsigpsb(jlat)
      enddo

      do jlev = 1, nkgdim
        do jlon = myLonBeg, myLonEnd
          if(jlev.ne.nspositTG) then
            gd(jlon,jlev,jlat) = gd(jlon,jlev,jlat)*rgsig(jlat,jlev)
          else
            gd(jlon,nspositTG,jlat) = gd(jlon,nspositTG,jlat)*tgstdbg(jlon,jlat)
          endif
        enddo
      enddo

      do jlev = 1, nlev_T
        do jlon = myLonBeg, myLonEnd
          tb0(jlon,jlev,jlat) = zcoriolis*tb0(jlon,jlev,jlat)
        enddo
      enddo

      do jlev = 1, nlevPtoT
        do jlon = myLonBeg, myLonEnd
          zp = PtoT(nlev_T+1,jlev,klatPtoT)*zpsb(jlon,jlat)
          gd(jlon,nspositVO+jlev-1,jlat) = zcoriolis*zp+gd(jlon,nspositVO+jlev-1,jlat)
        enddo
      enddo
    enddo
!$OMP END PARALLEL DO 
    call tmg_stop(54)

    call tmg_start(56,'BHI_REESPE') 
    call gst_setID(gstID)
    call gst_reespe4(sp,gd)
    call gst_setID(gstID2)
    call gst_reespe4(sptb,tb0)
    call tmg_stop(56) 

    call tmg_start(53,'BHI_SPA2GD1')

    hiControlVector_out(:,:,:) = 0.0d0
    sq2 = sqrt(2.0d0)
    allocate(zsp(nkgdimSqrt,2,mymCount))
    allocate(zsp2(nkgdim2,2,mymCount))
!$OMP PARALLEL DO PRIVATE(JN,JM,JLEV,ILA_MPILOCAL,ILA_MPIGLOBAL,zsp,zsp2,icount)
    do jn = mynBeg, mynEnd, mynSkip

      icount = 0
      do jm = mymBeg, mymEnd, mymSkip
        if(jm.le.jn) then
          icount = icount+1
          ila_mpiglobal = gst_getNind(jm,gstID) + jn - jm
          ila_mpilocal = ilaList_mpilocal(ila_mpiglobal)
          do jlev = 1, nkgdim
            zsp2(jlev,1,icount) = sp(ila_mpilocal,1,jlev)
            zsp2(jlev,2,icount) = sp(ila_mpilocal,2,jlev)
          enddo
          do jlev = 1, nlev_T
            zsp2(jlev+nkgdim,1,icount) = sptb(ila_mpilocal,1,jlev)
            zsp2(jlev+nkgdim,2,icount) = sptb(ila_mpilocal,2,jlev)
          enddo
        endif
      enddo

      if(icount.gt.0) then

        !CALL DGEMUL(corns(1,1,jn),nkgdim2,'T',zsp2(1,1,1),nkgdim2,'N',zsp(1,1,1),nkgdimSqrt,nkgdimSqrt,nkgdim2,2*icount)
        CALL DGEMM('T','N',nkgdimSqrt,2*icount,nkgdim2,1.0d0,corns(1,1,jn),nkgdim2,zsp2(1,1,1),nkgdim2,0.0d0,zsp(1,1,1),nkgdimSqrt)

        icount = 0
        do jm = mymBeg, jn, mymSkip
          icount=icount+1
          ila_mpiglobal = gst_getNIND(jm,gstID) + jn - jm
          ila_mpilocal = ilaList_mpilocal(ila_mpiglobal)
          do jlev = 1, nkgdimSqrt
            hiControlVector_out(ila_mpilocal,1,jlev) = zsp(jlev,1,icount)
            hiControlVector_out(ila_mpilocal,2,jlev) = zsp(jlev,2,icount)
          enddo
        enddo

      endif

      ! make adjustments for jm=0
      if(mymBeg.eq.0) then

        ila_mpiglobal = gst_getNIND(0,gstID) + jn
        ila_mpilocal = ilaList_mpilocal(ila_mpiglobal)

        do jlev = 1, nkgdimSqrt
          hiControlVector_out(ila_mpilocal,1,jlev) = hiControlVector_out(ila_mpilocal,1,jlev)*sq2
          hiControlVector_out(ila_mpilocal,2,jlev) = hiControlVector_out(ila_mpilocal,2,jlev)*sq2
        enddo

      endif

    enddo
!$OMP END PARALLEL DO
    deallocate(zsp)
    deallocate(zsp2)
    call tmg_stop(53)

  END SUBROUTINE BHI_SPA2GDAD



  SUBROUTINE ZLEGDIR(PF,PN,PLEG,DDWT,KTRUNC,KNJ,KLEV,KNJDIM,KLEVDIM,KNDIM) 3
!**s/r ZLEGDIR  - Direct Legendre transform restricted to
!
!*Arguments
!*     i   PF(KNJDIM,KLEVDIM)   : field in physical space
!*     o   PN(0:KNDIM, KLEVDIM ): spectral coefficients
!*     o   PLEG(0:KNDIM, KNJDIM): Legendre polynomials evaluated at the Gaussian latitudes
!*     i   DDWT(KNJDIM)          : weights of the Gaussian quadrature
!*     i   KNJ                  : number of Gaussian latitudes
!*     i   KTRUNC               : spectral truncation
!*     i   KLEV                 : number of fields to transform
!*     i   KNJDIM               : dimensioning of the field (in latitude)
!*     i   KLEVDIM              : dimensioning of the field (in KLEV)
!*     I   KNDIM                : dimensioning of the field (in KTRUNC)
      IMPLICIT NONE

      INTEGER :: KNJ, KLEV, KTRUNC, KNJDIM, KLEVDIM, KNDIM
      REAL(8) :: PF(KNJDIM,4*KLEVDIM), PN(0:KNDIM, 4*KLEVDIM), PLEG(0:KNDIM,KNJDIM)
      REAL(8) :: DDWT(KNJ)

      INTEGER :: J, JN
      REAL(8), ALLOCATABLE :: ZWORK(:,:)

      ALLOCATE(ZWORK(0:KTRUNC,KNJ))
      DO J = 1, KNJ
         DO JN = 0, KTRUNC
            ZWORK(JN,J) = PLEG(JN,J)*DDWT(J)
         END DO
      END DO

      !CALL DGEMM('N','N',ZWORK(0,1),KTRUNC+1,'N',PF(1,1),KNJDIM,'N',PN(0,1),KNDIM+1,KNDIM+1,KNJ,KLEV) 
      CALL DGEMM('N','N',KNDIM+1,KLEV,KNJ,1.0d0,ZWORK(0,1),KTRUNC+1,PF(1,1),KNJDIM,0.0d0,PN(0,1),KNDIM+1) 

      DEALLOCATE(ZWORK)

  END SUBROUTINE ZLEGDIR



  SUBROUTINE ZLEGINV(PF,PN,PLEG,DDWT,KTRUNC,KNJ,KLEV,KNJDIM,KLEVDIM,KNDIM) 1
!***s/r ZLEGINV  - Direct Legendre transform restricted to
!*     .           fields that vary with latitude only
!*Arguments
!*     o   PF(KNJDIM,KLEVDIM)  : field in physical space
!*     i   PN(0:KNDIM, KLEVDIM): spectral coefficients
!*     i   PLEG(0:KNDIM,KNJDIM): Legendre functions evaluated at the KNJ Gaussian
!*     .                         latitudes
!*     i   DDWT(KNJDIM)         : weights of the Gaussian quadrature
!*     i   KNJ                 : number of Gaussian latitudes
!*     i   KTRUNC              : spectral truncation
!*     i   KLEV                : number of fields to transform
!*     i   KNJDIM              : dimensioning of the field (in latitude)
!*     i   KLEVDIM             : dimensioning of the field (in KLEV)
!*     I   KNDIM               : dimensioning of the field (in KTRUNC)
      IMPLICIT NONE

      INTEGER :: KNJ, KLEV, KTRUNC, KNJDIM, KLEVDIM, KNDIM
      REAL(8) :: PF(KNJDIM,4*KLEVDIM), PN(0:KNDIM, 4*KLEVDIM), PLEG(0:KNDIM,KNJDIM)
      REAL(8) :: DDWT(KNJDIM)

      INTEGER :: J, JN
      REAL(8), ALLOCATABLE :: ZWORK(:,:)

      ALLOCATE(ZWORK(0:KTRUNC,KNJ))

      DO J = 1, KNJ
         DO JN = 0, KTRUNC
            ZWORK(JN,J) = PLEG(JN,J)
         END DO
      END DO

      !CALL DGEMUL(ZWORK(0,1),KTRUNC+1,'T',PN(0,1),KNDIM+1,'N',PF(1,1),KNJDIM,KNJ,KTRUNC+1,KLEV) 
      CALL DGEMM('T','N',KNJ,KLEV,KTRUNC+1,1.0d0, ZWORK(0,1),KTRUNC+1,PN(0,1),KNDIM+1,0.0d0,PF(1,1),KNJDIM) 

      DEALLOCATE(ZWORK)

  END SUBROUTINE ZLEGINV



  SUBROUTINE ZLEGINV2(PF,PN,PLEG,KTRUNC,KNJ,KLEV,KNJDIM,KLEVDIM,KNDIM) 6
!***s/r ZLEGINV2  - Direct Legendre transform restricted to
!*     .            fields that vary with latitude only
!*Arguments
!*     o   PF(KNJDIM,KLEVDIM)  : field in physical space
!*     i   PN(0:KNDIM, KLEVDIM): spectral coefficients
!*     i   PLEG(0:KNDIM,KNJDIM): Legendre functions evaluated at the KNJ Gaussian
!*     .                         latitudes
!*     i   KNJ                 : number of Gaussian latitudes
!*     i   KTRUNC              : spectral truncation
!*     i   KLEV                : number of fields to transform
!*     i   KNJDIM              : dimensioning of the field (in latitude)
!*     i   KLEVDIM             : dimensioning of the field (in KLEV)
!*     I   KNDIM               : dimensioning of the field (in KTRUNC)
      IMPLICIT NONE

      INTEGER :: KNJ, KLEV, KTRUNC, KNJDIM, KLEVDIM, KNDIM
      REAL(8) :: PF(KNJDIM,KLEVDIM), PN(0:KNDIM, KLEVDIM), PLEG(0:KNDIM,KNJDIM)

      INTEGER :: J, JN
      REAL(8), ALLOCATABLE :: ZWORK(:,:)

      ALLOCATE(ZWORK(0:KTRUNC,KNJ))

      DO J = 1, KNJ
         DO JN = 0, KTRUNC
            ZWORK(JN,J) = PLEG(JN,J)
         END DO
      END DO

      !CALL DGEMUL(ZWORK(0,1),KTRUNC+1,'T',PN(0,1),KNDIM+1,'N',PF(1,1),KNJDIM,KNJ,KTRUNC+1,KLEV) 
      CALL DGEMM('T','N',KNJ,KLEV,KTRUNC+1,1.0d0,ZWORK(0,1),KTRUNC+1,PN(0,1),KNDIM+1,0.0d0,PF(1,1),KNJDIM) 

      DEALLOCATE(ZWORK)

  END SUBROUTINE ZLEGINV2



  SUBROUTINE BHI_Finalize() 1
    implicit none

    deallocate(pressureProfile_M)
    deallocate(pressureProfile_T)
    deallocate(PtoT)
    deallocate(tantheta)
    deallocate(rgsig)
    deallocate(tgstdbg)
    deallocate(rgsigtb)
    deallocate(rgsigpsb)
    deallocate(corns)
    deallocate(rstddev)

  END SUBROUTINE BHI_Finalize

  !--------------------------------------------------------------------------
  ! uvwi2uv
  !--------------------------------------------------------------------------

  subroutine uvwi2uv(statevector) 2,3
    !
    ! s/r uvwi2uv: conversion of wind images to physical winds
    ! (Remark: this subroutine is self-adjoint)
    !
    implicit none
    type(struct_gsv) :: statevector

    integer :: jstep, jlev, jlat, jlon, lon1, lon2, lat1, lat2, nlev_gsv

    real(8), pointer :: uu_ptr(:,:,:,:),vv_ptr(:,:,:,:)
    
    uu_ptr => gsv_getField(statevector,'UU')
    vv_ptr => gsv_getField(statevector,'VV')
    
    lon1 = statevector%myLonBeg
    lon2 = statevector%myLonEnd
    lat1 = statevector%myLatBeg
    lat2 = statevector%myLatEnd
    nlev_gsv = gsv_getNumLev(statevector,'MM')

!$OMP PARALLEL
!$OMP DO PRIVATE (jlat,jstep,jlev,jlon)
    do jlat = lat1, lat2
      do jstep = 1, statevector%numStep
        do jlev = 1, nlev_gsv
          do jlon = lon1, lon2
            uu_ptr(jlon,jlev,jlat,jstep) = gaus_conphy(jlat) * uu_ptr(jlon,jlev,jlat,jstep)
            vv_ptr(jlon,jlev,jlat,jstep) = gaus_conphy(jlat) * vv_ptr(jlon,jlev,jlat,jstep)
          end do
        end do
      end do
    end do
!$OMP END DO
!$OMP END PARALLEL

  END SUBROUTINE uvwi2uv

END MODULE BmatrixHI