!-------------------------------------- LICENCE BEGIN ------------------------------------
!Environment Canada - Atmospheric Science and Technology License/Disclaimer, 
!                     version 3; Last Modified: May 7, 2008.
!This is free but copyrighted software; you can use/redistribute/modify it under the terms 
!of the Environment Canada - Atmospheric Science and Technology License/Disclaimer 
!version 3 or (at your option) any later version that should be found at: 
!http://collaboration.cmc.ec.gc.ca/science/rpn.comm/license.html 
!
!This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 
!without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
!See the above mentioned License/Disclaimer for more details.
!You should have received a copy of the License/Disclaimer along with this software; 
!if not, you can write to: EC-RPN COMM Group, 2121 TransCanada, suite 500, Dorval (Quebec), 
!CANADA, H9P 1J3; or send e-mail to service.rpn@ec.gc.ca
!-------------------------------------- LICENCE END --------------------------------------
*     subroutine 'qcfft8' - multiple fast real shifted cosine transform

*     real shifted cosine transform of length n
*     implementation inspired by Numerical Recipes pp. 513
*     but with a different normalization

*author 
*     j.cote, rpn     - Sept 1999

*revision
* v3_30 - McTaggart-Cowan   - call qcfft8_vec if defined (NEC)
* v3_30 - Qaddouri/Lee      - initialize work vector to zero (LAM bug)

*     a     is the array containing input and output data
*     inc   is the increment within each data 'vector'
*          (e.g. inc=1 for consecutively stored data)
*     jump  is the increment between the start of each data vector
*     lot   is the number of data vectors
*     isign = +1 for transform from fourier to gridpoint
*           = -1 for transform from gridpoint to fourier

*     definition of transform:
*     -------------------------

*     isign=+1: r(i) = sum(k=0,...,n-1)(a(k)*cos((i+1/2)*k*pi/n))

*     isign=-1: r(k) = sum(i=0,...,n-1)(a(i)*cos((i+1/2)*k*pi/n))
*                      * ((2-delta(k,0))/n)

*     Note for 'a' stored as a(n1,n2) then
*
*        for a transform along the first dimension
*
*           inc   = 1
*           jump  = n1
*
*        for a transform along the second dimension
*
*           inc   = n1
*           jump  = 1
*
*     The subroutine SETSCQR must have been called to set-up
*     the commons COMFFT8 and COMFFT8X
*
*-----------------------------------------------------------------------
*

      subroutine qcfft8( a, inc, jump, lot, isign ) 8,1
      implicit none
*
      integer inc, jump, lot, isign
      real*8  a(*)
*
      integer n, m, nstore
      real*8  ssin, ccos, qsin
      pointer ( ptss,ssin(n-m-1) ), ( ptcc,ccos(n-m-1) )
      pointer ( ptqs,qsin(0:m-1) )
      common    / comfft8x / ptss, ptcc, ptqs, n, m, nstore
*
      integer i, j, k, is, k1, kk, j0, jlot, ija, ijw
      real*8  ai, as, ya, ys, c, s, rr, w(511*nstore),zero, half, 
     $        one, two, four
      parameter( zero=0.0, half=0.5, one=1.0, two=2.0, four=4.0 )
*
*-----------------------------------------------------------------------
*
      ija(i,j) = 1 + (j0+j-1)*jump + i*inc
      ijw(i,j) = j + i*511
*
#if defined (NEC)
      call qcfft8_vec ( a, inc, jump, lot, isign )
      return 
#endif
*
      w(:)=0.0

*      
*VDIR NODEP
      do 100 j0=0,lot-1,511
      jlot  = min( 511, lot - j0 )

      if ( isign .eq. -1 ) then
*
*     transform from gridpoint to Fourier
*
*VDIR NODEP
         do i = 0 , m-1

            is = n - i - 1

*VDIR NODEP
            do j=1,jlot
               ai = a( ija(i ,j) )
               as = a( ija(is,j) )
               ys = ai + as
               ya =  two *  qsin( i ) * ( as - ai )
               w( ijw(i ,j) ) = ys + ya
               w( ijw(is,j) ) = ys - ya
            enddo

         enddo

         if ( n .ne. 2 * m )  then
*VDIR NODEP
            do j=1,jlot
               w( ijw(m,j) ) = two * a( ija(m,j) )
            enddo
         endif

         call ffft8( w, 511, 1, jlot, -1 )

*VDIR NODEP
         do j=1,jlot
            a( ija(0,j) ) = w( ijw(0,j) ) * half
         enddo

*VDIR NODEP
         do k = 1 , m

            kk = 2*k
            k1 = kk + 1

            if ( k .lt. m .or. n .ne. 2 * m ) then
               c = ccos( k )
               s = ssin( k )
*VDIR NODEP
               do j=1,jlot
                  a( ija(kk-1,j) ) = -s*w( ijw(kk,j) )+c*w( ijw(k1,j) )
                  a( ija(kk  ,j) ) =  c*w( ijw(kk,j) )+s*w( ijw(k1,j) )
               enddo
            else
*VDIR NODEP
               do j=1,jlot
                  a( ija(kk-1,j) ) = -w( ijw(kk,j) )
               enddo
            endif

         enddo
         if ( n .eq. 2 * m )  then
*VDIR NODEP
            do j=1,jlot
               a( ija(n-1,j) ) = a( ija(n-1,j) ) * half
            enddo
         endif
*VDIR NODEP

         do k = 2*m-3 , 1 , -2
*VDIR NODEP
            do j=1,jlot
               a( ija(k,j) ) = a( ija(k,j) ) + a( ija(k+2,j) )
            enddo
         enddo
*
      elseif ( isign .eq. +1 ) then
*
*     transform from Fourier to gridpoint
*
*VDIR NODEP
         do j=1,jlot
            w( ijw(0,j) ) = a( ija(0,j) )
            w( ijw(1,j) ) = zero
         enddo

*VDIR NODEP

         do k = 2 , n-1 , 2
*VDIR NODEP
            do j=1,jlot
               w( ijw(k,j) ) = a( ija(k,j) ) * half
            enddo
         enddo
*VDIR NODEP

         do k = 3 , 2*m-1 , 2
*VDIR NODEP
            do j=1,jlot
              w( ijw(k,j) ) = ( a( ija(k-2,j) ) - a( ija(k,j) ) ) * half
            enddo
         enddo
         if ( n .eq. 2 * m )  then
            c = one
         else
            c = half
         endif
*VDIR NODEP
         do j=1,jlot
            w( ijw(2*m+1,j) ) = a( ija(2*m-1,j) ) * c
         enddo

*VDIR NODEP
         do k = 1 , m

            if ( k .lt. m .or. n .ne. 2 * m ) then
               c = ccos( k )
               s = ssin( k )
            else
               c = zero
               s = one
            endif

            kk = 2*k
            k1 = kk + 1
*VDIR NODEP
            do j=1,jlot
               rr = w( ijw(kk,j) )
               w( ijw(kk,j) ) =  c * rr - s * w( ijw(k1,j) )
               w( ijw(k1,j) ) =  s * rr + c * w( ijw(k1,j) )
            enddo

         enddo

         call ffft8( w, 511, 1, jlot, +1 )

*VDIR NODEP
         do i = 0 , m-1

            is = n - i - 1

*VDIR NODEP
            do j=1,jlot
               ys = ( w( ijw(i ,j) ) + w( ijw(is,j) ) ) * half
               ya = ( w( ijw(is,j) ) - w( ijw(i ,j) ) ) /
     %              ( four * qsin( i ) )
               a( ija(i ,j) ) = ys + ya
               a( ija(is,j) ) = ys - ya

            enddo

         enddo
         if ( n .ne. 2 * m )  then
*VDIR NODEP
            do j=1,jlot
               a( ija(m,j) ) = w( ijw(m,j) )
            enddo
         endif

      endif

  100 continue
*
*-----------------------------------------------------------------------
*
      return
      end