!-------------------------------------- LICENCE BEGIN ------------------------------------
!Environment Canada - Atmospheric Science and Technology License/Disclaimer, 
!                     version 3; Last Modified: May 7, 2008.
!This is free but copyrighted software; you can use/redistribute/modify it under the terms 
!of the Environment Canada - Atmospheric Science and Technology License/Disclaimer 
!version 3 or (at your option) any later version that should be found at: 
!http://collaboration.cmc.ec.gc.ca/science/rpn.comm/license.html 
!
!This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 
!without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
!See the above mentioned License/Disclaimer for more details.
!You should have received a copy of the License/Disclaimer along with this software; 
!if not, you can write to: EC-RPN COMM Group, 2121 TransCanada, suite 500, Dorval (Quebec), 
!CANADA, H9P 1J3; or send e-mail to service.rpn@ec.gc.ca
!-------------------------------------- LICENCE END --------------------------------------
***s/r sol_fft8_2 - parallel direct solution of the horizontal
*                   Helmholtz problem, using ffft8
*
#include "model_macros_f.h"
*

      subroutine sol_fft8_2( sol, Rhs, pri,
     $                       Minx, Maxx, Miny, Maxy, njl,
     $                       Minz, Maxz, Nk, Nkl, Gni, Gnj,
     $                                 Minij, Maxij, L_nij,
     $                       minx1, maxx1, minx2, maxx2,nx3,
     $                       F_npex1, F_npey1, ai, bi, ci,
     $                       fdg2,fdwfft)
#include "impnone.cdk"
#include "ptopo.cdk"
*
*author    Abdessamad Qaddouri- JULY 1999
*
*revision
* v1_96 - alain patoine            - rename sol_fft8, sol_fft8_2 (calling sequence changed)
* v3_10 - Corbeil & Desgagne & Lee - AIXport+Opti+OpenMP
*
*object
*   Sequence carried out by this routine:
*     1. transpose Rhs into fdwfft (rpn_comm_transpose, code 1)
*     2. zero the unused part of fdwfft, call ffft8 with sign -1
*        for each level k in 1..Nkl, scale the result by pri
*     3. transpose fdwfft into fdg2 (rpn_comm_transpose, code 2)
*     4. sweep fdg2 forward, then backward, along its last index
*        using the coefficients bi, ai and ci
*     5. transpose fdg2 back into fdwfft (code -2), call ffft8
*        with sign +1, transpose fdwfft into Sol (code -1)
*
*arguments
*   o      - Sol     - result
*   i      - Rhs     - r.h.s. of elliptic equation
*   i      - pri     - factor multiplying the output of the
*                      sign -1 ffft8 call
*   i      - Minx,Maxx,Miny,Maxy - bounds of the first two
*                      dimensions of Sol and Rhs; Miny:Maxy is
*                      also the first dimension of fdwfft
*   i      - njl     - rows njl+1..Maxy of fdwfft are zeroed
*                      before the transform
*                      (presumably the local row count - confirm)
*   i      - Minz,Maxz - bounds of the 2nd dimension of fdwfft
*                      and of the 1st dimension of fdg2
*   i      - Nk      - extent of the 3rd dimension of Sol and Rhs
*   i      - Nkl     - levels 1..Nkl are transformed, levels
*                      Nkl+1..Maxz of fdwfft are zeroed
*   i      - Gni     - number of points in x-direction
*   i      - Gnj     - number of points in y-direction
*   i      - Minij,Maxij - bounds of the 2nd dimension of fdg2
*   i      - L_nij   - (Maxz-Minz+1)*L_nij entries are swept for
*                      every value of the last index of fdg2
*   i      - minx1,maxx1,minx2,maxx2,nx3 - dimensions of ai,bi,ci
*   i      - F_npex1 - extra length added to the last dimension
*                      of fdwfft
*   i      - F_npey1 - extra length added to the last dimension
*                      of fdg2
*   i      - ai,bi,ci - coefficients of the forward (bi, ai) and
*                      backward (ci) sweeps
*                      (presumably a prefactored tridiagonal
*                      operator - confirm against the caller)
*   io     - fdg2    - work array, swept in place
*   io     - fdwfft  - work array, transformed in place
*
*notes
*   fdg2, ai, bi and ci are addressed as (ki,1,j) with ki running
*   from 1 to (Maxz-Minz+1)*L_nij: the first two dimensions are
*   walked as one linear index.  This relies on storage order and
*   trips run-time bounds checking.
*   NOTE(review): fdwfft is indexed from 1 in j and k although it
*   is declared Miny:Maxy, Minz:Maxz - looks like Miny = Minz = 1
*   is assumed; confirm against the caller.
*
*modules
      external ffft8, rpn_comm_transpose

      integer  F_npex1, F_npey1

      integer  minx1, maxx1, minx2, maxx2,nx3

      Real*8   ai(minx1:maxx1,minx2:maxx2,nx3),
     $         bi(minx1:maxx1,minx2:maxx2,nx3),
     $         ci(minx1:maxx1,minx2:maxx2,nx3)


      integer Minx, Maxx, Miny, Maxy, njl
      integer Minz, Maxz, Nk, Nkl
      integer Gni, Gnj
      integer Minij, Maxij, L_nij
      real*8  Sol(Minx:Maxx,Miny:Maxy,Nk), Rhs(Minx:Maxx,Miny:Maxy,Nk)
      real*8  pri

**
      real*8 fdwfft(Miny:Maxy,Minz:Maxz,Gni+2+F_npex1)
      real*8   fdg2(Minz:Maxz,Minij:Maxij,Gnj+F_npey1)
*
      integer i,  j, k, ki, jw, jr
      integer kkii, ki0, kin, kitotal, kilon
      real*8 zero
      parameter( zero = 0.0d0 )
*
      call tmg_start0 (45, 'TRP_1    ' )
      call rpn_comm_transpose( Rhs, Minx, Maxx, Gni, (Maxy-Miny+1),
     %                         Minz, Maxz, Nk, fdwfft, 1, 2 )
      call tmg_stop0(45)

*     Work split for the sweeps on fdg2: kitotal linear entries cut
*     into Ptopo_npeOpenMP chunks of at most kilon entries.  Both
*     values are loop invariant and shared, so they are set here,
*     before the parallel region, by a single thread.
      kitotal = (Maxz-Minz+1)*L_nij
      kilon = (kitotal + Ptopo_npeOpenMP)/Ptopo_npeOpenMP

*     projection ( wfft = x transposed * g )
      call tmg_start0 (46, 'SOL_4    ' )
*     every loop index is listed as private so that no thread-local
*     scalar depends on implicit data-sharing rules
!$omp parallel private(i,j,k,ki,jw,jr,ki0,kin,kkii)
!$omp%          shared(kitotal,kilon,ai,bi,ci)

*     zero the rows above njl and the levels above Nkl
!$omp do
      do i= 1,Gni
         do k= Minz, nkl
            do j= njl+1,Maxy
                  fdwfft(j,k,i)=zero
            enddo
         enddo

         do k= Nkl+1,Maxz
            do j= Miny,Maxy
                  fdwfft(j,k,i)=zero
            enddo
         enddo
      enddo
!$omp enddo
*
*     one ffft8 call per level, sign -1; the second argument is the
*     size of one (j,k) plane of fdwfft
!$omp do
      do k=1,Nkl
      call ffft8(fdwfft(Miny,k,1),(Maxy-Miny+1)*(Maxz-Minz+1),
     %                                     1, (Maxy-Miny+1), -1 )
      enddo
!$omp enddo
*     disabled alternative: one call covering all levels
c     call ffft8(fdwfft,(Maxy-Miny+1)*(Maxz-Minz+1),
c    %                                     1, (Maxy-Miny+1) * Nkl, -1 )

*     scale by pri, then clear slot Gni+2 and copy slot 1 into
*     slot 2 so that the transpose below, which starts at slot 2,
*     carries it along
!$omp do
      do k = 1, Nkl
         do i = 0, (Gni)/2
            do jw = 1, (Maxy-Miny+1)
               fdwfft(jw,k,2*i+1) = pri * fdwfft(jw,k,2*i+1)
               fdwfft(jw,k,2*i+2) = pri * fdwfft(jw,k,2*i+2)
            enddo
         enddo

         do j = 1, (Maxy-Miny+1)
            fdwfft(j,k,Gni+2) = zero
            fdwfft(j,k,2)     = fdwfft(j,k,1)
         enddo
      enddo
!$omp enddo

*
!$omp single
      call tmg_start0 (47, 'TRP_2    ' )
      call rpn_comm_transpose
     $     ( fdwfft(1,1,2), Miny, Maxy, Gnj, (Maxz-Minz+1),
     $                         Minij, Maxij, Gni, fdg2, 2, 2 )
      call tmg_stop0(47)
!$omp end single
*
*     sweeps along the last index of fdg2; each iteration of the
*     outer loop owns the linear range ki0..kin
!$omp do
      do kkii = 1,Ptopo_npeOpenMP
          ki0 = 1 + kilon*(kkii-1)
          kin = min(kitotal, kilon*kkii)

*         forward sweep, first row
          do ki= ki0, kin
             fdg2(ki,1,1) = bi(ki,1,1)*fdg2(ki,1,1)
          enddo

*         forward sweep, remaining rows
          do j =2, Gnj
            jr =  j - 1
            do ki= ki0,kin
               fdg2(ki,1,j) = bi(ki,1,j)*fdg2(ki,1,j) - ai(ki,1,j)
     $                                          * fdg2(ki,1,jr)
            enddo
          enddo

*         backward sweep
          do j = Gnj-1, 1, -1
             jr =  j + 1
             do ki= ki0, kin
                fdg2(ki,1,j) = fdg2(ki,1,j)
     $                       - ci(ki,1,j) * fdg2(ki,1,jr)
             enddo
          enddo
      enddo
!$omp enddo
*
!$omp single
      call tmg_start0 (48, 'TRP_3    ' )
      call rpn_comm_transpose
     $     ( fdwfft(1,1,2), Miny, Maxy, Gnj, (Maxz-Minz+1),
     $                        Minij, Maxij, Gni, fdg2,- 2, 2 )
      call tmg_stop0(48)
!$omp end single

*     move slot 2 back into slot 1 and clear slots 2 and Gni+2
!$omp do
      do k = 1, Nkl
        do j = 1, (Maxy-Miny+1)
            fdwfft(j,k,1)     = fdwfft(j,k,2)
            fdwfft(j,k,2)     = zero
            fdwfft(j,k,Gni+2) = zero
         enddo
      enddo
!$omp enddo

*     inverse projection ( r = x * w )

!$omp do
      do k=1, Nkl
      call ffft8( fdwfft(Miny,k,1), (Maxy-Miny+1) * (Maxz-Minz+1), 1,
     %                                    (Maxy-Miny+1), +1 )
      enddo
!$omp enddo
!$omp end parallel
      call tmg_stop0(46)
*     disabled alternative: one call covering all levels
c     call ffft8( fdwfft, (Maxy-Miny+1) * (Maxz-Minz+1), 1,
c    %                                    (Maxy-Miny+1) * Nkl, +1 )
*
      call tmg_start0 (49, 'TRP_4    ' )
      call rpn_comm_transpose( Sol, Minx, Maxx, Gni, (Maxy-Miny+1),
     %                              Minz, Maxz, Nk, fdwfft, -1, 2 )
      call tmg_stop0(49)
*
      return
      end