!-------------------------------------- LICENCE BEGIN ------------------------------------
!Environment Canada - Atmospheric Science and Technology License/Disclaimer, 
!                     version 3; Last Modified: May 7, 2008.
!This is free but copyrighted software; you can use/redistribute/modify it under the terms 
!of the Environment Canada - Atmospheric Science and Technology License/Disclaimer 
!version 3 or (at your option) any later version that should be found at: 
!http://collaboration.cmc.ec.gc.ca/science/rpn.comm/license.html 
!
!This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 
!without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
!See the above mentioned License/Disclaimer for more details.
!You should have received a copy of the License/Disclaimer along with this software; 
!if not, you can write to: EC-RPN COMM Group, 2121 TransCanada, suite 500, Dorval (Quebec), 
!CANADA, H9P 1J3; or send e-mail to service.rpn@ec.gc.ca
!-------------------------------------- LICENCE END --------------------------------------
***s/r hzd_solfft - parallel direct solution of high-order diffusion 
*                   equation with ffft8 

#include "model_macros_f.h"

       subroutine hzd_solfft (F_sol, F_Rhs_8, F_pri_8 ,
     %                        F_a_8, F_c_8, F_deltai_8,
     %             minx1, maxx1, minx2, maxx2, nx1, nx2, nx3, F_pwr,
     %             minx,maxx,miny,maxy,gnk,gni,nil,njl,nkl,
     %             F_opsxp0_8, F_opsyp0_8,F_cdiff,F_npex,F_npey)
*
      implicit none
*
      integer  minx1, maxx1, minx2, maxx2 , nx1, nx2, nx3, F_pwr,
     $         minx , maxx , miny , maxy  , gnk, gni,
     $         njl  , nkl  , nil  , F_npex, F_npey
      real*8  F_opsxp0_8(*), F_opsyp0_8(*),F_pri_8,
     $            F_a_8(1:F_pwr,1:F_pwr,minx2:maxx2,nx3),
     $            F_c_8(1:F_pwr,1:F_pwr,minx2:maxx2,nx3),
     $       F_deltai_8(1:F_pwr,1:F_pwr,minx2:maxx2,nx3),
     $       F_Rhs_8
      real   F_cdiff, F_sol(minx:maxx,miny:maxy,gnk)
*
*author
*     Abdessamad Qaddouri
*
*revision
* v2_10 - Qaddouri A.        - initial version
* v3_02 - J. P. Toviessi     - remove data overflow bug
* v3_10 - Corbeil & Desgagne & Lee - AIXport+Opti+OpenMP
* v3_11 - Corbeil L.        - new RPNCOMM transpose
*
*object
*     Direct solution of the high-order horizontal diffusion
*     equation.  The sequence of operations is:
*       1) transpose F_sol into fdg1_8 (rpn_comm_transpose48),
*          passing fact_8 = (-1)**F_pwr * F_cdiff to the transpose
*       2) multiply by F_opsxp0_8 and copy into the zero-padded
*          FFT work array fwft_8
*       3) forward transform along the third index (ffft8, -1)
*       4) multiply the coefficients by F_pri_8, shift the first
*          slab by one position and transpose into fdg2_8
*       5) solve a block-tridiagonal system (blocks F_pwr x F_pwr)
*          along the last index: forward sweep with F_a_8,
*          scaling with F_deltai_8, backward sweep with F_c_8
*       6) keep component F_pwr of the solution, transpose back,
*          undo the shift, inverse transform (ffft8, +1) and
*          transpose the result back into F_sol
*
*arguments
*  Name        I/O                 Description
*----------------------------------------------------------------
*  F_sol        I/O      r.h.s. and result of horizontal diffusion
*  F_Rhs_8       I       not referenced in this routine
*  F_pri_8       I       factor applied to the transformed field
*  F_a_8         I       blocks used in the forward sweep
*  F_c_8         I       blocks used in the backward sweep
*  F_deltai_8    I       blocks applied between the two sweeps
*  minx1..maxx2  I       bounds of the transposed work arrays
*  nx1,nx2,nx3   I       computational extents in the transposed
*                        layout
*  F_pwr         I       block size; also sets the sign of fact_8
*  minx..maxy    I       horizontal bounds of F_sol
*  gnk,gni       I       sizes passed to the transposes; gni is
*                        also the length of the transform
*  nil           I       not referenced in this routine
*  njl,nkl       I       computational extents of the first two
*                        indices of fdg1_8 and fwft_8
*  F_opsxp0_8    I       scaling read at index gni+i
*  F_opsyp0_8    I       scaling read at index nx3+j
*  F_cdiff       I       coefficient entering fact_8
*  F_npex,F_npey I       padding added to the last dimension of
*                        the work arrays
*----------------------------------------------------------------
*
**
#include "ptopo.cdk"
      real*8   fdg1_8 ( miny:maxy, minx1:maxx1, gni+F_npex ),
     $         fdg2_8 (minx1:maxx1,minx2:maxx2, nx3+F_npey ),
     $         dn3_8 (minx1:maxx1,minx2:maxx2,F_pwr,nx3),
     $         sol_8 (minx1:maxx1,minx2:maxx2,F_pwr,nx3),
     $         fwft_8( miny:maxy ,minx1:maxx1,gni+2+F_npex)
      real*8   ZERO_8, fact_8
      parameter( ZERO_8 = 0.0d0 )
      integer o1,o2,i,j,k,jw
      integer kkii, ki0, kin, kilon
      integer pi0,pin
**
*     __________________________________________________________________
*
*     Chunk length used to split the minx2:maxx2 range between the
*     Ptopo_npeOpenMP iterations of the sweep loops below.
*
      kilon = (maxx2-minx2+1 +Ptopo_npeOpenMP)/Ptopo_npeOpenMP
*
* Solve
*
      fact_8=((-1)**F_pwr)*dble(F_cdiff)
*
*     The argument list is split so that every line stays within
*     column 72 of fixed-form source.
*
      call rpn_comm_transpose48 ( F_sol, Minx, Maxx, Gni, 1,
     %                            (Maxy-Miny+1), (Maxy-Miny+1),
     %                            minx1, maxx1, Gnk, fdg1_8, 1,
     %                            fact_8, 0.d0 )
!$omp parallel private(ki0,kin,pi0,pin)
*
*     Scale the transposed field and copy it into fwft_8; every
*     element of slabs 1..Gni outside (1:njl,1:Nkl) is set to zero.
*
!$omp do
      do i = 1, Gni
         do k = 1, Nkl
         do j = 1, njl
            fdg1_8(j,k,i) = F_opsxp0_8(gni+i)*fdg1_8(j,k,i)
         enddo
         enddo
         do k = 1, Nkl
         do j= njl+1,maxy
            fwft_8(j,k,i) = ZERO_8
         enddo
         enddo
*
         do k= Nkl+1, maxx1
            do j= miny,maxy
               fwft_8(j,k,i) = ZERO_8
            enddo
         enddo
*
         do k=1,nkl
            do j= miny,0
               fwft_8(j,k,i) = ZERO_8
            enddo
            do j=1,njl
               fwft_8(j,k,i) = fdg1_8(j,k,i)
            enddo
         enddo
      enddo
!$omp enddo
!$omp do
      do i= 1,gni+2
         do k=minx1,0
            do j= miny,maxy
               fwft_8(j,k,i) = ZERO_8
            enddo
         enddo
      enddo
!$omp enddo
*
*     projection ( wfft = x transposed * g )
*
!$omp do
      do k=1,Nkl
         call ffft8 (fwft_8(miny,k,1),(Maxy-Miny+1)*(maxx1-minx1+1),
     %                     1, (Maxy-Miny+1) , -1 )
      enddo
!$omp enddo
*
*     Multiply the transformed coefficients by F_pri_8.
*
!$omp do
      do i = 0, (Gni)/2
         do k = 1, Nkl
            do jw = 1, njl
               fwft_8(jw,k,2*i+1) = F_pri_8 * fwft_8(jw,k,2*i+1)
               fwft_8(jw,k,2*i+2) = F_pri_8 * fwft_8(jw,k,2*i+2)
            enddo
         enddo
      enddo
!$omp enddo
*
*     Clear slab Gni+2 and copy slab 1 over slab 2; the transpose
*     below starts at slab 2.
*     The j loop runs over the declared bounds miny:maxy.  It used
*     to run over 1:(Maxy-Miny+1), which steps past maxy into the
*     next k row (owned by another thread) whenever miny < 1.  The
*     rows that differ between the two ranges only hold zeros.
*
!$omp do
      do k = 1, Nkl
         do j = miny, maxy
            fwft_8(j,k,Gni+2) = ZERO_8
            fwft_8(j,k,2)     = fwft_8(j,k,1)
         enddo
      enddo
!$omp enddo
!$omp single
*
      call rpn_comm_transpose (fwft_8(Miny,1,2),Miny,Maxy,nx3,
     $            (Maxx1-Minx1+1),minx2, maxx2,gni,fdg2_8,2,2)
!$omp end single
*
* right-hand side: only block component 1 is non-zero
*
!$omp do
      do j = 1, nx3
      do o1= 1, F_pwr
      do i = minx2, maxx2
      do k = minx1, maxx1
         sol_8(k,i,o1,j) = ZERO_8
         dn3_8(k,i,o1,j) = ZERO_8
      enddo
      enddo
      enddo
      do i = 1, nx2
      do k = 1, nx1
         dn3_8(k,i,1,j)= F_opsyp0_8(nx3+j)*fdg2_8(k,i,j)
      enddo
      enddo
      enddo
!$omp enddo
*
* solve the block-tridiagonal system
*
* forward sweep: sol(1) = rhs(1), then for j = 2..nx3
*                sol(j) = rhs(j) - F_a_8(j) * sol(j-1)
*
!$omp do
      do o1= 1,F_pwr
      do i = 1, nx2
      do k= 1, nx1
         sol_8(k,i,o1,1) = dn3_8(k,i,o1,1)
      enddo
      enddo
      enddo
!$omp enddo
*
*     The recurrence in j is sequential, so the work is split over
*     the second index instead, one chunk per kkii.
*     NOTE(review): the accumulation is chunked over minx2:maxx2
*     (ki0:kin) but the subtraction over 1:nx2 (pi0:pin); the two
*     chunkings coincide only when minx2 = 1 -- confirm with callers.
*
!$omp do
      do kkii = 1, Ptopo_npeOpenMP
         ki0 = minx2 + kilon*(kkii-1)
         kin = min(ki0+kilon-1, maxx2)
         pi0 = 1+ kilon*(kkii-1)
         pin = min(pi0+kilon-1, nx2)
*
         do j = 2, nx3
*           sol_8(:,:,:,j) was zeroed above and is used here as the
*           accumulator of F_a_8(j) * sol(j-1)
            do o1= 1, F_pwr
               do o2= 1, F_pwr
                  do i = ki0, kin
                     do k = 1, nx1
                        sol_8(k,i,o1,j)= sol_8(k,i,o1,j)
     $                       + F_a_8(o1,o2,i,j)*sol_8(k,i,o2,j-1)
                     enddo
                  enddo
               enddo
            enddo
            do o1= 1,F_pwr
               do i = pi0, pin
                  do k= 1, nx1
                     sol_8(k,i,o1,j) = dn3_8(k,i,o1,j) - sol_8(k,i,o1,j)
                  enddo
               enddo
            enddo
         enddo
      enddo
!$omp enddo
*
* scale the right-hand side for the backward sweep:
*     dn3(j) = F_deltai_8(j) * sol(j)
*
!$omp do
      do j = 1, nx3
         do o1= 1, F_pwr
         do i = minx2, maxx2
         do k = minx1, maxx1
            dn3_8(k,i,o1,j) = ZERO_8
         enddo
         enddo
         enddo
         do o2=1,F_pwr
         do o1=1,F_pwr
         do i= minx2,maxx2
         do k= minx1,maxx1
            dn3_8(k,i,o1,j)= dn3_8(k,i,o1,j)
     $                       + F_deltai_8(o1,o2,i,j)*sol_8(k,i,o2,j)
         enddo
         enddo
         enddo
         enddo
      enddo
!$omp enddo
*
* backward sweep: sol(nx3) = dn3(nx3), then for j = nx3-1..1
*                 sol(j) = dn3(j) - F_c_8(j) * sol(j+1)
*
!$omp do
      do j = 1, nx3-1
         do o1= 1, F_pwr
            do i = 1, nx2
               do k = 1, nx1
                  sol_8(k,i,o1,j)=ZERO_8
               enddo
            enddo
         enddo
      enddo
!$omp enddo
!$omp do
      do o1= 1, F_pwr
         do i = 1, nx2
            do k = 1, nx1
               sol_8(k,i,o1,nx3)=dn3_8(k,i,o1,nx3)
            enddo
         enddo
      enddo
!$omp enddo
*
*     Same chunking over the second index as in the forward sweep.
*
!$omp do
      do kkii = 1, Ptopo_npeOpenMP
         ki0 = minx2 + kilon*(kkii-1)
         kin = min(ki0+kilon-1, maxx2)
         pi0 = 1+ kilon*(kkii-1)
         pin = min(pi0+kilon-1, nx2)
         do j = nx3-1, 1, -1
            do o2= 1, F_pwr
               do o1= 1, F_pwr
                  do i = ki0, kin
                     do k = minx1, maxx1
                        sol_8(k,i,o1,j)= sol_8(k,i,o1,j)
     $                       + F_c_8(o1,o2,i,j)*sol_8(k,i,o2,j+1)
                     enddo
                  enddo
               enddo
            enddo
*
            do o1= 1, F_pwr
               do i = pi0, pin
                  do k = 1, nx1
                     sol_8(k,i,o1,j)=dn3_8(k,i,o1,j)-sol_8(k,i,o1,j)
                  enddo
               enddo
            enddo
         enddo
      enddo
!$omp enddo
*
*     Only block component F_pwr of the solution is kept.
*
!$omp do
      do j = 1, nx3
      do i = 1, nx2
      do k = 1, nx1
         fdg2_8(k,i,j)=sol_8(k,i,F_pwr,j)
      enddo
      enddo
      enddo
!$omp enddo
*
*     inverse projection ( r = x * w )
*
!$omp single
      call rpn_comm_transpose
     $     ( fdg1_8 , Miny, Maxy, nx3, (Maxx1-Minx1+1),
     $               minx2, maxx2,gni, fdg2_8,- 2, 2 )
*
!$omp end single
*
*     Put the coefficients back at slab offset +1, then undo the
*     shift of the first slab that was applied before the transpose.
*
!$omp do
      do i = 1, Gni
         do k = 1, nkl
            do j=1,njl
               fwft_8(j,k,i+1) = fdg1_8(j,k,i)
            enddo
         enddo
      enddo
!$omp enddo
*
*     As above, j runs over the declared bounds miny:maxy rather
*     than 1:(Maxy-Miny+1) to stay inside the row being processed.
*
!$omp do
      do k = 1, nkl
         do j = miny, maxy
            fwft_8(j,k,1)     = fwft_8(j,k,2)
            fwft_8(j,k,2)     = ZERO_8
            fwft_8(j,k,Gni+2) = ZERO_8
         enddo
      enddo
!$omp enddo
*
!$omp do
      do k=1,Nkl
         call ffft8 (fwft_8(Miny,k,1),(Maxy-Miny+1)*(maxx1-minx1+1),1,
     %                               (Maxy-Miny+1), +1 )
      enddo
!$omp enddo
!$omp end parallel
*
*     Transpose the result back into F_sol.
*
      call rpn_comm_transpose48 ( F_sol, Minx, Maxx, Gni, 1,
     %                            (Maxy-Miny+1), (Maxy-Miny+1),
     %                            minx1, maxx1, Gnk, fwft_8, -1,
     %                            1.d0, 0.d0 )
*
*     __________________________________________________________________
*
      return
      end