!-------------------------------------- LICENCE BEGIN ------------------------------------
!Environment Canada - Atmospheric Science and Technology License/Disclaimer, 
!                     version 3; Last Modified: May 7, 2008.
!This is free but copyrighted software; you can use/redistribute/modify it under the terms 
!of the Environment Canada - Atmospheric Science and Technology License/Disclaimer 
!version 3 or (at your option) any later version that should be found at: 
!http://collaboration.cmc.ec.gc.ca/science/rpn.comm/license.html 
!
!This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 
!without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
!See the above mentioned License/Disclaimer for more details.
!You should have received a copy of the License/Disclaimer along with this software; 
!if not, you can write to: EC-RPN COMM Group, 2121 TransCanada, suite 500, Dorval (Quebec), 
!CANADA, H9P 1J3; or send e-mail to service.rpn@ec.gc.ca
!-------------------------------------- LICENCE END --------------------------------------
***s/r ftridi6 - quasi-tridiagonal operator in one direction
*

      subroutine ftridi7 (F_r, F_rhs, F_a_8, F_b_8, F_c_8, 1
     %                     F_deltai,F_deltaj, NI, NJ, F_period_L) 
*
#include "impnone.cdk"
*
      logical F_period_L
      integer F_deltai, F_deltaj, NI, NJ
      real    F_r(*), F_rhs(*)
      real*8  F_a_8(NI), F_b_8(NI), F_c_8(NI)
*
*author
*   jean cote - october 1996 - generalization and optimization for SX4
*                              of fortran version of ctridi
*
*revision
* v2_00 - Lee/Patoine       - initial MPI version (from ftridi v1_03)
* v3_10 - Corbeil & Desgagne & Lee - AIXport+Opti+OpenMP
*
*object
*         see above id
*
*arguments
*  Name        I/O                 Description
*----------------------------------------------------------------
* F_r          O    - result in an array
* F_rhs        I    - right hand side
* F_a_8        I    - lower  diagonal of operator
* F_b_8        I    - middle diagonal of operator
* F_c_8        I    - upper  diagonal of operator
* F_deltai     I    - distance between successive elements in the
*                     direction of work
* F_deltaj     I    - distance between successive elements in the
*                     transverse direction
* NI           I    - dimension along the direction of work
* NJ           I    - dimension of the transverse direction
* F_period_L   I    - .true. if periodic in work direction
*
*notes
*     F_a_8,F_b_8,F_c_8 tridiagonal matrix acting on the i index,
*     F_a_8(1) and F_c_8(NI) contains the information for periodicity,
*     work done for NJ F_rhs vectors,
*     index of (i,j) element = 1+(i-1)*F_deltai+(j-1)*F_deltaj,
*     F_r and F_rhs can share the same space in memory.
**
*
      integer i, j, j0, j1, j2, j3, j4, h1, h2, h3, h4, i1, i2, i3, n
      real    s1, s2, s3, s4, s5, s6, s7, s8, s41, s51, s61, s71, s81
      real    v(256), w(256)
*
c     call tmg_start(98,'ftridi')
!$omp parallel private(v,w , i, j, j0, j1, j2, j3, j4, h1, h2, h3, h4,
!$omp&  i1, i2, i3, n, s1, s2, s3, s4, s5, s6, s7, s8, s41, s51, s61,
!$omp&  s71, s81)
!$omp do
      do 300 j0=0,NJ-1,256
         n = min( 256, NJ - j0 )
         h1 = 1 - F_deltaj + j0 * F_deltaj
         h2 = h1 + F_deltai
         j1 = h1
         j2 = h2
         if ( F_period_L ) then
            j3 = h1 + ( NI - 1 ) * F_deltai
*VDIR NODEP
c*VDIR VREG(v(noload,nostore),w(noload,nostore))
            do j=1,n
               j1    = j1 + F_deltaj
               j2    = j2 + F_deltaj
               j3    = j3 + F_deltaj
               v(j)  = F_rhs(j1)
               F_r(j1) = (F_b_8(1)*v(j)+F_c_8(1)*F_rhs(j2)) + F_a_8(1)*F_rhs(j3)
               w(j)  = v(j)
            enddo
         else
*VDIR NODEP
c, VREG(v(noload,nostore),w(noload,nostore))
            do j=1,n
               j1    = j1 + F_deltaj
               j2    = j2 + F_deltaj
               v(j)  = F_rhs(j1)
               F_r(j1) = F_b_8(1) * v(j) + F_c_8(1) * F_rhs(j2)
            enddo
         endif
         h1 = h1 + F_deltai
         h2 = h1 + F_deltai
         h3 = h2 + F_deltai
         h4 = h3 + F_deltai
         i1 = 2
         i2 = i1 + 1 
         i3 = i2 + 1 
         do 200 i=2,NI-3,3
            j1 = h1
            j2 = h2
            j3 = h3
            j4 = h4
*VDIR NODEP
c, VREG(v(noload,nostore),w(noload,nostore))
            do 100 j=1,n
               j1    = j1 + F_deltaj
               j2    = j2 + F_deltaj
               j3    = j3 + F_deltaj
               j4    = j4 + F_deltaj
               s1    = F_rhs(j1)
               s2    = F_rhs(j2)
               s3    = F_rhs(j3)
               F_r(j1) = F_a_8(i1) * v(j) + F_b_8(i1) * s1 + F_c_8(i1) * s2
               F_r(j2) = F_a_8(i2) * s1   + F_b_8(i2) * s2 + F_c_8(i2) * s3
               F_r(j3) = F_a_8(i3) * s2 + F_b_8(i3) * s3 + F_c_8(i3) * F_rhs(j4)
               v(j)  = s3
  100       continue
            h1 = h1 + 3 * F_deltai
            h2 = h1 +     F_deltai
            h3 = h2 +     F_deltai
            h4 = h3 +     F_deltai
            i1 = i1 + 3 
            i2 = i1 + 1 
            i3 = i2 + 1 
  200    continue
         j1 = h1
         j2 = h2
         j3 = h3
         if ( mod( NI, 3 ) .eq. 1 .and. F_period_L ) then
*VDIR NODEP
c, VREG(v(noload,nostore),w(noload,nostore))
            do j=1,n
               j1    = j1 + F_deltaj
               j2    = j2 + F_deltaj
               j3    = j3 + F_deltaj
               s41   = F_rhs(j1)
               s51   = F_rhs(j2)
               s61   = F_rhs(j3)
               F_r(j1) =   F_a_8(i1) * v(j) + F_b_8(i1) * s41 + F_c_8(i1) * s51
               F_r(j2) =   F_a_8(i2) * s41  + F_b_8(i2) * s51 + F_c_8(i2) * s61
               F_r(j3) = ( F_a_8(NI)*s51 + F_b_8(NI)*s61 ) + F_c_8(NI)*w(j)
            enddo
         elseif ( mod( NI, 3 ) .eq. 0 .and. F_period_L ) then
*VDIR NODEP
c, VREG(v(noload,nostore),w(noload,nostore))
            do j=1,n
               j1    = j1 + F_deltaj
               j2    = j2 + F_deltaj
               s71   = F_rhs(j1)
               s81   = F_rhs(j2)
               F_r(j1) =   F_a_8(i1) * v(j) + F_b_8(i1) * s71 + F_c_8(i1) * s81
               F_r(j2) = ( F_a_8(NI)*s71  + F_b_8(NI)*s81 ) + F_c_8(NI)*w(j)
            enddo
         elseif ( mod( NI, 3 ) .eq. 2 .and. F_period_L ) then
*VDIR NODEP
c, VREG(v(noload,nostore),w(noload,nostore))
            do j=1,n
               j1    = j1 + F_deltaj
               F_r(j1) = (F_a_8(NI)*v(j) + F_b_8(NI)*F_rhs(j1)) + F_c_8(NI)*w(j)
            enddo
         elseif ( mod( NI, 3 ) .eq. 1 ) then
*VDIR NODEP
c, VREG(v(noload,nostore),w(noload,nostore))
            do j=1,n
               j1    = j1 + F_deltaj
               j2    = j2 + F_deltaj
               j3    = j3 + F_deltaj
               s4    = F_rhs(j1)
               s5    = F_rhs(j2)
               s6    = F_rhs(j3)
               F_r(j1) = F_a_8(i1) * v(j) + F_b_8(i1) * s4 + F_c_8(i1) * s5
               F_r(j2) = F_a_8(i2) * s4   + F_b_8(i2) * s5 + F_c_8(i2) * s6
               F_r(j3) = F_a_8(NI) * s5   + F_b_8(NI) * s6
            enddo
         elseif ( mod( NI, 3 ) .eq. 0 ) then
*VDIR NODEP
c, VREG(v(noload,nostore),w(noload,nostore))
            do j=1,n
               j1    = j1 + F_deltaj
               j2    = j2 + F_deltaj
               s7    = F_rhs(j1)
               s8    = F_rhs(j2)
               F_r(j1) = F_a_8(i1) * v(j) + F_b_8(i1) * s7 + F_c_8(i1) * s8
               F_r(j2) = F_a_8(NI) * s7   + F_b_8(NI) * s8
            enddo
         elseif ( mod( NI, 3 ) .eq. 2 ) then
*VDIR NODEP
c, VREG(v(noload,nostore),w(noload,nostore))
            do j=1,n
               j1    = j1 + F_deltaj
               F_r(j1) = F_a_8(NI) * v(j)   + F_b_8(NI) * F_rhs(j1)
            enddo
         endif
  300 continue
!$omp enddo
!$omp end parallel
c     call tmg_stop(98)
      return
      end