!-------------------------------------- LICENCE BEGIN ------------------------------------
!Environment Canada - Atmospheric Science and Technology License/Disclaimer, 
!                     version 3; Last Modified: May 7, 2008.
!This is free but copyrighted software; you can use/redistribute/modify it under the terms 
!of the Environment Canada - Atmospheric Science and Technology License/Disclaimer 
!version 3 or (at your option) any later version that should be found at: 
!http://collaboration.cmc.ec.gc.ca/science/rpn.comm/license.html 
!
!This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 
!without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
!See the above mentioned License/Disclaimer for more details.
!You should have received a copy of the License/Disclaimer along with this software; 
!if not, you can write to: EC-RPN COMM Group, 2121 TransCanada, suite 500, Dorval (Quebec), 
!CANADA, H9P 1J3; or send e-mail to service.rpn@ec.gc.ca
!-------------------------------------- LICENCE END --------------------------------------
***s/r adw_tritrunc_lag3d - Tri-Lagrangian (truncated) interpolation.
*
#include "model_macros_f.h"
*

      subroutine adw_tritrunc_lag3d ( F_out, F_in, F_x, F_y, F_z, 2
     %                              F_num, F_mono_L, i0, in, j0, jn, kn )
*
      implicit none
*
      logical F_mono_L
*
      integer F_num, i0, in, j0, jn, kn
*
      real F_in(*)
*
      real F_out (F_num), F_x(F_num), F_y(F_num), F_z(F_num)
*
*authors
*     McTaggart-Cowan
*
*     (Based on adw_tricub_lag3d and adw_trilin v_3.2.1 with modifications
*     as per ECMWF http://www.ecmwf.int/research/ifsdocs/CY28r1/Dynamics/Dynamics-4-02.html)
*      
*revision
* v3_30 - McTaggart-Cowan - initial version 
*
*object
*     see id section
*
*arguments
*______________________________________________________________________
*              |                                                 |     |
* NAME         | DESCRIPTION                                     | I/O |
*--------------|-------------------------------------------------|-----|
* F_out        | result of interpolation                         |  o  |
* F_in         | field to interpolate                            |  i  |
*              |                                                 |     |
* F_x          | interpolation target X coordinate               |  i  |
* F_y          | interpolation target Y coordinate               |  i  |
* F_z          | interpolation target Z coordinate               |  i  |
*              |                                                 |     |
* F_num        | number of points to interpolate                 |  i  |
*              |                                                 |     |
* F_mono_L     | switch: .true. : monotonic interpolation        |  i  |
*______________|_________________________________________________|_____|
*
*implicits
#include "glb_ld.cdk"
#include "adw.cdk"
*
*notes
*  This algorithm is a truncated version of the full 3D Lagrangian
*  interpolation procedure (adw_trilag_3d).  Full Lagrangian interpolation
*  requires that the local calculation be done in a 4-point 3D cube,
*  thereby needing 64 values for each point.  This imposes an enormous
*  load on memory access during the gather/scatter operation.  In this
*  truncation, a 3D diamond rather than a cube is required for the
*  interpolation operation (32 values).  Interpolation to the points closest to the
*  back-trajectory origin is done using cubic function; however, interpolation
*  to points further from the origin are done linearly.  As a result,
*  each 3D truncated interpolation uses only 7 Lagragian interpolations
*  and 10 linear interpolations (compared to 21 Lagrangian interpolations
*  in the full 3D Lagrangian algorithm).
*
*  The organization of the diamond is shown in plan form here, along with
*  the order of each interpolation for the 'inner' layers (immediately above
*  and below the point of interest, indeces k and k+1), and 'outer' layers
*  (indeces k-1 and k+2).  The origin of the back-trajectory is denoted with
*  {} braces around the interpolation method used in the y-direction to
*  obtain it.  Both layers are plotted in the horizontal plane.  Cube points
*  not accessed are denoted with a '0', and those addressed and used by the
*  truncated algorithm are denoted with an 'X'.
*
*  Inner layers 2 x (2 linear; 3 cubic):
*	0		X -- linear  -- X		0
*				|
*				|
*				|
*	X ------------- X --- cubic --- X ------------- X
*				|
*			     {cubic}
*				|
*       X ------------- X --- cubic --- X ------------- X
*				|
*				|
*				|
*	0		X -- linear  -- X		0
*
*  Outer layers 2 x (3 linear):
*	0		0		0		0
*
*
*
*	0		X -- linear  -- X		0
*			 	|
*			    {linear}
*				|
*	0		X -- linear  -- X		0
*
*
*
*	0		0		0		0
*
*  The vertical interpolation is cubic (Lagrangian) through the
*  four points obtained using the layers shown above.  This interpolation
*  constitues the 7th (and final) higher order interpolation performed
*  by the truncated algorithm.
*
* **********************************************************************
      integer n,nijag,i,j,k,nij,iimax,jjmax,kkmax,ii,jj,kk
      logical zcubic_L
*


	integer count


      real prmin, prmax
*
      integer o1, o2, o3, o4

      real*8      a2, a3
      real*8  b1, b2, b3, b4
      real*8  c1, c2, c3, c4
      real*8      d2, d3

      real*8  p1, p2, p3, p4
*
      real*8 :: capx,capy,capz
      real*8 :: triprd,za,zb,zc,zd,rri,rrj,rrk,ra,rb,rc,rd
      triprd(za,zb,zc,zd)=(za-zb)*(za-zc)*(za-zd)
*
*     ----------------------------------------------------
*
      nij   = l_ni*l_nj
      nijag = Adw_nit * Adw_njt
*
      iimax = G_ni+2*Adw_halox-2
      jjmax = G_nj+Adw_haloy
      kkmax = l_nk-1
*



	count = 0




*
c!$omp parallel private(n,i,j,k,
c!$omp&  ii,jj,kk,capx,capy,capz,zcubic_L,
c!$omp&  o1, o2, o3, o4, a2, a3, b1, b2, b3, b4,
c!$omp&  c1, c2, c3, c4, d2, d3, p1, p2, p3, p4,
c!$omp&  rri,rrj,rrk,ra,rb,rc,rd,prmin,prmax)
c!$omp do
      do 100 k=1,kn
      do  90 j=j0,jn
      do  80 i=i0,in

	count = count+1


      n = (k-1)*nij + ((j-1)*l_ni) + i
*
      rri= F_x(n)
      ii = ( rri - Adw_x00_8 ) * Adw_ovdx_8
      ii = Adw_lcx( ii+1 ) + 1
      if ( rri .lt. Adw_bsx_8(ii) ) ii = ii - 1
      ii = max(2,min(ii,iimax))
      capx = (rri - Adw_bsx_8(ii)) / (Adw_bsx_8(ii+1) - Adw_bsx_8(ii))
*
      rrj= F_y(n)
      jj = ( rrj - Adw_y00_8 ) * Adw_ovdy_8
      jj = Adw_lcy( jj+1 ) + 1
      if ( rrj .lt. Adw_bsy_8(jj) ) jj = jj - 1
      jj = max(Adw_haloy,min(jj,jjmax))
      capy = (rrj - Adw_bsy_8(jj)) / (Adw_bsy_8(jj+1) - Adw_bsy_8(jj))
*
      rrk= F_z(n)
      kk = ( rrk - Adw_z00_8 ) * Adw_ovdz_8
      kk = Adw_lcz( kk+1 )
      if ( rrk .lt. Adw_bsz_8(kk) ) kk = kk - 1
      kk = min(kkmax-1,max(0,kk))
      capz = (rrk - Adw_bsz_8(kk)) / (Adw_bsz_8(kk+1) - Adw_bsz_8(kk))
*
      zcubic_L = (kk.gt.0) .and. (kk.lt.kkmax-1)
*
* *********************************************************************
*     x interpolation
* *********************************************************************
      ra = Adw_bsx_8(ii-1)
      rb = Adw_bsx_8(ii  )
      rc = Adw_bsx_8(ii+1)
      rd = Adw_bsx_8(ii+2)
      p1 = triprd(rri,rb,rc,rd)*Adw_xabcd_8(ii)
      p2 = triprd(rri,ra,rc,rd)*Adw_xbacd_8(ii)
      p3 = triprd(rri,ra,rb,rd)*Adw_xcabd_8(ii)
      p4 = triprd(rri,ra,rb,rc)*Adw_xdabc_8(ii)
*
      o2 = (kk-1)*nijag + (jj-Adw_int_j_off-1)*Adw_nit + (ii-Adw_int_i_off)
      o3 = o2+Adw_nit
*
      if(zcubic_L) then
        a2 = (1.d0 - capx) * F_in(o2) + capx * F_in(o2+1)
        a3 = (1.d0 - capx) * F_in(o3) + capx * F_in(o3+1)
      else
	a2 = 0.d0
	a3 = 0.d0
      endif
*
      o2 = o2 + nijag
      o3 = o3 + nijag
      o1 = o2-Adw_nit
      o4 = o3+Adw_nit
*
      if (F_mono_L) then
        prmax = max(F_in(o2),F_in(o2+1),F_in(o3),F_in(o3+1))
        prmin = min(F_in(o2),F_in(o2+1),F_in(o3),F_in(o3+1))
      else
	prmax = 0.d0
	prmin = 0.d0
      endif
      b1 = (1.d0 - capx) * F_in(o1) + capx * F_in(o1+1)	
      b2 = p1 * F_in  (o2-1) + p2 * F_in  (o2) + p3 * F_in  (o2+1) + p4 * F_in  (o2+2)
      b3 = p1 * F_in  (o3-1) + p2 * F_in  (o3) + p3 * F_in  (o3+1) + p4 * F_in  (o3+2)
      b4 = (1.d0 - capx) * F_in(o4) + capx * F_in(o4+1)
*
      o1 = o1 + nijag
      o2 = o2 + nijag
      o3 = o3 + nijag
      o4 = o4 + nijag
*
      if (F_mono_L) then
        prmax = max(prmax,F_in(o2),F_in(o2+1),F_in(o3),F_in(o3+1))
        prmin = min(prmin,F_in(o2),F_in(o2+1),F_in(o3),F_in(o3+1))
      endif
      c1 = (1.d0 - capx) * F_in(o1) + capx * F_in(o1+1)
      c2 = p1 * F_in  (o2-1) + p2 * F_in  (o2) + p3 * F_in  (o2+1) + p4 * F_in  (o2+2)
      c3 = p1 * F_in  (o3-1) + p2 * F_in  (o3) + p3 * F_in  (o3+1) + p4 * F_in  (o3+2)
      c4 = (1.d0 - capx) * F_in(o4) + capx * F_in(o4+1)
*
      o2 = o2 + nijag
      o3 = o3 + nijag
*
      if(zcubic_L) then
	d2 = (1.d0 - capx) * F_in(o2) + capx * F_in(o2+1)
	d3 = (1.d0 - capx) * F_in(o3) + capx * F_in(o3+1)
      else
	d2 = 0.d0
	d3 = 0.d0
      endif
* *********************************************************************
*     y interpolation
* *********************************************************************
      ra = Adw_bsy_8(jj-1)
      rb = Adw_bsy_8(jj  )
      rc = Adw_bsy_8(jj+1)
      rd = Adw_bsy_8(jj+2)
      p1 = triprd(rrj,rb,rc,rd)*Adw_yabcd_8(jj)
      p2 = triprd(rrj,ra,rc,rd)*Adw_ybacd_8(jj)
      p3 = triprd(rrj,ra,rb,rd)*Adw_ycabd_8(jj)
      p4 = triprd(rrj,ra,rb,rc)*Adw_ydabc_8(jj)
*
      if (zcubic_L) a2 = (1.d0 - capy) * a2 + capy * a3
      b1 = p1 * b1 + p2 * b2 + p3 * b3 + p4 * b4
      c1 = p1 * c1 + p2 * c2 + p3 * c3 + p4 * c4
      if (zcubic_L) d2 = (1.d0 - capy) * d2 + capy * d3
* *********************************************************************
*     z interpolation
* *********************************************************************
      if(zcubic_L) then
      ra = Adw_bsz_8(kk-1)
      rb = Adw_bsz_8(kk  )
      rc = Adw_bsz_8(kk+1)
      rd = Adw_bsz_8(kk+2)
      p1 = triprd(rrk,rb,rc,rd)*Adw_zabcd_8(kk+1)
      p2 = triprd(rrk,ra,rc,rd)*Adw_zbacd_8(kk+1)
      p3 = triprd(rrk,ra,rb,rd)*Adw_zcabd_8(kk+1)
      p4 = triprd(rrk,ra,rb,rc)*Adw_zdabc_8(kk+1)
*
      F_out(n) = p1 * a2 + p2 * b1 + p3 * c1 + p4 * d2
*
      else
*
      F_out(n) = (1.d0 - capz) * b1 + capz * c1
*
      endif
*
      if (F_mono_L) F_out(n) = max ( prmin , min(prmax,F_out(n)) )
*
   80 continue
   90 continue
  100 continue
c!$omp enddo
c!$omp end parallel
*
      return
      end