!--------------------------------------- LICENCE BEGIN -----------------------------------
!Environment Canada - Atmospheric Science and Technology License/Disclaimer,
!                     version 3; Last Modified: May 7, 2008.
!This is free but copyrighted software; you can use/redistribute/modify it under the terms
!of the Environment Canada - Atmospheric Science and Technology License/Disclaimer
!version 3 or (at your option) any later version that should be found at:
!http://collaboration.cmc.ec.gc.ca/science/rpn.comm/license.html
!
!This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
!without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
!See the above mentioned License/Disclaimer for more details.
!You should have received a copy of the License/Disclaimer along with this software;
!if not, you can write to: EC-RPN COMM Group, 2121 TransCanada, suite 500, Dorval (Quebec),
!CANADA, H9P 1J3; or send e-mail to service.rpn@ec.gc.ca
!-------------------------------------- LICENCE END --------------------------------------


      subroutine n1qn3a (simul,prosca,dtonb,dtcab,n,x,f,g,dxmin,df1, 1,11
     /                   epsg,impres,io,mode,niter,nsim,inmemo,m,jmin,
     /                   jmax,d,gg,diag,aux,ybar,sbar,izs,rzs,dzs)
c----
c
c     Code d'optimisation proprement dit.
c
c----
c
c         arguments
c
      use mpi_mod

      logical inmemo
      integer n,impres,io,mode,niter,nsim,m,jmin,jmax,izs(1)
      real rzs(1)
      double precision x(n),f,g(n),dxmin,df1,epsg,d(n),gg(n),diag(n),
     &    aux(n),ybar(n,1),sbar(n,1),dzs(1)
      external simul,prosca,dtonb,dtcab
c
c         variables locales
c
      logical sscale,cold,warm
      integer i,itmax,moderl,isim,jcour,indic,ierr,impresmax
      double precision d1,t,tmin,tmax,gnorm,eps1,ff,preco,precos,ys,den,
     &    dk,dk1,ps,ps2,hp0
c
c         parametres
c
      double precision rm1,rm2
      parameter (rm1=0.0001d+0,rm2=0.9d+0)
      double precision pi
      parameter (pi=3.1415927d+0)
      double precision rmin
c
c---- initialisation
c
      call tmg_start(73,'N1QN3A')
      rmin=1.d-20
c
      sscale=.true.
      if (mod(mode,2).eq.0) sscale=.false.
c
      warm=.false.
      if (mode/2.eq.1) warm=.true.
      cold=.not.warm
c
      itmax=niter
      niter=0
      isim=1
      eps1=1.d+0
c
      call rpn_comm_allreduce(impres,impresmax,1,"mpi_integer",
     &                        "mpi_max","GRID",ierr)
c
      call tmg_stop(73)
      call prosca (n,g,g,ps,izs,rzs,dzs)
      call tmg_start(73,'N1QN3A')
      gnorm=dsqrt(ps)
      if (impres.ge.1) write (io,900) f,gnorm
  900 format (5x,"f         = ",d15.8
     /       /5x,"norm of g = ",d15.8)
      if (gnorm.lt.rmin) then
          mode=2
          if (impres.ge.1) write (io,901)
          goto 1000
      endif
  901 format (/" >>> n1qn3a: initial gradient is too small")
c
c     --- initialisation pour ddd
c
      if (cold) then
          jmin=1
          jmax=0
      endif
      jcour=1
      if (inmemo) jcour=jmax
c
c     --- mise a l'echelle de la premiere direction de descente
c
      if (cold) then
c
c         --- use Fletcher's scaling and initialize diag to 1.
c
          precos=2.d+0*df1/gnorm**2
          do 10 i=1,n
              d(i)=-g(i)*precos
              diag(i)=1.d+0
   10     continue
          if (impres.ge.5) write(io,902) precos
  902     format (/" n1qn3a: descent direction -g: precon = ",d10.3)
      else
c
c         --- use the matrix stored in [diag and] the (y,s) pairs
c
          if (sscale) then
              call tmg_stop(73)
              call prosca (n,ybar(1,jcour),ybar(1,jcour),ps,izs,rzs,dzs)
              call tmg_start(73,'N1QN3A')
              precos=1.d+0/ps
          endif
          do 11 i=1,n
              d(i)=-g(i)
  11      continue
          if (inmemo) then
              call tmg_stop(73)
              call ddd (prosca,dtonb,dtcab,n,sscale,m,d,aux,jmin,jmax,
     /                 precos,diag,ybar,sbar,izs,rzs,dzs)
              call tmg_start(73,'N1QN3A')
          else
              call tmg_stop(73)
              call ddds (prosca,dtonb,dtcab,n,sscale,m,d,aux,jmin,jmax,
     /                  precos,diag,ybar,sbar,izs,rzs,dzs)
              call tmg_start(73,'N1QN3A')
          endif
      endif
c
      if (impres.eq.3) then
          write(io,903)
          write(io,904)
      endif
      if (impres.eq.4) write(io,903)
  903 format (/,1x,79("-"))
  904 format (1x)
c
c     --- initialisation pour nlis0
c
      tmax=1.d+20
      call tmg_stop(73)
      call prosca (n,d,g,hp0,izs,rzs,dzs)
      call tmg_start(73,'N1QN3A')
      if (hp0.ge.0.d+0) then
          mode=7
          if (impres.ge.1) write (io,905) niter,hp0
          goto 1000
      endif
  905 format (/" >>> n1qn3 (iteration ",i2,"): "
     /        /5x," the search direction d is not a ",
     /         "descent direction: (g,d) = ",d12.5)
c
c     --- compute the angle (-g,d)
c
      if (warm.and.impresmax.ge.5) then
          call tmg_stop(73)
          call prosca (n,g,g,ps,izs,rzs,dzs)
          ps=dsqrt(ps)
          call prosca (n,d,d,ps2,izs,rzs,dzs)
          call tmg_start(73,'N1QN3A')
          ps2=dsqrt(ps2)
          ps=hp0/ps/ps2
          ps=dmin1(-ps,1.d+0)
          ps=dacos(ps)
          d1=ps*180.d+0/pi
          if(impres.ge.5) write (io,906) sngl(d1)
      endif
  906 format (/" n1qn3: descent direction d: ",
     /        "angle(-g,d) = ",f5.1," degrees")
c
c---- Debut de l'iteration. on cherche x(k+1) de la forme x(k) + t*d,
c     avec t > 0. On connait d.
c
c         Debut de la boucle: etiquette 100,
c         Sortie de la boucle: goto 1000.
c
100   niter=niter+1
      if (impres.lt.0) then
          if (mod(niter,-impres).eq.0) then
              indic=1
              call tmg_stop(73)
              call simul (indic,n,x,f,g,izs,rzs,dzs)
              call tmg_start(73,'N1QN3A')
          endif
      endif
      if (impres.ge.5) write(io,903)
      if (impres.ge.4) write(io,904)
      if (impres.ge.3) write (io,910) niter,isim,f,hp0
  910 format (" n1qn3: iter ",i3,", simul ",i3,
     /        ", f=",d15.8,", h'(0)=",d12.5)
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) 
      do 101 i=1,n
          gg(i)=g(i)
101   continue
!$OMP END PARALLEL DO
      ff=f
c
c     --- recherche lineaire et nouveau point x(k+1)
c
      if (impres.ge.5) write (io,911)
  911 format (/" n1qn3: line search")
c
c         --- calcul de tmin
c
      tmin=0.d+0
      do 200 i=1,n
          tmin=max(tmin,dabs(d(i)))
200   continue
      call tmg_start(79,'QN_COMM')
      call rpn_comm_allreduce(tmin,tmin,1,"mpi_double_precision",
     &                        "mpi_max","GRID",ierr)
      call tmg_stop(79)

      tmin=dxmin/tmin
      t=1.d+0
      d1=hp0
c
      call tmg_stop(73)
      call nlis0 (n,simul,prosca,x,f,d1,t,tmin,tmax,d,g,rm2,rm1,
     /           impres,io,moderl,isim,nsim,aux,izs,rzs,dzs)
      call tmg_start(73,'N1QN3A')
c
c         --- nlis0 renvoie les nouvelles valeurs de x, f et g
c
      if (moderl.ne.0) then
          if (moderl.lt.0) then
c
c             --- calcul impossible
c                 t, g: ou les calculs sont impossibles
c                 x, f: ceux du t_gauche (donc f <= ff)
c
              mode=moderl
          elseif (moderl.eq.1) then
c
c             --- descente bloquee sur tmax
c                 [sortie rare (!!) d'apres le code de nlis0]
c
              mode=3
              if (impres.ge.1) write(io,912) niter
  912         format (/" >>> n1qn3 (iteration ",i3,
     /                "): line search blocked on tmax: ",
     /                "decrease the scaling")
          elseif (moderl.eq.4) then
c
c             --- nsim atteint
c                 x, f: ceux du t_gauche (donc f <= ff)
c
              mode=5
          elseif (moderl.eq.5) then
c
c             --- arret demande par l'utilisateur (indic = 0)
c                 x, f: ceux en sortie du simulateur
c
              mode=0
          elseif (moderl.eq.6) then
c
c             --- arret sur dxmin ou appel incoherent
c                 x, f: ceux du t_gauche (donc f <= ff)
c
              mode=6
          endif
          goto 1000
      endif
c
c NOTE: stopping tests are now done after having updated the matrix, so
c that update information can be stored in case of a later warm restart
c
c     --- mise a jour de la matrice
c
      if (m.gt.0) then
c
c         --- mise a jour des pointeurs
c
          jmax=jmax+1
          if (jmax.gt.m) jmax=jmax-m
          if ((cold.and.niter.gt.m).or.(warm.and.jmin.eq.jmax)) then
              jmin=jmin+1
              if (jmin.gt.m) jmin=jmin-m
          endif
          if (inmemo) jcour=jmax
c
c         --- y, s et (y,s)
c
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) 
          do 400 i=1,n
              sbar(i,jcour)=t*d(i)
              ybar(i,jcour)=g(i)-gg(i)
400       continue
!$OMP END PARALLEL DO
          if (impresmax.ge.5) then
              call tmg_stop(73)
              call prosca (n,sbar(1,jcour),sbar(1,jcour),ps,izs,rzs,dzs)
              call tmg_start(73,'N1QN3A')
              dk1=dsqrt(ps)
              if (impres.ge.5.and.niter.gt.1) write (io,930) dk1/dk
  930         format (/" n1qn3: convergence rate, s(k)/s(k-1) = ",
     /                d12.5)
              dk=dk1
          endif
          call tmg_stop(73)
          call prosca (n,ybar(1,jcour),sbar(1,jcour),ys,izs,rzs,dzs)
          call tmg_start(73,'N1QN3A')
          if (ys.le.0.d+0) then
              mode=7
              if (impres.ge.1) write (io,931) niter,ys
  931         format (/" >>> n1qn3 (iteration ",i2,
     /                "): the scalar product (y,s) = ",d12.5
     /                /27x,"is not positive")
              goto 1000
          endif
c
c         --- ybar et sbar
c
          d1=dsqrt(1.d+0/ys)
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) 
          do 410 i=1,n
              sbar(i,jcour)=d1*sbar(i,jcour)
              ybar(i,jcour)=d1*ybar(i,jcour)
  410     continue
!$OMP END PARALLEL DO
          call tmg_stop(73)
          if (.not.inmemo) call dystbl (.true.,ybar,sbar,n,jmax)
          call tmg_start(73,'N1QN3A')
c
c         --- compute the scalar or diagonal preconditioner
c
          if (impres.ge.5) write(io,932)
  932     format (/" n1qn3: matrix update:")
c
c             --- Here is the Oren-Spedicato factor, for scalar scaling
c
          if (sscale) then
              call tmg_stop(73)
              call prosca (n,ybar(1,jcour),ybar(1,jcour),ps,izs,rzs,dzs)
              call tmg_start(73,'N1QN3A')
              precos=1.d+0/ps
c
              if (impres.ge.5) write (io,933) precos
  933         format (5x,"Oren-Spedicato factor = ",d10.3)
c
c             --- Scale the diagonal to Rayleigh's ellipsoid.
c                 Initially (niter.eq.1) and for a cold start, this is
c                 equivalent to an Oren-Spedicato scaling of the
c                 identity matrix.
c
          else
              call tmg_stop(73)
              call dtonb (n,ybar(1,jcour),aux,izs,rzs,dzs)
              call tmg_start(73,'N1QN3A')
              ps=0.d0
              do 420 i=1,n
                  ps=ps+diag(i)*aux(i)*aux(i)
  420         continue
              call tmg_start(79,'QN_COMM')
              call mpi_allreduce_sumreal8scalar(ps,"GRID")
              call tmg_stop(79)
              d1=1.d0/ps
              if (impres.ge.5) then
                  write (io,934) d1
  934             format(5x,"fitting the ellipsoid: factor = ",d10.3)
              endif
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) 
              do 421 i=1,n
                  diag(i)=diag(i)*d1
  421         continue
!$OMP END PARALLEL DO
c
c             --- update the diagonal
c                 (gg is used as an auxiliary vector)
c
              call tmg_stop(73)
              call dtonb (n,sbar(1,jcour),gg,izs,rzs,dzs)
              call tmg_start(73,'N1QN3A')
              ps=0.d0
              do 430 i=1,n
                  ps=ps+gg(i)*gg(i)/diag(i)
  430         continue
              call tmg_start(79,'QN_COMM')
              call mpi_allreduce_sumreal8scalar(ps,"GRID")
              call tmg_stop(79)
              den=ps
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) 
              do 431 i=1,n
                  diag(i)=1.d0/
     &                   (1.d0/diag(i)+aux(i)**2-(gg(i)/diag(i))**2/den)
                  if (diag(i).le.0.d0) then
                      if (impres.ge.5) write (io,935) i,diag(i),rmin
                      diag(i)=rmin
                  endif
  431         continue
!$OMP END PARALLEL DO
  935         format (/" >>> n1qn3-WARNING: diagonal element ",i8,
     &                 " is negative (",d10.3,"), reset to ",d10.3)
c
              if (impresmax.ge.5) then
                  ps=0.d0
                  do 440 i=1,n
                      ps=ps+diag(i)
  440             continue
                  call tmg_start(79,'QN_COMM')
                  call mpi_allreduce_sumreal8scalar(ps,"GRID")
                  call tmg_stop(79)
                  ps=ps/n
                  preco=ps
c
                  ps2=0.d0
                  do 441 i=1,n
                      ps2=ps2+(diag(i)-ps)**2
  441             continue
                  call tmg_start(79,'QN_COMM')
                  call mpi_allreduce_sumreal8scalar(ps2,"GRID")
                  call tmg_stop(79)
                  ps2=dsqrt(ps2/n)
                  if (impres.ge.5) write (io,936) preco,ps2
  936             format (5x,"updated diagonal: average value = ",d10.3,
     &                   ", sqrt(variance) = ",d10.3)
              endif
          endif
      endif
c
c     --- tests d'arret
c
      call tmg_stop(73)
      call prosca(n,g,g,ps,izs,rzs,dzs)
      call tmg_start(73,'N1QN3A')
      eps1=ps
      eps1=dsqrt(eps1)/gnorm
c
      if (impres.ge.5) write (io,940) eps1
  940 format (/" n1qn3: stopping criterion on g: ",d12.5)
      if (eps1.lt.epsg) then
          mode=1
          goto 1000
      endif
      if (niter.eq.itmax) then
          mode=4
          if (impres.ge.1) write (io,941) niter
  941     format (/" >>> n1qn3 (iteration ",i3,
     /            "): maximal number of iterations")
          goto 1000
      endif
      if (isim.gt.nsim) then
          mode=5
          if (impres.ge.1) write (io,942) niter,isim
  942     format (/" >>> n1qn3 (iteration ",i3,"): ",i6,
     /            " simulations (maximal number reached)")
          goto 1000
      endif
c
c     --- calcul de la nouvelle direction de descente d = - H.g
c
      if (m.eq.0) then
          preco=2.d0*(ff-f)/(eps1*gnorm)**2
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) 
          do 500 i=1,n
              d(i)=-g(i)*preco
  500     continue
!$OMP END PARALLEL DO
      else
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I) 
          do 510 i=1,n
              d(i)=-g(i)
  510     continue
!$OMP END PARALLEL DO
          if (inmemo) then
              call tmg_stop(73)
              call ddd (prosca,dtonb,dtcab,n,sscale,m,d,aux,jmin,jmax,
     /                  precos,diag,ybar,sbar,izs,rzs,dzs)
              call tmg_start(73,'N1QN3A')
          else
              call tmg_stop(73)
              call ddds (prosca,dtonb,dtcab,n,sscale,m,d,aux,jmin,jmax,
     /                   precos,diag,ybar,sbar,izs,rzs,dzs)
              call tmg_start(73,'N1QN3A')
          endif
      endif
c
c         --- test: la direction d est-elle de descente ?
c             hp0 sera utilise par nlis0
c
      call tmg_stop(73)
      call prosca (n,d,g,hp0,izs,rzs,dzs)
      call tmg_start(73,'N1QN3A')
      if (hp0.ge.0.d+0) then
          mode=7
          if (impres.ge.1) write (io,905) niter,hp0
          goto 1000
      endif
      if (impresmax.ge.5) then
          call tmg_stop(73)
          call prosca (n,g,g,ps,izs,rzs,dzs)
          ps=dsqrt(ps)
          call prosca (n,d,d,ps2,izs,rzs,dzs)
          call tmg_start(73,'N1QN3A')
          ps2=dsqrt(ps2)
          ps=hp0/ps/ps2
          ps=dmin1(-ps,1.d+0)
          ps=dacos(ps)
          d1=ps
          d1=d1*180.d0/pi
          if (impres.ge.5) write (io,906) sngl(d1)
      endif
c
c---- on poursuit les iterations
c
      goto 100
c
c---- retour
c
 1000 continue
      nsim=isim
      epsg=eps1

      call tmg_stop(73)

      return
      end