!--------------------------------------- LICENCE BEGIN -----------------------------------
!Environment Canada - Atmospheric Science and Technology License/Disclaimer,
! version 3; Last Modified: May 7, 2008.
!This is free but copyrighted software; you can use/redistribute/modify it under the terms
!of the Environment Canada - Atmospheric Science and Technology License/Disclaimer
!version 3 or (at your option) any later version that should be found at:
!http://collaboration.cmc.ec.gc.ca/science/rpn.comm/license.html
!
!This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
!without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
!See the above mentioned License/Disclaimer for more details.
!You should have received a copy of the License/Disclaimer along with this software;
!if not, you can write to: EC-RPN COMM Group, 2121 TransCanada, suite 500, Dorval (Quebec),
!CANADA, H9P 1J3; or send e-mail to service.rpn@ec.gc.ca
!-------------------------------------- LICENCE END --------------------------------------
subroutine n1qn3a (simul,prosca,dtonb,dtcab,n,x,f,g,dxmin,df1, 1,11
/ epsg,impres,io,mode,niter,nsim,inmemo,m,jmin,
/ jmax,d,gg,diag,aux,ybar,sbar,izs,rzs,dzs)
c----
c
c Code d'optimisation proprement dit.
c
c----
c
c arguments
c
use mpi_mod
logical inmemo
integer n,impres,io,mode,niter,nsim,m,jmin,jmax,izs(1)
real rzs(1)
double precision x(n),f,g(n),dxmin,df1,epsg,d(n),gg(n),diag(n),
& aux(n),ybar(n,1),sbar(n,1),dzs(1)
external simul,prosca,dtonb,dtcab
c
c variables locales
c
logical sscale,cold,warm
integer i,itmax,moderl,isim,jcour,indic,ierr,impresmax
double precision d1,t,tmin,tmax,gnorm,eps1,ff,preco,precos,ys,den,
& dk,dk1,ps,ps2,hp0
c
c parametres
c
double precision rm1,rm2
parameter (rm1=0.0001d+0,rm2=0.9d+0)
double precision pi
parameter (pi=3.1415927d+0)
double precision rmin
c
c---- initialisation
c
call tmg_start(73,'N1QN3A')
rmin=1.d-20
c
sscale=.true.
if (mod(mode,2).eq.0) sscale=.false.
c
warm=.false.
if (mode/2.eq.1) warm=.true.
cold=.not.warm
c
itmax=niter
niter=0
isim=1
eps1=1.d+0
c
call rpn_comm_allreduce(impres,impresmax,1,"mpi_integer",
& "mpi_max","GRID",ierr)
c
call tmg_stop(73)
call prosca (n,g,g,ps,izs,rzs,dzs)
call tmg_start(73,'N1QN3A')
gnorm=dsqrt(ps)
if (impres.ge.1) write (io,900) f,gnorm
900 format (5x,"f = ",d15.8
/ /5x,"norm of g = ",d15.8)
if (gnorm.lt.rmin) then
mode=2
if (impres.ge.1) write (io,901)
goto 1000
endif
901 format (/" >>> n1qn3a: initial gradient is too small")
c
c --- initialisation pour ddd
c
if (cold) then
jmin=1
jmax=0
endif
jcour=1
if (inmemo) jcour=jmax
c
c --- mise a l'echelle de la premiere direction de descente
c
if (cold) then
c
c --- use Fletcher's scaling and initialize diag to 1.
c
precos=2.d+0*df1/gnorm**2
do 10 i=1,n
d(i)=-g(i)*precos
diag(i)=1.d+0
10 continue
if (impres.ge.5) write(io,902) precos
902 format (/" n1qn3a: descent direction -g: precon = ",d10.3)
else
c
c --- use the matrix stored in [diag and] the (y,s) pairs
c
if (sscale) then
call tmg_stop(73)
call prosca (n,ybar(1,jcour),ybar(1,jcour),ps,izs,rzs,dzs)
call tmg_start(73,'N1QN3A')
precos=1.d+0/ps
endif
do 11 i=1,n
d(i)=-g(i)
11 continue
if (inmemo) then
call tmg_stop(73)
call ddd
(prosca,dtonb,dtcab,n,sscale,m,d,aux,jmin,jmax,
/ precos,diag,ybar,sbar,izs,rzs,dzs)
call tmg_start(73,'N1QN3A')
else
call tmg_stop(73)
call ddds
(prosca,dtonb,dtcab,n,sscale,m,d,aux,jmin,jmax,
/ precos,diag,ybar,sbar,izs,rzs,dzs)
call tmg_start(73,'N1QN3A')
endif
endif
c
if (impres.eq.3) then
write(io,903)
write(io,904)
endif
if (impres.eq.4) write(io,903)
903 format (/,1x,79("-"))
904 format (1x)
c
c --- initialisation pour nlis0
c
tmax=1.d+20
call tmg_stop(73)
call prosca (n,d,g,hp0,izs,rzs,dzs)
call tmg_start(73,'N1QN3A')
if (hp0.ge.0.d+0) then
mode=7
if (impres.ge.1) write (io,905) niter,hp0
goto 1000
endif
905 format (/" >>> n1qn3 (iteration ",i2,"): "
/ /5x," the search direction d is not a ",
/ "descent direction: (g,d) = ",d12.5)
c
c --- compute the angle (-g,d)
c
if (warm.and.impresmax.ge.5) then
call tmg_stop(73)
call prosca (n,g,g,ps,izs,rzs,dzs)
ps=dsqrt(ps)
call prosca (n,d,d,ps2,izs,rzs,dzs)
call tmg_start(73,'N1QN3A')
ps2=dsqrt(ps2)
ps=hp0/ps/ps2
ps=dmin1(-ps,1.d+0)
ps=dacos(ps)
d1=ps*180.d+0/pi
if(impres.ge.5) write (io,906) sngl(d1)
endif
906 format (/" n1qn3: descent direction d: ",
/ "angle(-g,d) = ",f5.1," degrees")
c
c---- Debut de l'iteration. on cherche x(k+1) de la forme x(k) + t*d,
c avec t > 0. On connait d.
c
c Debut de la boucle: etiquette 100,
c Sortie de la boucle: goto 1000.
c
100 niter=niter+1
if (impres.lt.0) then
if (mod(niter,-impres).eq.0) then
indic=1
call tmg_stop(73)
call simul (indic,n,x,f,g,izs,rzs,dzs)
call tmg_start(73,'N1QN3A')
endif
endif
if (impres.ge.5) write(io,903)
if (impres.ge.4) write(io,904)
if (impres.ge.3) write (io,910) niter,isim,f,hp0
910 format (" n1qn3: iter ",i3,", simul ",i3,
/ ", f=",d15.8,", h'(0)=",d12.5)
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I)
do 101 i=1,n
gg(i)=g(i)
101 continue
!$OMP END PARALLEL DO
ff=f
c
c --- recherche lineaire et nouveau point x(k+1)
c
if (impres.ge.5) write (io,911)
911 format (/" n1qn3: line search")
c
c --- calcul de tmin
c
tmin=0.d+0
do 200 i=1,n
tmin=max(tmin,dabs(d(i)))
200 continue
call tmg_start(79,'QN_COMM')
call rpn_comm_allreduce(tmin,tmin,1,"mpi_double_precision",
& "mpi_max","GRID",ierr)
call tmg_stop(79)
tmin=dxmin/tmin
t=1.d+0
d1=hp0
c
call tmg_stop(73)
call nlis0
(n,simul,prosca,x,f,d1,t,tmin,tmax,d,g,rm2,rm1,
/ impres,io,moderl,isim,nsim,aux,izs,rzs,dzs)
call tmg_start(73,'N1QN3A')
c
c --- nlis0 renvoie les nouvelles valeurs de x, f et g
c
if (moderl.ne.0) then
if (moderl.lt.0) then
c
c --- calcul impossible
c t, g: ou les calculs sont impossibles
c x, f: ceux du t_gauche (donc f <= ff)
c
mode=moderl
elseif (moderl.eq.1) then
c
c --- descente bloquee sur tmax
c [sortie rare (!!) d'apres le code de nlis0]
c
mode=3
if (impres.ge.1) write(io,912) niter
912 format (/" >>> n1qn3 (iteration ",i3,
/ "): line search blocked on tmax: ",
/ "decrease the scaling")
elseif (moderl.eq.4) then
c
c --- nsim atteint
c x, f: ceux du t_gauche (donc f <= ff)
c
mode=5
elseif (moderl.eq.5) then
c
c --- arret demande par l'utilisateur (indic = 0)
c x, f: ceux en sortie du simulateur
c
mode=0
elseif (moderl.eq.6) then
c
c --- arret sur dxmin ou appel incoherent
c x, f: ceux du t_gauche (donc f <= ff)
c
mode=6
endif
goto 1000
endif
c
c NOTE: stopping tests are now done after having updated the matrix, so
c that update information can be stored in case of a later warm restart
c
c --- mise a jour de la matrice
c
if (m.gt.0) then
c
c --- mise a jour des pointeurs
c
jmax=jmax+1
if (jmax.gt.m) jmax=jmax-m
if ((cold.and.niter.gt.m).or.(warm.and.jmin.eq.jmax)) then
jmin=jmin+1
if (jmin.gt.m) jmin=jmin-m
endif
if (inmemo) jcour=jmax
c
c --- y, s et (y,s)
c
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I)
do 400 i=1,n
sbar(i,jcour)=t*d(i)
ybar(i,jcour)=g(i)-gg(i)
400 continue
!$OMP END PARALLEL DO
if (impresmax.ge.5) then
call tmg_stop(73)
call prosca (n,sbar(1,jcour),sbar(1,jcour),ps,izs,rzs,dzs)
call tmg_start(73,'N1QN3A')
dk1=dsqrt(ps)
if (impres.ge.5.and.niter.gt.1) write (io,930) dk1/dk
930 format (/" n1qn3: convergence rate, s(k)/s(k-1) = ",
/ d12.5)
dk=dk1
endif
call tmg_stop(73)
call prosca (n,ybar(1,jcour),sbar(1,jcour),ys,izs,rzs,dzs)
call tmg_start(73,'N1QN3A')
if (ys.le.0.d+0) then
mode=7
if (impres.ge.1) write (io,931) niter,ys
931 format (/" >>> n1qn3 (iteration ",i2,
/ "): the scalar product (y,s) = ",d12.5
/ /27x,"is not positive")
goto 1000
endif
c
c --- ybar et sbar
c
d1=dsqrt(1.d+0/ys)
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I)
do 410 i=1,n
sbar(i,jcour)=d1*sbar(i,jcour)
ybar(i,jcour)=d1*ybar(i,jcour)
410 continue
!$OMP END PARALLEL DO
call tmg_stop(73)
if (.not.inmemo) call dystbl
(.true.,ybar,sbar,n,jmax)
call tmg_start(73,'N1QN3A')
c
c --- compute the scalar or diagonal preconditioner
c
if (impres.ge.5) write(io,932)
932 format (/" n1qn3: matrix update:")
c
c --- Here is the Oren-Spedicato factor, for scalar scaling
c
if (sscale) then
call tmg_stop(73)
call prosca (n,ybar(1,jcour),ybar(1,jcour),ps,izs,rzs,dzs)
call tmg_start(73,'N1QN3A')
precos=1.d+0/ps
c
if (impres.ge.5) write (io,933) precos
933 format (5x,"Oren-Spedicato factor = ",d10.3)
c
c --- Scale the diagonal to Rayleigh's ellipsoid.
c Initially (niter.eq.1) and for a cold start, this is
c equivalent to an Oren-Spedicato scaling of the
c identity matrix.
c
else
call tmg_stop(73)
call dtonb (n,ybar(1,jcour),aux,izs,rzs,dzs)
call tmg_start(73,'N1QN3A')
ps=0.d0
do 420 i=1,n
ps=ps+diag(i)*aux(i)*aux(i)
420 continue
call tmg_start(79,'QN_COMM')
call mpi_allreduce_sumreal8scalar
(ps,"GRID")
call tmg_stop(79)
d1=1.d0/ps
if (impres.ge.5) then
write (io,934) d1
934 format(5x,"fitting the ellipsoid: factor = ",d10.3)
endif
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I)
do 421 i=1,n
diag(i)=diag(i)*d1
421 continue
!$OMP END PARALLEL DO
c
c --- update the diagonal
c (gg is used as an auxiliary vector)
c
call tmg_stop(73)
call dtonb (n,sbar(1,jcour),gg,izs,rzs,dzs)
call tmg_start(73,'N1QN3A')
ps=0.d0
do 430 i=1,n
ps=ps+gg(i)*gg(i)/diag(i)
430 continue
call tmg_start(79,'QN_COMM')
call mpi_allreduce_sumreal8scalar
(ps,"GRID")
call tmg_stop(79)
den=ps
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I)
do 431 i=1,n
diag(i)=1.d0/
& (1.d0/diag(i)+aux(i)**2-(gg(i)/diag(i))**2/den)
if (diag(i).le.0.d0) then
if (impres.ge.5) write (io,935) i,diag(i),rmin
diag(i)=rmin
endif
431 continue
!$OMP END PARALLEL DO
935 format (/" >>> n1qn3-WARNING: diagonal element ",i8,
& " is negative (",d10.3,"), reset to ",d10.3)
c
if (impresmax.ge.5) then
ps=0.d0
do 440 i=1,n
ps=ps+diag(i)
440 continue
call tmg_start(79,'QN_COMM')
call mpi_allreduce_sumreal8scalar
(ps,"GRID")
call tmg_stop(79)
ps=ps/n
preco=ps
c
ps2=0.d0
do 441 i=1,n
ps2=ps2+(diag(i)-ps)**2
441 continue
call tmg_start(79,'QN_COMM')
call mpi_allreduce_sumreal8scalar
(ps2,"GRID")
call tmg_stop(79)
ps2=dsqrt(ps2/n)
if (impres.ge.5) write (io,936) preco,ps2
936 format (5x,"updated diagonal: average value = ",d10.3,
& ", sqrt(variance) = ",d10.3)
endif
endif
endif
c
c --- tests d'arret
c
call tmg_stop(73)
call prosca(n,g,g,ps,izs,rzs,dzs)
call tmg_start(73,'N1QN3A')
eps1=ps
eps1=dsqrt(eps1)/gnorm
c
if (impres.ge.5) write (io,940) eps1
940 format (/" n1qn3: stopping criterion on g: ",d12.5)
if (eps1.lt.epsg) then
mode=1
goto 1000
endif
if (niter.eq.itmax) then
mode=4
if (impres.ge.1) write (io,941) niter
941 format (/" >>> n1qn3 (iteration ",i3,
/ "): maximal number of iterations")
goto 1000
endif
if (isim.gt.nsim) then
mode=5
if (impres.ge.1) write (io,942) niter,isim
942 format (/" >>> n1qn3 (iteration ",i3,"): ",i6,
/ " simulations (maximal number reached)")
goto 1000
endif
c
c --- calcul de la nouvelle direction de descente d = - H.g
c
if (m.eq.0) then
preco=2.d0*(ff-f)/(eps1*gnorm)**2
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I)
do 500 i=1,n
d(i)=-g(i)*preco
500 continue
!$OMP END PARALLEL DO
else
!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(I)
do 510 i=1,n
d(i)=-g(i)
510 continue
!$OMP END PARALLEL DO
if (inmemo) then
call tmg_stop(73)
call ddd
(prosca,dtonb,dtcab,n,sscale,m,d,aux,jmin,jmax,
/ precos,diag,ybar,sbar,izs,rzs,dzs)
call tmg_start(73,'N1QN3A')
else
call tmg_stop(73)
call ddds
(prosca,dtonb,dtcab,n,sscale,m,d,aux,jmin,jmax,
/ precos,diag,ybar,sbar,izs,rzs,dzs)
call tmg_start(73,'N1QN3A')
endif
endif
c
c --- test: la direction d est-elle de descente ?
c hp0 sera utilise par nlis0
c
call tmg_stop(73)
call prosca (n,d,g,hp0,izs,rzs,dzs)
call tmg_start(73,'N1QN3A')
if (hp0.ge.0.d+0) then
mode=7
if (impres.ge.1) write (io,905) niter,hp0
goto 1000
endif
if (impresmax.ge.5) then
call tmg_stop(73)
call prosca (n,g,g,ps,izs,rzs,dzs)
ps=dsqrt(ps)
call prosca (n,d,d,ps2,izs,rzs,dzs)
call tmg_start(73,'N1QN3A')
ps2=dsqrt(ps2)
ps=hp0/ps/ps2
ps=dmin1(-ps,1.d+0)
ps=dacos(ps)
d1=ps
d1=d1*180.d0/pi
if (impres.ge.5) write (io,906) sngl(d1)
endif
c
c---- on poursuit les iterations
c
goto 100
c
c---- retour
c
1000 continue
nsim=isim
epsg=eps1
call tmg_stop(73)
return
end