subroutine cg (mvprod,n,x,b,a,xx,bb,two,q,r,rr,v,rm,epsneg, 1,5 & eps2,iter,imp,io,imode,mode, & bfgsp,pmat0, & m0,nys0,jmin0,jmax0,ybar0,sbar0,size0, & bfgsb,pmat1, & m1,nys1,jmin1,jmax1,jol,ybar1,sbar1,size1,ol, & select, & rho,aux,izs,rzs,dzs) c external mvprod logical two integer n,iter,imp,io,imode(3),mode,bfgsb,select,bfgsp, & m0,nys0,jmin0,jmax0, & m1,nys1,jmin1,jmax1, & jol(m1),izs(1) real rzs(1) double precision x(n),b(n),a(1),xx(n),bb(n),q(n),r(n),rr(n),v(n), & rm(n),epsneg,eps2, & pmat0(n,n),ybar0(n,m0),sbar0(n,m0),size0, & pmat1(n,n),ybar1(n,m1),sbar1(n,m1),size1, & aux(n),ol(m1),rho(m0),dzs(1) c c---- c c This routine solves the linear system (LS) Ax=b, for x, c by the possibly preconditioned Fletcher-Reeves CG algorithm. c The preconditioning can come from l-BFGS updates stored in ybar0, c sbar0 and rho. c c The residual is denoted by r = Ax-b and the preconditioning c matrix by P (=+- inverse of A). c c The first iterate is supposed to be in x. c c Input: c c n (integer): dimension of the LS to solve. c a (double precision vector): name of the vector storing the c matrix A, when necessary. c iter = max number of iterations c imp: printing levels c <= 0: nothing is printed, c = 1: error, terminal, and stopping messages, c = 2: also, one line per iteration. c bfgsb: c =0: don't build a BFGS preconditioning matrix c =1: build a full BFGS preconditioning matrix in pmat1, if c pmat1(1,1) <= 0., the matrix is re-initialized c =2: build a limited memory BFGS preconditioning matrix c bfgsp: c =0: no BFGS preconditioning c =1: full BFGS preconditioning with pmat0 c =2: limited memory BFGS preconditioning c c Output: c c x (double precision vector of dim n): approximate solution of c the (LS). c b (double precision vector of dim n): RHS of the (LS). c q (double precision vector of dim n): auxiliary vector for c storing Av. c r (double precision vector of dim n): updated residual. c v (double precision vector of dim n): inner CG directions. c rm (double precision vector of dim n): auxiliary vector for c storing Pr. c>>>>>>>>>>>> a mon avis inutile, pourrait etre stocke dans q. c epsneg = value of the last Rayleigh quotient v'Av/v'v c encountered (not modified if v=0) c iter = number of iterations c c---- c c --- parameter c double precision pi parameter (pi=3.1415927d0) c c --- local variables c restart=.true.: c restarts are allowed. A restart of the CG iterations c occurs at iterations n, 3n/2, 9n/4, ... or when c (Pr_,r)>=0.2|Pr_||r| (Powell's criterion). c r2i = |initial residual|^2 c v2 = |v|^2 c pprr = previous prr c prr = <Pr,r> c r2 = |r|^2 c avv = <Av,v> c rel_r=(|r|/|r0|)**2: relative r c cost = (Ax,x)/2 - (b,x) c x2 = |x|^2 c logical restart integer miter,iterr,niter,biter,i,j,ysmode character sc double precision r2i,rr2i,v2,pprr,a1,prr,r2,rr2,avv,rel_r,alpha, & aalpha,beta,cost,ccost,mcost,rayl,raylmax,gperp,rm2,dangle,x2, & t,sp,nu c c --- initialisation c miter = max iterations c niter = iteration nbr of the next forced restart c iterr = iteration nbr of the previous restart c biter = bad iteration nbr (will produce failure) c miter=iter niter=n iterr=1 biter=int(sqrt(dble(n*n*n))) iter=0 rel_r=1.d0 gperp=0.d0 if (select.eq.0) sc='S' nu=epsneg raylmax=0.d0 c c --- In my experience, it is better not doing restarts c restart=.false. C+ restart = .true. c c --- force initialization of the l-BFGS matrix if appropriate c nys1=0 c c --- initial residual r = Ax-b (and rr = A xx - bb, if appropriate) c r2i = |initial residual|^2 c if (imode(1).eq.0) then do i=1,n x(i)=0.d0 a1=-b(i) r(i)=a1 rm(i)=a1 enddo else call mvprod (n,a,x,r,izs,rzs,dzs) do i=1,n a1=r(i)-b(i) r(i)=a1 rm(i)=a1 enddo endif if (two) then if (imode(2).eq.0) then do i=1,n xx(i)=0.d0 rr(i)=-bb(i) enddo else call mvprod (n,a,xx,rr,izs,rzs,dzs) do i=1,n a1=rr(i)-bb(i) rr(i)=a1 enddo endif endif c call deuclid (n,r,r,r2i,izs,rzs,dzs) r2=r2i if (r2i.eq.0.d0) then mode=6 if (imp.gt.0) write (io,900) goto 1000 endif 900 format(/4x,"n1cga-WARNING: zero initial residual") if (two) then call deuclid (n,rr,rr,rr2i,izs,rzs,dzs) if (rr2i.eq.0.d0) rr2i=1.d0 endif c c --- rm = Pr c prr = <Pr,r> c c>>>>>>>>> les 3 lignes ci-dessous me semblent inutiles (4/3/99) do i=1,n rm(i)=r(i) enddo if (bfgsp.eq.1) then do i=1,n rm(i)=0.d0 enddo do j=1,n a1=r(j) do i=1,n rm(i)=rm(i)+pmat0(i,j)*a1 enddo enddo elseif (bfgsp.eq.2) then call dbfgsl
(n,rm,m0,nys0,jmin0,jmax0,ybar0,sbar0,rho, & size0,izs,rzs,dzs) endif call deuclid (n,rm,rm,rm2,izs,rzs,dzs) call deuclid (n,r,rm,prr,izs,rzs,dzs) c c --- initial search direction v c v2 = |v|^2 c do i=1,n v(i)=-rm(i) enddo v2=rm2 c c --- cost c cost = current cost c mcost = min cost encountered c cost=0.d0 ccost=0.d0 mcost=0.d0 c c --- printings c if (imp.ge.2) then write (io,901) sqrt(r2i) if (two) write (io,902) sqrt(rr2i) endif 901 format (/4x,"n1cga: initial residual |r0| =",1pd13.6) 902 format (24x,"LS2 |r0| =",1pd13.6) c c --- start the CG iterations at 100, end at 1000 c 100 iter=iter+1 c c --- check iter (force convergence, however) c if (iter.gt.miter) then mode=4 iter=iter-1 if (imp.gt.0) write (io,910) goto 1000 endif 910 format(/4x,"n1cga: max number iterations reached") c c --- too many iterations ? c c if (iter.gt.biter) then c mode=5 c if (imp.gt.0) write (io,911) biter c goto 1000 c endif c 911 format(/4x,"n1cga-ERROR: number of iterations exceeds",i6) c c --- restart ? c c>>>>>>>>>> this part of the code has not been modified to take care of c the case when there are two linear systems to solve c simultaneously (two=.true.) c c if (restart.and.(gperp.gt.0.2d0.or.iter.gt.niter)) then if (restart.and.(gperp.gt.0.2d0.or.iter.gt.niter)) then if (iter.gt.niter) niter=niter*3/2 iterr=iter if (imp.ge.2) write (io,912) 912 format(4x,"n1cga: restart") c c --- recompute the residuals r and rm c call mvprod (n,a,x,r,izs,rzs,dzs) do i=1,n a1=r(i)-b(i) r(i)=a1 rm(i)=a1 enddo call deuclid (n,r,r,r2,izs,rzs,dzs) if (bfgsp.eq.1) then do i=1,n rm(i)=0.d0 enddo do j=1,n a1=r(j) do i=1,n rm(i)=rm(i)+pmat0(i,j)*a1 enddo enddo elseif (bfgsp.eq.2) then call dbfgsl
(n,rm,m0,nys0,jmin0,jmax0,ybar0,sbar0,rho, & size0,izs,rzs,dzs) endif call deuclid (n,rm,rm,rm2,izs,rzs,dzs) call deuclid (n,r,rm,prr,izs,rzs,dzs) c c --- and the search direction v c do i=1,n v(i)=-rm(i) enddo v2=rm2 endif c c --- q = Av c avv = <v,q> = <Av,v> c c +++ La condition if est ajoutee car le calcul de q et avv c +++ a deja ete effectue lors du calcul et eta et delta c +++ pendant l'iteration precente c+ if(iter == 1) then call mvprod (n,a,v,q,izs,rzs,dzs) call deuclid (n,v,q,avv,izs,rzs,dzs) c+ end if c c --- check positive definiteness c if (v2.ne.0.d0) then rayl=avv/v2 raylmax=max(raylmax,rayl) if (imode(3).eq.1) nu=epsneg*raylmax endif if (avv.le.nu*v2) then mode=3 if (v2.ne.0.d0) epsneg=avv/v2 if (imp.gt.0.and.v2.ne.0.d0) write (io,913) rayl goto 1000 endif 913 format(/4x,"n1cga-ERROR: non positive definite matrix,", & " <Av,v>/|v|^2 = ",1pd9.2) c c --- update the preconditioning matrix when appropriate c y=q=Av, s=v, ys=avv c if (bfgsb.eq.1) then c c --- full inverse BFGS update c c --- initialize the matrix pmat1 ? c if (iter.eq.1.and.pmat1(1,1).le.0.d0) then c call deuclid (n,q,q,a1,izs,rzs,dzs) c a1=a1/avv call deuclid (n,v,v,a1,izs,rzs,dzs) a1=a1/avv if (imp.ge.2) write (io,914) a1 do i=1,n do j=1,n pmat1(i,j)=0.d0 enddo pmat1(i,i)=a1 enddo endif c c --- update pmat1 c call dbfgsi
(n,pmat1,q,v,avv,aux,izs,rzs,dzs) elseif (bfgsb.eq.2) then c c --- limited memory inverse BFGS update c sc='-' call dysave
(n,q,v,avv,m1,nys1,jmin1,jmax1,ybar1,sbar1,select, & iter,ol,jol,nu,size1,ysmode,4,io,izs, & rzs,dzs) if (ysmode.eq.0) sc='S' if (ysmode.gt.0) then mode=7 goto 1001 endif endif 914 format(4x,"n1cga: initialization of the BFGS matrix with OL", & " factor =",1pd13.6) c c --- new iterate x and residual r c new iterate xx and residual rr c alpha=prr/avv x2=0.d0 do i=1,n t=x(i)+alpha*v(i) x(i)=t x2=x2+t*t r(i)=r(i)+alpha*q(i) enddo if (two) then call deuclid (n,rr,v,t,izs,rzs,dzs) aalpha=-t/avv do i=1,n xx(i)=xx(i)+aalpha*v(i) rr(i)=rr(i)+aalpha*q(i) enddo endif c c --- stop if the cost increases c mcost=min(cost,mcost) cost=0.d0 do i=1,n cost=cost+(r(i)-b(i))*x(i) enddo cost=0.5d0*cost if (cost.gt.0.999999d0*mcost) then mode=8 if (imp.gt.0) write (io,915) print*,'cost mcost',cost, mcost goto 1000 endif 915 format(/4x,"n1cga-ERROR: cost increase (rounding error)") c if (two) then ccost=0.d0 do i=1,n ccost=ccost+(rr(i)-bb(i))*xx(i) enddo ccost=0.5d0*ccost endif c c --- r2=|r|^2 c call deuclid (n,r,r,r2,izs,rzs,dzs) if (two) call deuclid (n,rr,rr,rr2,izs,rzs,dzs) c c --- prepare to check conjugacy c gperp = (rmo,r)/|rmo|/|r| == 0 ? c call deuclid (n,rm,r,gperp,izs,rzs,dzs) t=sqrt(r2*rm2) if (t.ne.0.d0) then gperp=gperp/t else gperp=0.d0 endif c c --- new rm c rm2 = |rm|^2 c new pprr=prr c prr=<Pr,r> c do i=1,n rm(i)=r(i) enddo if (bfgsp.eq.1) then do i=1,n rm(i)=0.d0 enddo do j=1,n a1=r(j) do i=1,n rm(i)=rm(i)+pmat0(i,j)*a1 enddo enddo elseif (bfgsp.eq.2) then call dbfgsl
(n,rm,m0,nys0,jmin0,jmax0,ybar0,sbar0,rho, & size0,izs,rzs,dzs) endif call deuclid (n,rm,rm,rm2,izs,rzs,dzs) pprr=prr call deuclid (n,r,rm,prr,izs,rzs,dzs) c c --- some printings c if (imp.ge.2) then call deuclid (n,rm,rm,sp,izs,rzs,dzs) dangle=0.d0 if (sqrt(sp*r2).ne.0.d0) & dangle=acos(prr/sqrt(sp*r2))/pi*180.d0 if (iter.eq.1) write (io,916) write (io,917) iter,cost,sqrt(r2/r2i),dangle,rayl,gperp, & alpha,sqrt(x2),sc if (two) write (io,918) ccost,sqrt(rr2/rr2i) call flush (io) endif 916 format (/4x,"n1cga: iter",6x,"cost",7x,"|r|/|r0|",2x,"<(r,Pr)", & "(Av,v)/|v|^2",4x,"conj",3x,"alpha",4x,"|x|",3x,"S") 917 format (4x,"n1cga:",i5,1x,1pe13.6,1x,e12.6,1x,0pf5.2,1x,1pe12.6, & 1x,e9.2,1x,e6.0,1x,e7.1,1x,a1) 918 format (6x,"LS2 ",6x,1pe13.6,1x,e12.6) c c --- stop on the relative residual (eps2) c rel_r=r2/r2i if (rel_r.le.eps2) then mode=0 if (imp.gt.0) write (io,919) sqrt(rel_r),sqrt(eps2) goto 1000 endif 919 format(/4x,"n1cga: stopping criterion satisfied, |r|/|r0| = ", & 1pd8.2," <= ",d8.2) c c --- FR beta, new v, v2=|v|^2 c 80 beta=prr/pprr do i=1,n v(i)=-rm(i)+beta*v(i) enddo call deuclid (n,v,v,v2,izs,rzs,dzs) c c --- loop c goto 100 c c --- terminaison c 1000 continue eps2=rel_r if (imp.eq.1) write (io,920) iter,sqrt(r2i),sqrt(r2) 920 format(4x,"n1cga: niter=",i5,", |r|:",d13.6," -> ",d13.6) c 1001 continue if (imp.ge.1) then write (io,922) mode,iter,sqrt(eps2) elseif (imp.ge.2) then call mvprod (n,a,x,q,izs,rzs,dzs) cost=0.d0 do i=1,n cost=cost+x(i)*(q(i)/2.d0-b(i)) q(i)=q(i)-b(i) enddo call deuclid (n,q,q,t,izs,rzs,dzs) t=dsqrt(t) write (io,921) write (io,923) mode,iter,cost,sqrt(eps2),t c if (two) then call mvprod (n,a,xx,q,izs,rzs,dzs) cost=0.d0 do i=1,n cost=cost+xx(i)*(q(i)/2.d0-bb(i)) q(i)=q(i)-bb(i) enddo call deuclid (n,q,q,t,izs,rzs,dzs) write (io,924) cost,sqrt(t/rr2i),dsqrt(t) endif endif 921 format (/1x,79("-")) 922 format (/" n1cg1: output mode is ",i8 & /4x,"number of iterations = ",i4 & /4x,"relative residual = ",5x,1pd22.15) 923 format (/" n1cg1: output mode is ",i8 & /4x,"number of iterations = ",i4 & /4x,"cost function =",10x,1pd22.15 & /4x,"relative residual = ",5x,d22.15 & /4x,"residual = ",14x,d22.15) 924 format (/" n1cg1: LS2" & /4x,"cost function =",10x,1pd22.15 & /4x,"relative residual = ",5x,d22.15 & /4x,"residual = ",14x,d22.15) return end c c--------0---------0---------0---------0---------0---------0---------0-- c