subroutine cg (mvprod,n,x,b,a,xx,bb,two,q,r,rr,v,rm,epsneg, 1,5
     &                  eps2,iter,imp,io,imode,mode,
     &                  bfgsp,pmat0,
     &                    m0,nys0,jmin0,jmax0,ybar0,sbar0,size0,
     &                  bfgsb,pmat1,
     &                    m1,nys1,jmin1,jmax1,jol,ybar1,sbar1,size1,ol,
     &                    select,
     &                  rho,aux,izs,rzs,dzs)
c
      external mvprod
      logical two
      integer n,iter,imp,io,imode(3),mode,bfgsb,select,bfgsp,
     &    m0,nys0,jmin0,jmax0,
     &    m1,nys1,jmin1,jmax1,
     &    jol(m1),izs(1)
      real rzs(1)
      double precision x(n),b(n),a(1),xx(n),bb(n),q(n),r(n),rr(n),v(n),
     &    rm(n),epsneg,eps2,
     &    pmat0(n,n),ybar0(n,m0),sbar0(n,m0),size0,
     &    pmat1(n,n),ybar1(n,m1),sbar1(n,m1),size1,
     &    aux(n),ol(m1),rho(m0),dzs(1)
c
c----
c
c     This routine solves the linear system (LS) Ax=b, for x,
c     by the possibly preconditioned Fletcher-Reeves CG algorithm.
c     The preconditioning can come from l-BFGS updates stored in ybar0,
c     sbar0 and rho.
c
c     The residual is denoted by r = Ax-b and the preconditioning
c     matrix by P (=+- inverse of A).
c
c     The first iterate is supposed to be in x.
c
c     Input:
c
c       n (integer): dimension of the LS to solve.
c       a (double precision vector): name of the vector storing the
c           matrix A, when necessary.
c       iter = max number of iterations
c       imp: printing levels
c         <= 0: nothing is printed,
c          = 1: error, terminal, and stopping messages,
c          = 2: also, one line per iteration.
c       bfgsb:
c         =0: don't build a BFGS preconditioning matrix
c         =1: build a full BFGS preconditioning matrix in pmat1, if
c             pmat1(1,1) <= 0., the matrix is re-initialized
c         =2: build a limited memory BFGS preconditioning matrix
c       bfgsp:
c         =0: no BFGS preconditioning
c         =1: full BFGS preconditioning with pmat0
c         =2: limited memory BFGS preconditioning
c
c     Output:
c
c       x (double precision vector of dim n): approximate solution of
c           the (LS).
c       b (double precision vector of dim n): RHS of the (LS).
c       q (double precision vector of dim n): auxiliary vector for
c           storing Av.
c       r (double precision vector of dim n): updated residual.
c       v (double precision vector of dim n): inner CG directions.
c       rm (double precision vector of dim n): auxiliary vector for
c           storing Pr.
c>>>>>>>>>>>> a mon avis inutile, pourrait etre stocke dans q.
c       epsneg = value of the last Rayleigh quotient v'Av/v'v
c         encountered (not modified if v=0)
c       iter = number of iterations
c
c----
c
c --- parameter
c
      double precision pi
      parameter (pi=3.1415927d0)
c
c --- local variables
c     restart=.true.:
c           restarts are allowed. A restart of the CG iterations
c           occurs at iterations n, 3n/2, 9n/4, ... or when
c           (Pr_,r)>=0.2|Pr_||r| (Powell's criterion).
c     r2i = |initial residual|^2
c     v2 = |v|^2
c     pprr = previous prr
c     prr = <Pr,r>
c     r2 = |r|^2
c     avv = <Av,v>
c     rel_r=(|r|/|r0|)**2: relative r
c     cost = (Ax,x)/2 - (b,x)
c     x2 = |x|^2
c
      logical restart
      integer miter,iterr,niter,biter,i,j,ysmode
      character sc
      double precision r2i,rr2i,v2,pprr,a1,prr,r2,rr2,avv,rel_r,alpha,
     &    aalpha,beta,cost,ccost,mcost,rayl,raylmax,gperp,rm2,dangle,x2,
     &    t,sp,nu

c
c --- initialisation
c     miter = max iterations
c     niter = iteration nbr of the next forced restart
c     iterr = iteration nbr of the previous restart
c     biter = bad iteration nbr (will produce failure)
c
      miter=iter
      niter=n
      iterr=1
      biter=int(sqrt(dble(n*n*n)))
      iter=0
      rel_r=1.d0
      gperp=0.d0
      if (select.eq.0) sc='S'
      nu=epsneg
      raylmax=0.d0
c
c     --- In my experience, it is better not doing restarts
c
      restart=.false.
C+       restart = .true.
c
c     --- force initialization of the l-BFGS matrix if appropriate
c
      nys1=0
c
c     --- initial residual r = Ax-b (and rr = A xx - bb, if appropriate)
c         r2i = |initial residual|^2
c
      if (imode(1).eq.0) then
          do i=1,n
              x(i)=0.d0
              a1=-b(i)
              r(i)=a1
              rm(i)=a1
          enddo
      else
          call mvprod (n,a,x,r,izs,rzs,dzs)
          do i=1,n
              a1=r(i)-b(i)
              r(i)=a1
              rm(i)=a1
          enddo
      endif
      if (two) then
          if (imode(2).eq.0) then
              do i=1,n
                  xx(i)=0.d0
                  rr(i)=-bb(i)
              enddo
          else
              call mvprod (n,a,xx,rr,izs,rzs,dzs)
              do i=1,n
                  a1=rr(i)-bb(i)
                  rr(i)=a1
              enddo
          endif
      endif
c
      call deuclid (n,r,r,r2i,izs,rzs,dzs)
      r2=r2i
      if (r2i.eq.0.d0) then
          mode=6
          if (imp.gt.0) write (io,900)
          goto 1000
      endif
  900 format(/4x,"n1cga-WARNING: zero initial residual")
      if (two) then
          call deuclid (n,rr,rr,rr2i,izs,rzs,dzs)
          if (rr2i.eq.0.d0) rr2i=1.d0
      endif
c
c     --- rm = Pr
c         prr = <Pr,r>
c
c>>>>>>>>> les 3 lignes ci-dessous me semblent inutiles (4/3/99)
      do i=1,n
          rm(i)=r(i)
      enddo
      if (bfgsp.eq.1) then
          do i=1,n
              rm(i)=0.d0
          enddo
          do j=1,n
              a1=r(j)
              do i=1,n
                  rm(i)=rm(i)+pmat0(i,j)*a1
              enddo
          enddo
      elseif (bfgsp.eq.2) then
          call dbfgsl (n,rm,m0,nys0,jmin0,jmax0,ybar0,sbar0,rho,
     &                 size0,izs,rzs,dzs)
      endif
      call deuclid (n,rm,rm,rm2,izs,rzs,dzs)
      call deuclid (n,r,rm,prr,izs,rzs,dzs)
c
c     --- initial search direction v
c         v2 = |v|^2
c
      do i=1,n
          v(i)=-rm(i)
      enddo
      v2=rm2
c
c --- cost
c     cost = current cost
c     mcost = min cost encountered
c
      cost=0.d0
      ccost=0.d0
      mcost=0.d0
c
c --- printings
c
      if (imp.ge.2) then
          write (io,901) sqrt(r2i)
          if (two) write (io,902) sqrt(rr2i)
      endif
  901 format (/4x,"n1cga: initial residual |r0| =",1pd13.6)
  902 format (24x,"LS2 |r0| =",1pd13.6)

c
c --- start the CG iterations at 100, end at 1000
c
  100 iter=iter+1
c
c     --- check iter (force convergence, however)
c
      if (iter.gt.miter) then
          mode=4
          iter=iter-1
          if (imp.gt.0) write (io,910)
          goto 1000
      endif
  910 format(/4x,"n1cga: max number iterations reached")
c
c     --- too many iterations ?
c
c     if (iter.gt.biter) then
c         mode=5
c         if (imp.gt.0) write (io,911) biter
c         goto 1000
c     endif
c 911 format(/4x,"n1cga-ERROR: number of iterations exceeds",i6)
c
c     --- restart ?
c
c>>>>>>>>>> this part of the code has not been modified to take care of
c           the case when there are two linear systems to solve
c           simultaneously (two=.true.)
c
c      if (restart.and.(gperp.gt.0.2d0.or.iter.gt.niter)) then
       if (restart.and.(gperp.gt.0.2d0.or.iter.gt.niter)) then
          if (iter.gt.niter) niter=niter*3/2
          iterr=iter
          if (imp.ge.2) write (io,912)
  912     format(4x,"n1cga: restart")
c
c         --- recompute the residuals r and rm
c
          call mvprod (n,a,x,r,izs,rzs,dzs)
          do i=1,n
              a1=r(i)-b(i)
              r(i)=a1
              rm(i)=a1
          enddo
          call deuclid (n,r,r,r2,izs,rzs,dzs)
          if (bfgsp.eq.1) then
              do i=1,n
                  rm(i)=0.d0
              enddo
              do j=1,n
                  a1=r(j)
                  do i=1,n
                      rm(i)=rm(i)+pmat0(i,j)*a1
                  enddo
              enddo
          elseif (bfgsp.eq.2) then
              call dbfgsl (n,rm,m0,nys0,jmin0,jmax0,ybar0,sbar0,rho,
     &                     size0,izs,rzs,dzs)
          endif
          call deuclid (n,rm,rm,rm2,izs,rzs,dzs)
          call deuclid (n,r,rm,prr,izs,rzs,dzs)
c
c         --- and the search direction v
c
          do i=1,n
              v(i)=-rm(i)
          enddo
          v2=rm2
      endif
c
c     --- q = Av
c         avv = <v,q> = <Av,v>
c
c +++ La condition if est ajoutee car le calcul de q et avv
c +++ a deja ete effectue lors du calcul et eta et delta
c +++ pendant l'iteration precente
c+      if(iter == 1) then
      call mvprod (n,a,v,q,izs,rzs,dzs)
      call deuclid (n,v,q,avv,izs,rzs,dzs)
c+      end if
c
c     --- check positive definiteness
c
      if (v2.ne.0.d0) then
          rayl=avv/v2
          raylmax=max(raylmax,rayl)
          if (imode(3).eq.1) nu=epsneg*raylmax
      endif
      if (avv.le.nu*v2) then
          mode=3
          if (v2.ne.0.d0) epsneg=avv/v2
          if (imp.gt.0.and.v2.ne.0.d0) write (io,913) rayl
          goto 1000
      endif
  913 format(/4x,"n1cga-ERROR: non positive definite matrix,",
     &           " <Av,v>/|v|^2 = ",1pd9.2)
c
c     --- update the preconditioning matrix when appropriate
c         y=q=Av, s=v, ys=avv
c
      if (bfgsb.eq.1) then
c
c         --- full inverse BFGS update
c
c             --- initialize the matrix pmat1 ?
c
          if (iter.eq.1.and.pmat1(1,1).le.0.d0) then
c             call deuclid (n,q,q,a1,izs,rzs,dzs)
c             a1=a1/avv
              call deuclid (n,v,v,a1,izs,rzs,dzs)
              a1=a1/avv
              if (imp.ge.2) write (io,914) a1
              do i=1,n
                  do j=1,n
                      pmat1(i,j)=0.d0
                  enddo
                  pmat1(i,i)=a1
              enddo
          endif
c
c             --- update pmat1
c
          call dbfgsi (n,pmat1,q,v,avv,aux,izs,rzs,dzs)
      elseif (bfgsb.eq.2) then
c
c         --- limited memory inverse BFGS update
c
          sc='-'
          call dysave (n,q,v,avv,m1,nys1,jmin1,jmax1,ybar1,sbar1,select,
     &                 iter,ol,jol,nu,size1,ysmode,4,io,izs,
     &                 rzs,dzs)
          if (ysmode.eq.0) sc='S'
          if (ysmode.gt.0) then
              mode=7
              goto 1001
          endif
      endif
  914 format(4x,"n1cga: initialization of the BFGS matrix with OL",
     &       " factor =",1pd13.6)
c
c     --- new iterate x and residual r
c         new iterate xx and residual rr
c
      alpha=prr/avv
      x2=0.d0
      do i=1,n
          t=x(i)+alpha*v(i)
          x(i)=t
          x2=x2+t*t
          r(i)=r(i)+alpha*q(i)
      enddo
      if (two) then
          call deuclid (n,rr,v,t,izs,rzs,dzs)
          aalpha=-t/avv
          do i=1,n
              xx(i)=xx(i)+aalpha*v(i)
              rr(i)=rr(i)+aalpha*q(i)
          enddo
      endif
c
c --- stop if the cost increases
c
      mcost=min(cost,mcost)
      cost=0.d0
      do i=1,n
          cost=cost+(r(i)-b(i))*x(i)
      enddo
      cost=0.5d0*cost
       if (cost.gt.0.999999d0*mcost) then
          mode=8
          if (imp.gt.0) write (io,915)
          print*,'cost mcost',cost, mcost
          goto 1000
       endif

  915 format(/4x,"n1cga-ERROR: cost increase (rounding error)")
c
      if (two) then
          ccost=0.d0
          do i=1,n
              ccost=ccost+(rr(i)-bb(i))*xx(i)
          enddo
          ccost=0.5d0*ccost
      endif
c
c     --- r2=|r|^2
c
      call deuclid (n,r,r,r2,izs,rzs,dzs)
      if (two) call deuclid (n,rr,rr,rr2,izs,rzs,dzs)
c
c     --- prepare to check conjugacy
c         gperp = (rmo,r)/|rmo|/|r| == 0 ?
c
      call deuclid (n,rm,r,gperp,izs,rzs,dzs)
      t=sqrt(r2*rm2)
      if (t.ne.0.d0) then
          gperp=gperp/t
      else
          gperp=0.d0
      endif

c
c     --- new rm
c         rm2 = |rm|^2
c         new pprr=prr
c         prr=<Pr,r>
c
      do i=1,n
          rm(i)=r(i)
      enddo
      if (bfgsp.eq.1) then
          do i=1,n
              rm(i)=0.d0
          enddo
          do j=1,n
              a1=r(j)
              do i=1,n
                  rm(i)=rm(i)+pmat0(i,j)*a1
              enddo
          enddo
      elseif (bfgsp.eq.2) then
          call dbfgsl (n,rm,m0,nys0,jmin0,jmax0,ybar0,sbar0,rho,
     &                 size0,izs,rzs,dzs)
      endif
      call deuclid (n,rm,rm,rm2,izs,rzs,dzs)
      pprr=prr 
      call deuclid (n,r,rm,prr,izs,rzs,dzs)
c
c     --- some printings
c
      if (imp.ge.2) then
          call deuclid (n,rm,rm,sp,izs,rzs,dzs)
          dangle=0.d0
          if (sqrt(sp*r2).ne.0.d0)
     &        dangle=acos(prr/sqrt(sp*r2))/pi*180.d0
          if (iter.eq.1) write (io,916)
          write (io,917) iter,cost,sqrt(r2/r2i),dangle,rayl,gperp,
     &                   alpha,sqrt(x2),sc
          if (two) write (io,918) ccost,sqrt(rr2/rr2i)
          call flush (io)
      endif
  916 format (/4x,"n1cga: iter",6x,"cost",7x,"|r|/|r0|",2x,"<(r,Pr)",
     &         "(Av,v)/|v|^2",4x,"conj",3x,"alpha",4x,"|x|",3x,"S")
  917 format (4x,"n1cga:",i5,1x,1pe13.6,1x,e12.6,1x,0pf5.2,1x,1pe12.6,
     &        1x,e9.2,1x,e6.0,1x,e7.1,1x,a1)
  918 format (6x,"LS2 ",6x,1pe13.6,1x,e12.6)
c
c     --- stop on the relative residual (eps2)
c
       rel_r=r2/r2i
       if (rel_r.le.eps2) then
            mode=0
            if (imp.gt.0) write (io,919) sqrt(rel_r),sqrt(eps2)
            goto 1000
        endif
  919 format(/4x,"n1cga: stopping criterion satisfied, |r|/|r0| = ",
     &    1pd8.2," <= ",d8.2)
c
c     --- FR beta, new v, v2=|v|^2
c
   80 beta=prr/pprr
      do i=1,n
          v(i)=-rm(i)+beta*v(i)
      enddo
      call deuclid (n,v,v,v2,izs,rzs,dzs)

c
c     --- loop
c
      goto 100
c
c --- terminaison
c
 1000 continue
      eps2=rel_r
      if (imp.eq.1) write (io,920) iter,sqrt(r2i),sqrt(r2)
  920 format(4x,"n1cga: niter=",i5,", |r|:",d13.6," -> ",d13.6)
c  
 1001 continue
      if (imp.ge.1) then
          write (io,922) mode,iter,sqrt(eps2)
      elseif (imp.ge.2) then
          call mvprod (n,a,x,q,izs,rzs,dzs)
          cost=0.d0
          do i=1,n
              cost=cost+x(i)*(q(i)/2.d0-b(i))
              q(i)=q(i)-b(i)
          enddo
          call deuclid (n,q,q,t,izs,rzs,dzs)
          t=dsqrt(t)
          write (io,921)
          write (io,923) mode,iter,cost,sqrt(eps2),t
c
          if (two) then
              call mvprod (n,a,xx,q,izs,rzs,dzs)
              cost=0.d0
              do i=1,n
                  cost=cost+xx(i)*(q(i)/2.d0-bb(i))
                  q(i)=q(i)-bb(i)
              enddo
              call deuclid (n,q,q,t,izs,rzs,dzs)
              write (io,924) cost,sqrt(t/rr2i),dsqrt(t)
          endif
      endif
  921 format (/1x,79("-"))
  922 format (/" n1cg1: output mode is ",i8
     &        /4x,"number of iterations = ",i4
     &        /4x,"relative residual = ",5x,1pd22.15)
  923 format (/" n1cg1: output mode is ",i8
     &        /4x,"number of iterations = ",i4
     &        /4x,"cost function =",10x,1pd22.15
     &        /4x,"relative residual = ",5x,d22.15
     &        /4x,"residual = ",14x,d22.15)
  924 format (/" n1cg1: LS2"
     &        /4x,"cost function =",10x,1pd22.15
     &        /4x,"relative residual = ",5x,d22.15
     &        /4x,"residual = ",14x,d22.15)
      return
      end
c
c--------0---------0---------0---------0---------0---------0---------0--
c