c
c *************************************************
c
subroutine fft99(a,work,trigs,ifax,inc,jump,n,lot,isign),4
c
* ******************************************************************* *
* c06-summation of series b6.1/3 *
* *
* fft99 *
* fft991 *
* *
* *
* subprogram subroutine fft99 *
* fft991 *
* *
* purpose perform multiple fast fourier transforms *
* *
* *
* version cyber cray-1 *
* *
* jan 1979 original jan 1979 original *
* *
* usage *
* call fft99 (a,work,trigs,ifax,inc,jump,n,m,isign) *
* call fft991(a,work,trigs,ifax,inc,jump,n,m,isign) *
* *
* arguments 1.dimension *
* a(idim),work((n+1)*m),trigs(3*n/2),ifax(10) *
* work is a work array *
* *
* 2.input *
* a - an array containing the input data or *
* coefficient vectors. *
* this array is overwritten by the results. *
* trigs and ifax - arrays set up by fftrig and fax*
* - see writeup of fftrig and fax *
* inc - the word increment between successive *
* elements of each data or coefficient vector*
* e.g. inc=1 for consecutively stored data. *
* jump - the word increment between the first *
* elements of successive data or coefficient*
* vectors. *
* n - the length of each transform. (see note x) *
* m - the number of transforms to be done *
* simultaneously. *
* isign - +1 for a transform from fourier *
* coefficients to data values. *
* -1 for a transform from data values *
* to fourier coefficients. *
* *
* 3.output *
* a - contains either the coefficients or the *
* data values,depending on isign. *
* in each case n independent quantities *
* occupy n+2 words. the coefficients are *
* stored as successive pairs of real and *
* imaginary parts - *
* a(k),b(k) , k=0,1,...n/2 *
* b(0) and b(n/2) are stored although they *
* must be 0. *
* for fft99 the data is stored with explicit *
* periodicity - *
* x(n-1),x(0),x(1),....x(n-1),x(0) *
* for fft991 the data appears as - *
* x(0),x(1),x(2),......x(n-1),0,0 *
* *
* notes 1. on cray-1, arrange data so that jump is not a *
* multiple of 8 (to avoid memory bank conflicts) *
* *
* write up computer bulletin b6.6/1 *
* *
* entry points fft99,fft991 *
* *
* common blocks none *
* *
* i/o none *
* *
* precision single *
* *
* other routines fft99a,fft99b,vpassm (cy) *
* required cal99,cpass (cr) *
* *
* *
* 7/80 fft99-1 *
* *
************************************************************************
* *
* c06-summation of series b6.1/3 *
* *
* fft99 *
* fft991 *
* *
* access (object) cyber: *
* attach,eclib. *
* ldset(lib=eclib) *
* cray 1: *
* ldr(lib=eclib...) *
* *
* access (source) attach,oldpl,eclibpl *
* *
* cyber : %define cyber *
* cray: %define cray *
* %c fft99,fft991 *
* *
* language fortran *
* but cray implementation of pass is in cal *
* *
* specialist clive temperton *
* *
* history written by c.temperton jan 1979 *
* *
* algorithm the algorithm is the self-sorting (temperton) *
* version of the fast fourier transform *
* *
* references ecmwf technical report no.3 *
* ecmwf internal report no.21 - c.temperton *
* *
* object size fft991 fft99 (octal words) *
* cyber: 2665 2676 *
* cray : 1250 1260 *
* *
* *
* accuracy *
* *
* timing vectorization is on vectors of length m. (cr) *
* hence timing is strongly dependent on m. *
* time per transform on cray-1 (microseconds) *
* n m=4 m=16 m=64 *
* 64 46 17 10 *
* 128 81 33 21 *
* 180 150 58 37 *
* 192 149 58 36 *
* 240 192 76 49 *
* 256 191 76 49 *
* 288 219 89 58 *
* 300 253 102 68 *
* 320 248 101 66 *
* 360 286 118 79 *
* 1024 898 359 238 *
* *
* portability standard fortran *
* standard cal (cr) *
* *
* system routines none *
* required *
* *
* 7/80 fft99-1 *
* *
************************************************************************
c
c subroutine 'fft99' - multiple fast real periodic transform
c corresponding to old scalar routine fft9
c procedure used to convert to half-length complex transform
c is given by cooley, lewis ' welch (j. sound vib., vol. 12
c (1970), 315-337)
c
c a is the array containing input ' output data
c work is an area of size (n+1)*lot
c trigs is a previously prepared list of trig function values
c ifax is a previously prepared list of factors of n/2
c inc is the increment within each data "vector"
c (e.g. inc=1 for consecutively stored data)
c jump is the increment between the start of each data vector
c n is the length of the data vectors
c lot is the number of data vectors
c isign = +1 for transform from spectral to gridpoint
c = -1 for transform from gridpoint to spectral
c
c ordering of coefficients:
c a(0),b(0),a(1),b(1),a(2),b(2),...,a(n/2),b(n/2)
c where b(0)=b(n/2)=0; (n+2) locations required
c
c ordering of data:
c x(n-1),x(0),x(1),x(2),...,x(n),x(0)
c i.e. explicit cyclic continuity; (n+2) locations required
c
c vectorization is achieved on cray by doing the transforms in
c parallel
c
c *** n.b. n is assumed to be an even number
c
c definition of transforms:
c -------------------------
c
c isign=+1: x(j)=sum(k=0,...,n-1)(c(k)*exp(2*i*j*k*pi/n))
c where c(k)=a(k)+i*b(k) and c(n-k)=a(k)-i*b(k)
c
c isign=-1: a(k)=(1/n)*sum(j=0,...,n-1)(x(j)*cos(2*j*k*pi/n))
c b(k)=-(1/n)*sum(j=0,...,n-1)(x(j)*sin(2*j*k*pi/n))
c
c
c line following next is not subroutine header(only comment)
c
c subroutine fft99(a,work,trigs,ifax,inc,jump,n,lot,isign)
c
c end
dimension a(n),work(n),trigs(n),ifax(1)
c
nfax=ifax(1)
if(nfax.le.0) go to 99
nx=n+1
nh=n/2
ink=inc+inc
if (isign.eq.+1) go to 30
c
c if necessary, transfer data to work area
igo=50
if (mod(nfax,2).eq.1) goto 40
ibase=inc+1
jbase=1
do 20 l=1,lot
i=ibase
j=jbase
cvd$ nodepchk
do 10 m=1,n
work(j)=a(i)
i=i+inc
j=j+1
10 continue
ibase=ibase+jump
jbase=jbase+nx
20 continue
c
igo=60
go to 40
c
c preprocessing (isign=+1)
c ------------------------
c
30 continue
call fft99a
(a,work,trigs,inc,jump,n,lot)
igo=60
c
c complex transform
c -----------------
c
40 continue
ia=inc+1
la=1
do 80 k=1,nfax
if (igo.eq.60) go to 60
50 continue
call vpassm
(a(ia),a(ia+inc),work(1),work(2),trigs,
* ink,2,jump,nx,lot,nh,ifax(k+1),la)
igo=60
go to 70
60 continue
call vpassm
(work(1),work(2),a(ia),a(ia+inc),trigs,
* 2,ink,nx,jump,lot,nh,ifax(k+1),la)
igo=50
70 continue
la=la*ifax(k+1)
80 continue
c
if (isign.eq.-1) go to 130
c
c if necessary, transfer data from work area
if (mod(nfax,2).eq.1) go to 110
ibase=1
jbase=ia
do 100 l=1,lot
i=ibase
j=jbase
cvd$ nodepchk
do 90 m=1,n
a(j)=work(i)
i=i+1
j=j+inc
90 continue
ibase=ibase+nx
jbase=jbase+jump
100 continue
c
c fill in cyclic boundary points
110 continue
ia=1
ib=n*inc+1
cvd$ nodepchk
do 120 l=1,lot
a(ia)=a(ib)
a(ib+inc)=a(ia+inc)
ia=ia+jump
ib=ib+jump
120 continue
go to 140
c
c postprocessing (isign=-1):
c --------------------------
c
130 continue
call fft99b
(work,a,trigs,inc,jump,n,lot)
c
140 continue
return
c ** error exit ifax(1) le 0 **
99 print *,'fft99 called but factors not supplied'
c call abort
stop 'sur ABORT'
end
subroutine fft99a(a,work,trigs,inc,jump,n,lot) 2
c
c subroutine fft99a - preprocessing step for fft99, isign=+1
c (spectral to gridpoint transform)
c
c line following next is not subroutine header(only comment)
c
c subroutine fft99a(a,work,trigs,inc,jump,n,lot)
c end
dimension a(n),work(n),trigs(n)
nh=n/2
nx=n+1
ink=inc+inc
c
c a(0) ' a(n/2)
ia=1
ib=n*inc+1
ja=1
jb=2
cvd$ nodepchk
do 10 l=1,lot
work(ja)=a(ia)+a(ib)
work(jb)=a(ia)-a(ib)
ia=ia+jump
ib=ib+jump
ja=ja+nx
jb=jb+nx
10 continue
c
c remaining wavenumbers
iabase=2*inc+1
ibbase=(n-2)*inc+1
jabase=3
jbbase=n-1
c
do 30 k=3,nh,2
ia=iabase
ib=ibbase
ja=jabase
jb=jbbase
c=trigs(n+k)
s=trigs(n+k+1)
cvd$ nodepchk
do 20 l=1,lot
work(ja)=(a(ia)+a(ib))-
* (s*(a(ia)-a(ib))+c*(a(ia+inc)+a(ib+inc)))
work(jb)=(a(ia)+a(ib))+
* (s*(a(ia)-a(ib))+c*(a(ia+inc)+a(ib+inc)))
work(ja+1)=(c*(a(ia)-a(ib))-s*(a(ia+inc)+a(ib+inc)))+
* (a(ia+inc)-a(ib+inc))
work(jb+1)=(c*(a(ia)-a(ib))-s*(a(ia+inc)+a(ib+inc)))-
* (a(ia+inc)-a(ib+inc))
ia=ia+jump
ib=ib+jump
ja=ja+nx
jb=jb+nx
20 continue
iabase=iabase+ink
ibbase=ibbase-ink
jabase=jabase+2
jbbase=jbbase-2
30 continue
c
if (iabase.ne.ibbase) go to 50
c wavenumber n/4 (if it exists)
ia=iabase
ja=jabase
cvd$ nodepchk
do 40 l=1,lot
work(ja)=2.0*a(ia)
work(ja+1)=-2.0*a(ia+inc)
ia=ia+jump
ja=ja+nx
40 continue
c
50 continue
return
end
subroutine fft99b(work,a,trigs,inc,jump,n,lot) 2
c
c subroutine fft99b - postprocessing step for fft99, isign=-1
c (gridpoint to spectral transform)
c
c
c line follwing next is not subroutine header(only comment)
c
c subroutine fft99b(work,a,trigs,inc,jump,n,lot)
c end
dimension work(n),a(n),trigs(n)
c
nh=n/2
nx=n+1
ink=inc+inc
c
c a(0) ' a(n/2)
scale=1.0/float(n)
ia=1
ib=2
ja=1
jb=n*inc+1
cvd$ nodepchk
do 10 l=1,lot
a(ja)=scale*(work(ia)+work(ib))
a(jb)=scale*(work(ia)-work(ib))
a(ja+inc)=0.0
a(jb+inc)=0.0
ia=ia+nx
ib=ib+nx
ja=ja+jump
jb=jb+jump
10 continue
c
c remaining wavenumbers
scale=0.5*scale
iabase=3
ibbase=n-1
jabase=2*inc+1
jbbase=(n-2)*inc+1
c
do 30 k=3,nh,2
ia=iabase
ib=ibbase
ja=jabase
jb=jbbase
c=trigs(n+k)
s=trigs(n+k+1)
cvd$ nodepchk
do 20 l=1,lot
a(ja)=scale*((work(ia)+work(ib))
* +(c*(work(ia+1)+work(ib+1))+s*(work(ia)-work(ib))))
a(jb)=scale*((work(ia)+work(ib))
* -(c*(work(ia+1)+work(ib+1))+s*(work(ia)-work(ib))))
a(ja+inc)=scale*((c*(work(ia)-work(ib))
* -s*(work(ia+1)+work(ib+1)))
* +(work(ib+1)-work(ia+1)))
a(jb+inc)=scale*((c*(work(ia)-work(ib))
* -s*(work(ia+1)+work(ib+1)))
* -(work(ib+1)-work(ia+1)))
ia=ia+nx
ib=ib+nx
ja=ja+jump
jb=jb+jump
20 continue
iabase=iabase+2
ibbase=ibbase-2
jabase=jabase+ink
jbbase=jbbase-ink
30 continue
c
if (iabase.ne.ibbase) go to 50
c wavenumber n/4 (if it exists)
ia=iabase
ja=jabase
scale=2.0*scale
cvd$ nodepchk
do 40 l=1,lot
a(ja)=scale*work(ia)
a(ja+inc)=-scale*work(ia+1)
ia=ia+nx
ja=ja+jump
40 continue
c
50 continue
return
end
subroutine fft991(a,work,trigs,ifax,inc,jump,n,lot,isign) 9,4
c
c subroutine 'fft991' - multiple real/half-complex periodic
c fast fourier transform
c
************************************************************************
* *
* c06-summation of series b6.1/3 *
* *
* fft99 *
* fft991 *
* *
* *
* subprogram subroutine fft99 *
* fft991 *
* *
* purpose perform multiple fast fourier transforms *
* *
* *
* version cyber cray-1 *
* *
* jan 1979 original jan 1979 original *
* *
* usage *
* call fft99 (a,work,trigs,ifax,inc,jump,n,m,isign) *
* call fft991(a,work,trigs,ifax,inc,jump,n,m,isign) *
* *
* arguments 1.dimension *
* a(idim),work((n+1)*m),trigs(3*n/2),ifax(10) *
* work is a work array *
* *
* 2.input *
* a - an array containing the input data or *
* coefficient vectors. *
* this array is overwritten by the results. *
* trigs and ifax - arrays set up by fftrig and fax*
* - see writeup of fftrig and fax *
* inc - the word increment between successive *
* elements of each data or coefficient vector*
* e.g. inc=1 for consecutively stored data. *
* jump - the word increment between the first *
* elements of successive data or coefficient*
* vectors. *
* n - the length of each transform. (see note x) *
* m - the number of transforms to be done *
* simultaneously. *
* isign - +1 for a transform from fourier *
* coefficients to data values. *
* -1 for a transform from data values *
* to fourier coefficients. *
* *
* 3.output *
* a - contains either the coefficients or the *
* data values,depending on isign. *
* in each case n independent quantities *
* occupy n+2 words. the coefficients are *
* stored as successive pairs of real and *
* imaginary parts - *
* a(k),b(k) , k=0,1,...n/2 *
* b(0) and b(n/2) are stored although they *
* must be 0. *
* for fft99 the data is stored with explicit *
* periodicity - *
* x(n-1),x(0),x(1),....x(n-1),x(0) *
* for fft991 the data appears as - *
* x(0),x(1),x(2),......x(n-1),0,0 *
* *
* notes 1. on cray-1, arrange data so that jump is not a *
* multiple of 8 (to avoid memory bank conflicts) *
* *
* write up computer bulletin b6.6/1 *
* *
* entry points fft99,fft991 *
* *
* common blocks none *
* *
* i/o none *
* *
* precision single *
* *
* other routines fft99a,fft99b,vpassm (cy) *
* required cal99,cpass (cr) *
* *
* *
* 7/80 fft99-1 *
* *
************************************************************************
* *
* c06-summation of series b6.1/3 *
* *
* fft99 *
* fft991 *
* *
* access (object) cyber: *
* attach,eclib. *
* ldset(lib=eclib) *
* cray 1: *
* ldr(lib=eclib...) *
* *
* access (source) attach,oldpl,eclibpl *
* *
* cyber : %define cyber *
* cray: %define cray *
* %c fft99,fft991 *
* *
* language fortran *
* but cray implementation of pass is in cal *
* *
* specialist clive temperton *
* *
* history written by c.temperton jan 1979 *
* *
* algorithm the algorithm is the self-sorting (temperton) *
* version of the fast fourier transform *
* *
* references ecmwf technical report no.3 *
* ecmwf internal report no.21 - c.temperton *
* *
* object size fft991 fft99 (octal words) *
* cyber: 2665 2676 *
* cray : 1250 1260 *
* *
* *
* accuracy *
* *
* timing vectorization is on vectors of length m. (cr) *
* hence timing is strongly dependent on m. *
* time per transform on cray-1 (microseconds) *
* n m=4 m=16 m=64 *
* 64 46 17 10 *
* 128 81 33 21 *
* 180 150 58 37 *
* 192 149 58 36 *
* 240 192 76 49 *
* 256 191 76 49 *
* 288 219 89 58 *
* 300 253 102 68 *
* 320 248 101 66 *
* 360 286 118 79 *
* 1024 898 359 238 *
* *
* portability standard fortran *
* standard cal (cr) *
* *
* system routines none *
* required *
* *
* 7/80 fft99-1 *
* *
************************************************************************
c
c same as fft99 except that ordering of data corresponds to
c that in mrfft2
c
c procedure used to convert to half-length complex transform
c is given by cooley, lewis ' welch (j. sound vib., vol. 12
c (1970), 315-337)
c
c a is the array containing input ' output data
c work is an area of size (n+1)*lot
c trigs is a previously prepared list of trig function values
c ifax is a previously prepared list of factors of n/2
c inc is the increment within each data "vector"
c (e.g. inc=1 for consecutively stored data)
c jump is the increment between the start of each data vector
c n is the length of the data vectors
c lot is the number of data vectors
c isign = +1 for transform from spectral to gridpoint
c = -1 for transform from gridpoint to spectral
c
c ordering of coefficients:
c a(0),b(0),a(1),b(1),a(2),b(2),...,a(n/2),b(n/2)
c where b(0)=b(n/2)=0; (n+2) locations required
c
c ordering of data:
c x(0),x(1),x(2),...,x(n-1)
c
c vectorization is achieved on cray by doing the transforms in
c parallel
c
c *** n.b. n is assumed to be an even number
c
c definition of transforms:
c -------------------------
c
c isign=+1: x(j)=sum(k=0,...,n-1)(c(k)*exp(2*i*j*k*pi/n))
c where c(k)=a(k)+i*b(k) and c(n-k)=a(k)-i*b(k)
c
c isign=-1: a(k)=(1/n)*sum(j=0,...,n-1)(x(j)*cos(2*j*k*pi/n))
c b(k)=-(1/n)*sum(j=0,...,n-1)(x(j)*sin(2*j*k*pi/n))
c
c subroutine fft991(a,work,trigs,ifax,inc,jump,n,lot,isign)
c end
dimension a(n),work(n),trigs(n),ifax(1)
c
nfax=ifax(1)
if(nfax.le.0) go to 99
nx=n+1
nh=n/2
ink=inc+inc
if (isign.eq.+1) go to 30
c
c if necessary, transfer data to work area
igo=50
if (mod(nfax,2).eq.1) goto 40
ibase=1
jbase=1
do 20 l=1,lot
i=ibase
j=jbase
cvd$ nodepchk
do 10 m=1,n
work(j)=a(i)
i=i+inc
j=j+1
10 continue
ibase=ibase+jump
jbase=jbase+nx
20 continue
c
igo=60
go to 40
c
c preprocessing (isign=+1)
c ------------------------
c
30 continue
call fft99a
(a,work,trigs,inc,jump,n,lot)
igo=60
c
c complex transform
c -----------------
c
40 continue
ia=1
la=1
do 80 k=1,nfax
if (igo.eq.60) go to 60
50 continue
call vpassm
(a(ia),a(ia+inc),work(1),work(2),trigs,
* ink,2,jump,nx,lot,nh,ifax(k+1),la)
igo=60
go to 70
60 continue
call vpassm
(work(1),work(2),a(ia),a(ia+inc),trigs,
* 2,ink,nx,jump,lot,nh,ifax(k+1),la)
igo=50
70 continue
la=la*ifax(k+1)
80 continue
c
if (isign.eq.-1) go to 130
c
c if necessary, transfer data from work area
if (mod(nfax,2).eq.1) go to 110
ibase=1
jbase=1
do 100 l=1,lot
i=ibase
j=jbase
cvd$ nodepchk
do 90 m=1,n
a(j)=work(i)
i=i+1
j=j+inc
90 continue
ibase=ibase+nx
jbase=jbase+jump
100 continue
c
c fill in zeros at end
110 continue
ib=n*inc+1
cvd$ nodepchk
do 120 l=1,lot
a(ib)=0.0
a(ib+inc)=0.0
ib=ib+jump
120 continue
go to 140
c
c postprocessing (isign=-1):
c --------------------------
c
130 continue
call fft99b
(work,a,trigs,inc,jump,n,lot)
c
140 continue
return
c ** error ifax(1) le 0 **
99 print *,' fft991 called but factors not supplied '
c call abort
stop 'sur ABORT'
end
subroutine vpassm(a,b,c,d,trigs, 4
* inc1,inc2,inc3,inc4,lot,n,ifac,la)
c
c subroutine 'vpassm' - multiple version of 'vpassa'
c performs one pass through data
c as part of multiple complex fft routine
c a is first real input vector
c b is first imaginary input vector
c c is first real output vector
c d is first imaginary output vector
c trigs is precalculated table of sines ' cosines
c inc1 is addressing increment for a and b
c inc2 is addressing increment for c and d
c inc3 is addressing increment between a's & b's
c inc4 is addressing increment between c's & d's
c lot is the number of vectors
c n is length of vectors
c ifac is current factor of n
c la is product of previous factors
c
c subroutine vpassm(a,b,c,d,trigs,
c * inc1,inc2,inc3,inc4,lot,n,ifac,la)
c
c end
dimension a(n),b(n),c(n),d(n),trigs(n)
data sin36/0.587785252292473/,cos36/0.809016994374947/,
* sin72/0.951056516295154/,cos72/0.309016994374947/,
* sin60/0.866025403784437/
c
m=n/ifac
iink=m*inc1
jink=la*inc2
jump=(ifac-1)*jink
ibase=0
jbase=0
igo=ifac-1
c check factors are correct - ensure non-negative
if (igo.le.0) goto 998
if (igo.gt.4) go to 999
go to (10,50,90,130),igo
c
c coding for factor 2
c
10 ia=1
ja=1
ib=ia+iink
jb=ja+jink
do 20 l=1,la
i=ibase
j=jbase
cvd$ nodepchk
do 15 ijk=1,lot
c(ja+j)=a(ia+i)+a(ib+i)
d(ja+j)=b(ia+i)+b(ib+i)
c(jb+j)=a(ia+i)-a(ib+i)
d(jb+j)=b(ia+i)-b(ib+i)
i=i+inc3
j=j+inc4
15 continue
ibase=ibase+inc1
jbase=jbase+inc2
20 continue
if (la.eq.m) return
la1=la+1
jbase=jbase+jump
do 40 k=la1,m,la
kb=k+k-2
c1=trigs(kb+1)
s1=trigs(kb+2)
do 30 l=1,la
i=ibase
j=jbase
cvd$ nodepchk
do 25 ijk=1,lot
c(ja+j)=a(ia+i)+a(ib+i)
d(ja+j)=b(ia+i)+b(ib+i)
c(jb+j)=c1*(a(ia+i)-a(ib+i))-s1*(b(ia+i)-b(ib+i))
d(jb+j)=s1*(a(ia+i)-a(ib+i))+c1*(b(ia+i)-b(ib+i))
i=i+inc3
j=j+inc4
25 continue
ibase=ibase+inc1
jbase=jbase+inc2
30 continue
jbase=jbase+jump
40 continue
return
c
c coding for factor 3
c
50 ia=1
ja=1
ib=ia+iink
jb=ja+jink
ic=ib+iink
jc=jb+jink
do 60 l=1,la
i=ibase
j=jbase
cvd$ nodepchk
do 55 ijk=1,lot
c(ja+j)=a(ia+i)+(a(ib+i)+a(ic+i))
d(ja+j)=b(ia+i)+(b(ib+i)+b(ic+i))
c(jb+j)=(a(ia+i)-0.5*(a(ib+i)+a(ic+i)))-(sin60*(b(ib+i)-b(ic+i)))
c(jc+j)=(a(ia+i)-0.5*(a(ib+i)+a(ic+i)))+(sin60*(b(ib+i)-b(ic+i)))
d(jb+j)=(b(ia+i)-0.5*(b(ib+i)+b(ic+i)))+(sin60*(a(ib+i)-a(ic+i)))
d(jc+j)=(b(ia+i)-0.5*(b(ib+i)+b(ic+i)))-(sin60*(a(ib+i)-a(ic+i)))
i=i+inc3
j=j+inc4
55 continue
ibase=ibase+inc1
jbase=jbase+inc2
60 continue
if (la.eq.m) return
la1=la+1
jbase=jbase+jump
do 80 k=la1,m,la
kb=k+k-2
kc=kb+kb
c1=trigs(kb+1)
s1=trigs(kb+2)
c2=trigs(kc+1)
s2=trigs(kc+2)
do 70 l=1,la
i=ibase
j=jbase
cvd$ nodepchk
do 65 ijk=1,lot
c(ja+j)=a(ia+i)+(a(ib+i)+a(ic+i))
d(ja+j)=b(ia+i)+(b(ib+i)+b(ic+i))
c(jb+j)=
* c1*((a(ia+i)-0.5*(a(ib+i)+a(ic+i)))-(sin60*(b(ib+i)-b(ic+i))))
* -s1*((b(ia+i)-0.5*(b(ib+i)+b(ic+i)))+(sin60*(a(ib+i)-a(ic+i))))
d(jb+j)=
* s1*((a(ia+i)-0.5*(a(ib+i)+a(ic+i)))-(sin60*(b(ib+i)-b(ic+i))))
* +c1*((b(ia+i)-0.5*(b(ib+i)+b(ic+i)))+(sin60*(a(ib+i)-a(ic+i))))
c(jc+j)=
* c2*((a(ia+i)-0.5*(a(ib+i)+a(ic+i)))+(sin60*(b(ib+i)-b(ic+i))))
* -s2*((b(ia+i)-0.5*(b(ib+i)+b(ic+i)))-(sin60*(a(ib+i)-a(ic+i))))
d(jc+j)=
* s2*((a(ia+i)-0.5*(a(ib+i)+a(ic+i)))+(sin60*(b(ib+i)-b(ic+i))))
* +c2*((b(ia+i)-0.5*(b(ib+i)+b(ic+i)))-(sin60*(a(ib+i)-a(ic+i))))
i=i+inc3
j=j+inc4
65 continue
ibase=ibase+inc1
jbase=jbase+inc2
70 continue
jbase=jbase+jump
80 continue
return
c
c coding for factor 4
c
90 ia=1
ja=1
ib=ia+iink
jb=ja+jink
ic=ib+iink
jc=jb+jink
id=ic+iink
jd=jc+jink
do 100 l=1,la
i=ibase
j=jbase
cvd$ nodepchk
do 95 ijk=1,lot
c(ja+j)=(a(ia+i)+a(ic+i))+(a(ib+i)+a(id+i))
c(jc+j)=(a(ia+i)+a(ic+i))-(a(ib+i)+a(id+i))
d(ja+j)=(b(ia+i)+b(ic+i))+(b(ib+i)+b(id+i))
d(jc+j)=(b(ia+i)+b(ic+i))-(b(ib+i)+b(id+i))
c(jb+j)=(a(ia+i)-a(ic+i))-(b(ib+i)-b(id+i))
c(jd+j)=(a(ia+i)-a(ic+i))+(b(ib+i)-b(id+i))
d(jb+j)=(b(ia+i)-b(ic+i))+(a(ib+i)-a(id+i))
d(jd+j)=(b(ia+i)-b(ic+i))-(a(ib+i)-a(id+i))
i=i+inc3
j=j+inc4
95 continue
ibase=ibase+inc1
jbase=jbase+inc2
100 continue
if (la.eq.m) return
la1=la+1
jbase=jbase+jump
do 120 k=la1,m,la
kb=k+k-2
kc=kb+kb
kd=kc+kb
c1=trigs(kb+1)
s1=trigs(kb+2)
c2=trigs(kc+1)
s2=trigs(kc+2)
c3=trigs(kd+1)
s3=trigs(kd+2)
do 110 l=1,la
i=ibase
j=jbase
cvd$ nodepchk
do 105 ijk=1,lot
c(ja+j)=(a(ia+i)+a(ic+i))+(a(ib+i)+a(id+i))
d(ja+j)=(b(ia+i)+b(ic+i))+(b(ib+i)+b(id+i))
c(jc+j)=
* c2*((a(ia+i)+a(ic+i))-(a(ib+i)+a(id+i)))
* -s2*((b(ia+i)+b(ic+i))-(b(ib+i)+b(id+i)))
d(jc+j)=
* s2*((a(ia+i)+a(ic+i))-(a(ib+i)+a(id+i)))
* +c2*((b(ia+i)+b(ic+i))-(b(ib+i)+b(id+i)))
c(jb+j)=
* c1*((a(ia+i)-a(ic+i))-(b(ib+i)-b(id+i)))
* -s1*((b(ia+i)-b(ic+i))+(a(ib+i)-a(id+i)))
d(jb+j)=
* s1*((a(ia+i)-a(ic+i))-(b(ib+i)-b(id+i)))
* +c1*((b(ia+i)-b(ic+i))+(a(ib+i)-a(id+i)))
c(jd+j)=
* c3*((a(ia+i)-a(ic+i))+(b(ib+i)-b(id+i)))
* -s3*((b(ia+i)-b(ic+i))-(a(ib+i)-a(id+i)))
d(jd+j)=
* s3*((a(ia+i)-a(ic+i))+(b(ib+i)-b(id+i)))
* +c3*((b(ia+i)-b(ic+i))-(a(ib+i)-a(id+i)))
i=i+inc3
j=j+inc4
105 continue
ibase=ibase+inc1
jbase=jbase+inc2
110 continue
jbase=jbase+jump
120 continue
return
c
c coding for factor 5
c
130 ia=1
ja=1
ib=ia+iink
jb=ja+jink
ic=ib+iink
jc=jb+jink
id=ic+iink
jd=jc+jink
ie=id+iink
je=jd+jink
do 140 l=1,la
i=ibase
j=jbase
cvd$ nodepchk
do 135 ijk=1,lot
c(ja+j)=a(ia+i)+(a(ib+i)+a(ie+i))+(a(ic+i)+a(id+i))
d(ja+j)=b(ia+i)+(b(ib+i)+b(ie+i))+(b(ic+i)+b(id+i))
c(jb+j)=(a(ia+i)+cos72*(a(ib+i)+a(ie+i))-cos36*(a(ic+i)+a(id+i)))
* -(sin72*(b(ib+i)-b(ie+i))+sin36*(b(ic+i)-b(id+i)))
c(je+j)=(a(ia+i)+cos72*(a(ib+i)+a(ie+i))-cos36*(a(ic+i)+a(id+i)))
* +(sin72*(b(ib+i)-b(ie+i))+sin36*(b(ic+i)-b(id+i)))
d(jb+j)=(b(ia+i)+cos72*(b(ib+i)+b(ie+i))-cos36*(b(ic+i)+b(id+i)))
* +(sin72*(a(ib+i)-a(ie+i))+sin36*(a(ic+i)-a(id+i)))
d(je+j)=(b(ia+i)+cos72*(b(ib+i)+b(ie+i))-cos36*(b(ic+i)+b(id+i)))
* -(sin72*(a(ib+i)-a(ie+i))+sin36*(a(ic+i)-a(id+i)))
c(jc+j)=(a(ia+i)-cos36*(a(ib+i)+a(ie+i))+cos72*(a(ic+i)+a(id+i)))
* -(sin36*(b(ib+i)-b(ie+i))-sin72*(b(ic+i)-b(id+i)))
c(jd+j)=(a(ia+i)-cos36*(a(ib+i)+a(ie+i))+cos72*(a(ic+i)+a(id+i)))
* +(sin36*(b(ib+i)-b(ie+i))-sin72*(b(ic+i)-b(id+i)))
d(jc+j)=(b(ia+i)-cos36*(b(ib+i)+b(ie+i))+cos72*(b(ic+i)+b(id+i)))
* +(sin36*(a(ib+i)-a(ie+i))-sin72*(a(ic+i)-a(id+i)))
d(jd+j)=(b(ia+i)-cos36*(b(ib+i)+b(ie+i))+cos72*(b(ic+i)+b(id+i)))
* -(sin36*(a(ib+i)-a(ie+i))-sin72*(a(ic+i)-a(id+i)))
i=i+inc3
j=j+inc4
135 continue
ibase=ibase+inc1
jbase=jbase+inc2
140 continue
if (la.eq.m) return
la1=la+1
jbase=jbase+jump
do 160 k=la1,m,la
kb=k+k-2
kc=kb+kb
kd=kc+kb
ke=kd+kb
c1=trigs(kb+1)
s1=trigs(kb+2)
c2=trigs(kc+1)
s2=trigs(kc+2)
c3=trigs(kd+1)
s3=trigs(kd+2)
c4=trigs(ke+1)
s4=trigs(ke+2)
do 150 l=1,la
i=ibase
j=jbase
cvd$ nodepchk
do 145 ijk=1,lot
c(ja+j)=a(ia+i)+(a(ib+i)+a(ie+i))+(a(ic+i)+a(id+i))
d(ja+j)=b(ia+i)+(b(ib+i)+b(ie+i))+(b(ic+i)+b(id+i))
c(jb+j)=
* c1*((a(ia+i)+cos72*(a(ib+i)+a(ie+i))-cos36*(a(ic+i)+a(id+i)))
* -(sin72*(b(ib+i)-b(ie+i))+sin36*(b(ic+i)-b(id+i))))
* -s1*((b(ia+i)+cos72*(b(ib+i)+b(ie+i))-cos36*(b(ic+i)+b(id+i)))
* +(sin72*(a(ib+i)-a(ie+i))+sin36*(a(ic+i)-a(id+i))))
d(jb+j)=
* s1*((a(ia+i)+cos72*(a(ib+i)+a(ie+i))-cos36*(a(ic+i)+a(id+i)))
* -(sin72*(b(ib+i)-b(ie+i))+sin36*(b(ic+i)-b(id+i))))
* +c1*((b(ia+i)+cos72*(b(ib+i)+b(ie+i))-cos36*(b(ic+i)+b(id+i)))
* +(sin72*(a(ib+i)-a(ie+i))+sin36*(a(ic+i)-a(id+i))))
c(je+j)=
* c4*((a(ia+i)+cos72*(a(ib+i)+a(ie+i))-cos36*(a(ic+i)+a(id+i)))
* +(sin72*(b(ib+i)-b(ie+i))+sin36*(b(ic+i)-b(id+i))))
* -s4*((b(ia+i)+cos72*(b(ib+i)+b(ie+i))-cos36*(b(ic+i)+b(id+i)))
* -(sin72*(a(ib+i)-a(ie+i))+sin36*(a(ic+i)-a(id+i))))
d(je+j)=
* s4*((a(ia+i)+cos72*(a(ib+i)+a(ie+i))-cos36*(a(ic+i)+a(id+i)))
* +(sin72*(b(ib+i)-b(ie+i))+sin36*(b(ic+i)-b(id+i))))
* +c4*((b(ia+i)+cos72*(b(ib+i)+b(ie+i))-cos36*(b(ic+i)+b(id+i)))
* -(sin72*(a(ib+i)-a(ie+i))+sin36*(a(ic+i)-a(id+i))))
c(jc+j)=
* c2*((a(ia+i)-cos36*(a(ib+i)+a(ie+i))+cos72*(a(ic+i)+a(id+i)))
* -(sin36*(b(ib+i)-b(ie+i))-sin72*(b(ic+i)-b(id+i))))
* -s2*((b(ia+i)-cos36*(b(ib+i)+b(ie+i))+cos72*(b(ic+i)+b(id+i)))
* +(sin36*(a(ib+i)-a(ie+i))-sin72*(a(ic+i)-a(id+i))))
d(jc+j)=
* s2*((a(ia+i)-cos36*(a(ib+i)+a(ie+i))+cos72*(a(ic+i)+a(id+i)))
* -(sin36*(b(ib+i)-b(ie+i))-sin72*(b(ic+i)-b(id+i))))
* +c2*((b(ia+i)-cos36*(b(ib+i)+b(ie+i))+cos72*(b(ic+i)+b(id+i)))
* +(sin36*(a(ib+i)-a(ie+i))-sin72*(a(ic+i)-a(id+i))))
c(jd+j)=
* c3*((a(ia+i)-cos36*(a(ib+i)+a(ie+i))+cos72*(a(ic+i)+a(id+i)))
* +(sin36*(b(ib+i)-b(ie+i))-sin72*(b(ic+i)-b(id+i))))
* -s3*((b(ia+i)-cos36*(b(ib+i)+b(ie+i))+cos72*(b(ic+i)+b(id+i)))
* -(sin36*(a(ib+i)-a(ie+i))-sin72*(a(ic+i)-a(id+i))))
d(jd+j)=
* s3*((a(ia+i)-cos36*(a(ib+i)+a(ie+i))+cos72*(a(ic+i)+a(id+i)))
* +(sin36*(b(ib+i)-b(ie+i))-sin72*(b(ic+i)-b(id+i))))
* +c3*((b(ia+i)-cos36*(b(ib+i)+b(ie+i))+cos72*(b(ic+i)+b(id+i)))
* -(sin36*(a(ib+i)-a(ie+i))-sin72*(a(ic+i)-a(id+i))))
i=i+inc3
j=j+inc4
145 continue
ibase=ibase+inc1
jbase=jbase+inc2
150 continue
jbase=jbase+jump
160 continue
return
c ** error - factor less than 1 not allowed **
998 print *,' fft99: factors are incorrect '
c call abort
stop 'sur ABORT'
c ** error - factor higher than 5 not allowed **
999 print *,' fft99: factors higher than 5 are not supported '
c call abort
stop 'sur ABORT'
end
subroutine fax(ifax,n,mode) 1
c***********************************************************************
c *
c c06-summatiom
c c06-summation of series b6.1/3 *
c *
c fftrig *
c fax *
c *
c *
c sup
c subprogram subroutine fftrig *
c fax *
c *
c purpose setup routines for fft packages *
c *
c *
c version cyber cray-1 *
c *
c jan 1979 original jan 1979 original *
c *
c usage *
c call fftrig(trigs,n,3) *
c call fax (ifax ,n,3) *
c *
c arguments 1.dimension *
c trigs(dimension 3*n/2 - add 1 if n/2 is odd) *
c ifax(10) *
c *
c 2.input *
c n - the lenght of the transforms to be performed*
c n must be even. *
c the number of words of ifax used increases *
c logarithmically with n. *
c ifax(10) suffices for practical purposes. *
c (transforms of lenght at least 10000) *
c *
c 3.output *
c trigs - fftrig returns an array of trigonometric*
c function values subsequently used by *
c fft routines. *
c ifax - fax factorizes n/2 into a product of *
c 4"s and 2"s and higher prime numbers. *
c ifax(1) contains the number of factors. *
c and the factors themselves are stored *
c in ascending order in ifax(2),ifax(3).. *
c if fax is called with n odd ,ifax(1) *
c is set to -99(error condition) and no *
c factorization is done. *
c *
c write up none *
c *
c entry points fftrig, fax *
c *
c common blocks none *
c i/o none *
c precision single *
c other routines none *
c required *
c 7/80 fftrig-1 *
c *
c***********************************************************************
c *
c co6-summation of series b6.1/3 *
c *
c fftrig *
c fax *
c *
c acsses (object) cyber: *
c attach,eclib. *
c ldset(lib=eclib) *
c cray 1: *
c ldr(lib=eclib...) *
c *
c access (source) attach,oldpl,eclibpl *
c *
c cyber : %define cyber *
c cray: %define cray *
c %c fftrig, fax *
c *
c language fortran *
c *
c specialist clive temperton *
c *
c history written by c.temperton jan 1979 *
c *
c algorithm *
c references *
c *
c object size fftrig fax (octal words) *
c cyber: 145 127 *
c cray : 221 157 *
c *
c *
c accuracy *
c *
c timing *
c *
c portability standard fortran *
c *
c system routines none *
c required *
c
c 7/80 fftrig-2 *
c
c***********************************************************************
c end
dimension ifax(10)
nn=n
if (iabs(mode).eq.1) go to 10
if (iabs(mode).eq.8) go to 10
nn=n/2
if ((nn+nn).eq.n) go to 10
ifax(1)=-99
return
10 k=1
c test for factors of 4
20 if (mod(nn,4).ne.0) go to 30
k=k+1
ifax(k)=4
nn=nn/4
if (nn.eq.1) go to 80
go to 20
c test for extra factor of 2
30 if (mod(nn,2).ne.0) go to 40
k=k+1
ifax(k)=2
nn=nn/2
if (nn.eq.1) go to 80
c test for factors of 3
40 if (mod(nn,3).ne.0) go to 50
k=k+1
ifax(k)=3
nn=nn/3
if (nn.eq.1) go to 80
go to 40
c now find remaining factors
50 l=5
inc=2
c inc alternatively takes on values 2 and 4
60 if (mod(nn,l).ne.0) go to 70
k=k+1
ifax(k)=l
nn=nn/l
if (nn.eq.1) go to 80
go to 60
70 l=l+inc
inc=6-inc
go to 60
80 ifax(1)=k-1
c ifax(1) contains number of factors
nfax=ifax(1)
c sort factors into ascending order
if (nfax.eq.1) go to 110
do 100 ii=2,nfax
istop=nfax+2-ii
do 90 i=2,istop
if (ifax(i+1).ge.ifax(i)) go to 90
item=ifax(i)
ifax(i)=ifax(i+1)
ifax(i+1)=item
90 continue
100 continue
110 continue
return
end
subroutine fftrig (trigs,n,mode) 1
dimension trigs(n)
c fftrig returns an array of trigonometric function values
c subsequently used by f f t routines
c see comments in routine f a x
c end
pi=2.0*asin(1.0)
imode=iabs(mode)
nn=n
if (imode.gt.1.and.imode.lt.6) nn=n/2
del=(pi+pi)/float(nn)
l=nn+nn
do 10 i=1,l,2
angle=0.5*float(i-1)*del
trigs(i)=cos(angle)
trigs(i+1)=sin(angle)
10 continue
if (imode.eq.1) return
if (imode.eq.8) return
del=0.5*del
nh=(nn+1)/2
l=nh+nh
la=nn+nn
do 20 i=1,l,2
angle=0.5*float(i-1)*del
trigs(la+i)=cos(angle)
trigs(la+i+1)=sin(angle)
20 continue
if (imode.le.3) return
del=0.5*del
la=la+nn
if (mode.eq.5) go to 40
do 30 i=2,nn
angle=float(i-1)*del
trigs(la+i)=2.0*sin(angle)
30 continue
return
40 continue
del=0.5*del
do 50 i=2,n
angle=float(i-1)*del
trigs(la+i)=sin(angle)
50 continue
return
end
************************************************************************
subroutine fftcc(z,w,ex,ifax,inc,jump,n,nft,isign) 1,4
* fft complex-->complex using temperton's real-->complex fft
* same arguments as fft991
* n must be even
if(isign.eq.1)then
call trccc
(z,w,inc,jump,n,nft,isign)
call fft991
(z,w,ex,ifax,inc,jump,n,nft,isign)
return
else
call fft991
(z,w,ex,ifax,inc,jump,n,nft,isign)
call trccc
(z,w,inc,jump,n,nft,isign)
return
endif
end
************************************************************************
subroutine trccc(f,w,inc,jump,n,nft,isign) 2
*
* this routine is used to compute complex to complex transforms
* using temperton's routine fft991
*
*
*
* after ft along x1, one has
*
* sum f(x)cos(k1x1) sum f(x)sin(k1x1)
* x1 x1
*
*--------------------------------------------------------------
* after ft along x2 using fft991 one has
*
* sum f(x)cos(k1x1)sin(k2x2) sum f(x)sin(k1x1)sin(k2x2)
* x x
*
* sum f(x)cos(k1x1)cos(k2x2) sum f(x)sin(k1x1)cos(k2x2)
* x x
*
*---------------------------------------------------------------
* what we want is
*
* sum f(x)( cos(k1x1)cos(k2x2)-sin(k1x1)sin(k2x2) )
* x
*
* sum f(x)( cos(k1x1)sin(k2x2)+sin(k1x1)cos(k2x2) )
* x
*
*---------------------------------------------------------------
real f(1),w(2,0:1)
nh=n/2
nfth=nft/2
i1=1
incd=inc*2
if(isign.eq.-1)then
do 1 ift=1,nfth
i2=i1+1
i1p=i1+inc
i2p=i2+inc
cvd$ nodepchk
do 3 j=0,nh-1
w(1,j)=f(i1+incd*j)-f(i2p+incd*j)
w(2,j)=f(i2+incd*j)+f(i1p+incd*j)
w(1,n-j)=f(i1+incd*j)+f(i2p+incd*j)
w(2,n-j)=f(i2+incd*j)-f(i1p+incd*j)
3 continue
w(1,nh)=f(i1+incd*nh)+f(i2p+incd*nh)
w(2,nh)=f(i2+incd*nh)-f(i1p+incd*nh)
cvd$ nodepchk
do 5 j=0,n-1
f(i1+inc*j)=w(1,j)
f(i2+inc*j)=w(2,j)
5 continue
i1=i1+jump*2
1 continue
else
do 2 ift=1,nfth
i2=i1+1
i1n=i1+n*inc
i2n=i2+n*inc
cvd$ nodepchk
do 4 j=1,nh-1
w(1,2*j)=.5*(f(i1+inc*j)+f(i1n-inc*j))
w(2,2*j)=.5*(f(i2+inc*j)+f(i2n-inc*j))
w(1,2*j+1)=.5*(f(i2+inc*j)-f(i2n-inc*j))
w(2,2*j+1)=.5*(f(i1n-inc*j)-f(i1+inc*j))
4 continue
w(1,0)=f(i1)
w(2,0)=f(i2)
w(1,1)=0.
w(2,1)=0.
w(1,n)=f(i1n)
w(2,n)=f(i2n)
w(1,n+1)=0.
w(2,n+1)=0.
cvd$ nodepchk
do 6 j=0,n+1
f(i1+inc*j)=w(1,j)
f(i2+inc*j)=w(2,j)
6 continue
i1=i1+jump*2
2 continue
endif
return
end
************************************************************************
subroutine set99(ex,ifax,n) 3,2
call fftrig
(ex,n,3)
call fax
(ifax,n,3)
return
end
subroutine fftfax(n,ifax,ex),1
call set99
(ex,ifax,n)
return
end
subroutine cftfax(n,ifax,ex),1
call set99
(ex,ifax,n)
return
end
subroutine fft999(a,w,ex,ifax,inc,jump,n,nft,isign),1
call fft991
(a,w,ex,ifax,inc,jump,n,nft,isign)
return
end
subroutine cfft999(a,b,w,ex,ifax,inc,jump,n,nft,isign),1
call fftcc
(a,w,ex,ifax,inc,jump/2,n,nft*2,isign)
return
end