[Search for users] [Overall Top Noters] [List of all Conferences] [Download this site]

Conference nicctr::dxml

Title:	Digital Extended Math Library
Notice:	Kit locations: 9.last (UNIX), 10.last (VMS)
Moderator:	RTL::CHAOFGREN

Created:	Mon Apr 30 1990
Last Modified:	Tue Jun 03 1997
Last Successful Update:	Fri Jun 06 1997
Number of topics:	324
Total number of notes:	1402

318.0. "xposted from FORTRAn : on FFT's" by RTOMS::PARETIJ () Mon Mar 31 1997 11:58

      The following code does some sort of fft; there
      are 2 routines fft_ce and FFT_se calling cosft1 (sinft1) and
      realft colling four1. There are no comments, which strains
      my imagination. I am again looking for a way to use dxml.
      May be putting the code out to the forum will generate some
      questions I could refer to the customer.


	subroutine fft_ce(A,Re_,Im_,nx,ny,isign)
	implicit none
	integer nx,ny,isign
	real*8 A(0:nx,0:ny-1),Re_(0:nx,0:ny/2),Im_(0:nx,0:ny/2)
	integer i,j
	real*8 vec_x(0:nx), vec_y(0:ny-1)

	if(isign.eq.1)then

	do j=0,ny-1
	  do i=0,nx
	    vec_x(i)=A(i,j)
	  enddo
	  call cosft1(vec_x,nx)
	  A(0,j)=vec_x(0)/nx
	  do i=1,nx-1
	    A(i,j)=vec_x(i)*2/nx
	  enddo
	  A(nx,j)=vec_x(nx)/nx
	enddo
	do i=0,nx
	   do j=0,ny-1
	    vec_y(j)=A(i,j)
	   enddo
	   call realft(vec_y,ny,isign)
	   Re_(i,0)=vec_y(0)/ny
	   Im_(i,0)=0
	   do j=1,ny/2-1
	      Re_(i,j)=vec_y(2*j)/ny
	      Im_(i,j)=-vec_y(2*j+1)/ny
	   enddo
       Re_(i,ny/2)=0.5*vec_y(1)/ny
	   Im_(i,ny/2)=0
	enddo

	else

	do i=0,nx
		vec_y(0)=2*Re_(i,0)
		vec_y(1)=4*Re_(i,ny/2)
		do j=1,ny/2-1
			vec_y(2*j)=2*Re_(i,j)
			vec_y(2*j+1)=-2*Im_(i,j)
		enddo
		call realft(vec_y,ny,isign)
	    do j=0,ny-1
 			A(i,j)=vec_y(j)
	    enddo
	enddo
	do j=0,ny-1
	  vec_x(0)=2*A(0,j)
	  do i=1,nx-1
	    vec_x(i)=A(i,j)
	  enddo
	  vec_x(nx)=2*A(nx,j)
	  call cosft1(vec_x,nx)
	  do i=0,nx
	    A(i,j)=vec_x(i)
	  enddo
	enddo

	endif

	end
	
	
	
	subroutine fft_se(A,Re_,Im_,nx,ny,isign)
	implicit none
	integer nx,ny,isign
	real*8 A(0:nx,0:ny-1),Re_(0:nx,0:ny/2),Im_(0:nx,0:ny/2)
	integer i,j
	real*8 vec_x(0:nx-1), vec_y(0:ny-1)

	if(isign.eq.1)then

	do j=0,ny-1
	  vec_x(0)=0
	  do i=1,nx-1
	    vec_x(i)=A(i,j)
	  enddo
	  call sinft(vec_x,nx)
	  do i=0,nx-1
	    A(i,j)=vec_x(i)*2/nx
	  enddo
	enddo
	do i=1,nx-1
	   do j=0,ny-1
	    vec_y(j)=A(i,j)
	   enddo
	   call realft(vec_y,ny,isign)
	   Re_(i,0)=vec_y(0)/ny
	   Im_(i,0)=0
	   do j=1,ny/2-1
	      Re_(i,j)=vec_y(2*j)/ny
	      Im_(i,j)=-vec_y(2*j+1)/ny
	   enddo
       Re_(i,ny/2)=0.5*vec_y(1)/ny
	   Im_(i,ny/2)=0
	enddo
	do j=0,ny/2
		Re_(0,j)=0
		Im_(0,j)=0
		Re_(nx,j)=0
		Im_(nx,j)=0
	enddo

	else

	do i=1,nx-1
		vec_y(0)=2*Re_(i,0)
		vec_y(1)=4*Re_(i,ny/2)
		do j=1,ny/2-1
			vec_y(2*j)=2*Re_(i,j)
			vec_y(2*j+1)=-2*Im_(i,j)
		enddo
		call realft(vec_y,ny,isign)
	    do j=0,ny-1
 			A(i,j)=vec_y(j)
	    enddo
	enddo
	do j=0,ny-1
	  vec_x(0)=0
	  do i=1,nx-1
	    vec_x(i)=A(i,j)
	  enddo
	  call sinft(vec_x,nx)
	  do i=0,nx-1
	    A(i,j)=vec_x(i)
	  enddo
	enddo
	do j=0,ny-1
		A(nx,j)=0
	enddo

	endif

	end
	
	
      SUBROUTINE four1(data,nn,isign)
      INTEGER isign,nn
      real*8 data(2*nn)
      INTEGER i,istep,j,m,mmax,n
      real*8 tempi,tempr
      DOUBLE PRECISION theta,wi,wpi,wpr,wr,wtemp
      n=2*nn
      j=1
      do 11 i=1,n,2
        if(j.gt.i)then
          tempr=data(j)
          tempi=data(j+1)
          data(j)=data(i)
          data(j+1)=data(i+1)
          data(i)=tempr
          data(i+1)=tempi
        endif
        m=n/2
1       if ((m.ge.2).and.(j.gt.m)) then
          j=j-m
          m=m/2
        goto 1
        endif
        j=j+m
11    continue
      mmax=2
2     if (n.gt.mmax) then
        istep=2*mmax
        theta=6.28318530717959d0/(isign*mmax)
        wpr=-2.d0*dsin(0.5d0*theta)**2
        wpi=dsin(theta)
        wr=1.d0
        wi=0.d0
        do 13 m=1,mmax,2
          do 12 i=m,n,istep
            j=i+mmax
            tempr=(wr)*data(j)-(wi)*data(j+1)
            tempi=(wr)*data(j+1)+(wi)*data(j)
            data(j)=data(i)-tempr
            data(j+1)=data(i+1)-tempi
            data(i)=data(i)+tempr
            data(i+1)=data(i+1)+tempi
12        continue
          wtemp=wr
          wr=wr*wpr-wi*wpi+wr
          wi=wi*wpr+wtemp*wpi+wi
13      continue
        mmax=istep
      goto 2
      endif
      return
      END
      
      
	SUBROUTINE realft(data,n,isign)
      INTEGER isign,n
      real*8 data(n)
CU    USES four1
      INTEGER i,i1,i2,i3,i4,n2p3
      real*8 c1,c2,h1i,h1r,h2i,h2r,wis,wrs
      DOUBLE PRECISION theta,wi,wpi,wpr,wr,wtemp
      theta=3.141592653589793d0/dble(n/2)
      c1=0.5
      if (isign.eq.1) then
        c2=-0.5
        call four1(data,n/2,+1)
      else
        c2=0.5
        theta=-theta
      endif
      wpr=-2.0d0*dsin(0.5d0*theta)**2
      wpi=dsin(theta)
      wr=1.0d0+wpr
      wi=wpi
      n2p3=n+3
      do 11 i=2,n/4
        i1=2*i-1
        i2=i1+1
        i3=n2p3-i2
        i4=i3+1
        wrs=(wr)
        wis=(wi)
        h1r=c1*(data(i1)+data(i3))
        h1i=c1*(data(i2)-data(i4))
        h2r=-c2*(data(i2)+data(i4))
        h2i=c2*(data(i1)-data(i3))
        data(i1)=h1r+wrs*h2r-wis*h2i
        data(i2)=h1i+wrs*h2i+wis*h2r
        data(i3)=h1r-wrs*h2r+wis*h2i
        data(i4)=-h1i+wrs*h2i+wis*h2r
        wtemp=wr
        wr=wr*wpr-wi*wpi+wr
        wi=wi*wpr+wtemp*wpi+wi
11    continue
      if (isign.eq.1) then
        h1r=data(1)
        data(1)=h1r+data(2)
        data(2)=h1r-data(2)
      else
        h1r=data(1)
        data(1)=c1*(h1r+data(2))
        data(2)=c1*(h1r-data(2))
        call four1(data,n/2,-1)
      endif
      return
      END


	SUBROUTINE sinft(y,n)
      INTEGER n
      real*8 y(n)
CU    USES realft
      INTEGER j
      real*8 sum,y1,y2
      DOUBLE PRECISION theta,wi,wpi,wpr,wr,wtemp
      theta=3.141592653589793d0/dble(n)
      wr=1.0d0
      wi=0.0d0
      wpr=-2.0d0*dsin(0.5d0*theta)**2
      wpi=dsin(theta)
      y(1)=0.0
      do 11 j=1,n/2
        wtemp=wr
        wr=wr*wpr-wi*wpi+wr
        wi=wi*wpr+wtemp*wpi+wi
        y1=wi*(y(j+1)+y(n-j+1))
        y2=0.5*(y(j+1)-y(n-j+1))
        y(j+1)=y1+y2
        y(n-j+1)=y1-y2
11    continue
      call realft(y,n,+1)
      sum=0.0
      y(1)=0.5*y(1)
      y(2)=0.0
      do 12 j=1,n-1,2
        sum=sum+y(j)
        y(j)=y(j+1)
        y(j+1)=sum
12    continue
      return
      END

     
	SUBROUTINE cosft1(y,n)
      INTEGER n
      real*8 y(n+1)
CU    USES realft
      INTEGER j
      real*8 sum,y1,y2
      DOUBLE PRECISION theta,wi,wpi,wpr,wr,wtemp
      theta=3.141592653589793d0/n
      wr=1.0d0
      wi=0.0d0
      wpr=-2.0d0*dsin(0.5d0*theta)**2
      wpi=dsin(theta)
      sum=0.5*(y(1)-y(n+1))
      y(1)=0.5*(y(1)+y(n+1))
      do 11 j=1,n/2-1
        wtemp=wr
        wr=wr*wpr-wi*wpi+wr
        wi=wi*wpr+wtemp*wpi+wi
        y1=0.5*(y(j+1)+y(n-j+1))
        y2=(y(j+1)-y(n-j+1))
        y(j+1)=y1-wi*y2
        y(n-j+1)=y1+wi*y2
        sum=sum+wr*y2
11    continue
      call realft(y,n,+1)
      y(n+1)=y(2)
      y(2)=sum
      do 12 j=4,n,2
        sum=sum+y(j)
        y(j)=sum
12    continue
      return
      END

T.R	Title	User	Date	Lines
318.1		HPCGRP::MANLEY	`Mon Mar 31 1997 15:21`	18
	> The following code does some sort of fft; there > are 2 routines fft_ce and FFT_se calling cosft1 (sinft1) and > realft colling four1. There are no comments, which strains > my imagination. I am again looking for a way to use dxml. > May be putting the code out to the forum will generate some > questions I could refer to the customer. The routines cosft1, sinft, realft, and four1, have been copied exactly from Numerical Recipes (see pages 501-513 of the second edition for Fortran). DXML roiutines DFCT, DFST, and DFFT may be substituted for cosft1, sinft, and realft. Although fairly straightforward, the substitution is not plug and play. I'll make the changes and post them here. I assume you only need fft_ce and fft_se???
318.2		RTOMS::PARETIJ	`Tue Apr 01 1997 09:30`	52
	>I assume you only need fft_ce and fft_se??? Yes. But since I don't really know what they do I wrote a test program with reduced dimensions to compute only elements 0-1-2 of the arrays by feeding in the elements 0-1-2 of the input array with their values at some point in the customer's program and found DIFFERENT output: real 8 omega(0:2,0:2) real 8 R_omega(0:2,0:2) real 8 I_omega(0:2,0:2) c omega(1,0)=0.234867902391953 omega(1,1)=36.9781659782871 omega(1,2)=154.890240058231 omega(2,0)=0.385906851779590 omega(2,1)=68.8872388030323 omega(2,2)=287.391883911758 c nx=2 ny=2 c write(,)' joe - debug before call - omega ' do ijoe=0,2 write(,)(omega(ijoe,jjoe),jjoe=0,2) end do write(,)' joe - debug R_omega ' do ijoe=0,2 write(,)(R_omega(ijoe,jjoe),jjoe=0,2) end do write(,)' joe - debug I_omega ' do ijoe=0,2 write(,)(I_omega(ijoe,jjoe),jjoe=0,2) end do call fft_se(omega,R_omega,I_omega,ny,nx,1) write(,)' joe - debug after fft_se call - omega ' do ijoe=0,2 write(,)(omega(ijoe,jjoe),jjoe=0,2) end do write(,)' joe - debug R_omega ' do ijoe=0,2 write(,)(R_omega(ijoe,jjoe),jjoe=0,2) end do write(,)' joe - debug I_omega ' do ijoe=0,2 write(,*)(I_omega(ijoe,jjoe),jjoe=0,2) end do 7215 continue end
318.3		HPCGRP::MANLEY	`Wed Apr 02 1997 13:15`	12
	Joseph, For a "+" sign, the columns of "A" are overwritten by output from the cosine (fft_ce) or sine (fft_se) transforms, but not by the fft that's then done across the rows of A. Is it possible to perform both cosine (sine) transform and fft in-place, instead of copying data to Re_ and Im_? (Note: Re_ and Im_ may be defined to overlap A in an interleaved way, thus minimizing the impact on the rest of the code.) - Dwight -
318.4	Some code ...	HPCGRP::MANLEY	`Thu Apr 03 1997 15:55`	210
	Joseph, Please try the code below. However, before you do, re-size the A, Re_, and Im_ matrices to match what the new routines expect (a few extra columns and/or rows are required. I have boldly assumed the A matrix may be treated as both input and scratch for forward transforms, and scratch and output for inverse transforms. If that's not true, this code will not work. Also, if you can, define Re_ and Im_ to overlap A, and get rid of the copy operations altogether. The new code is not optimal. Three step FFTs should be used, but are not. You didn't share much of the application with us ;-), so I don't know how three step FFTs can best be integrated with rest of the application. Best Regards, - Dwight - ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ subroutine fft_ce(A,Re_,Im_,nx,ny,isign) implicit none integer nx,ny,isign real8 A(0:nx+1,0:ny+1),Re_(0:nx+1,0:ny/2),Im_(0:nx+1,0:ny/2) integer i,j if(isign.eq.1)then do j=0,ny-1 call dcosft( A(0,j),nx) A(0,j ) = A(0,j)0.5 A(nx,j) = A(nx,j)0.5 enddo call dfft_grp( 'r','c','f',A,A,ny,nx+1,nx+2,1,1 ) do i=0,nx Re_(i,0)=A(i,0)(2.0/(nxny)) Im_(i,0)=0.0 enddo do j=1,ny/2-1 do i=0,nx Re_(i,j)=A(i,2j+0)(2.0/(nxny)) Im_(i,j)=A(i,2j+1)(2.0/(nxny)) enddo enddo do i=0,nx Re_(i,ny/2)=A(i,ny)(1.0/(nxny)) Im_(i,ny/2)=0.0 enddo else do i=0,nx A(i,0)=nyRe_(i,0) A(i,1)=0.0 do j=1,ny/2-1 A(i,2j+0)=nyRe_(i,j) A(i,2j+1)=nyIm_(i,j) enddo A(i,ny+0)=(2.0ny)Re_(i,ny/2) A(i,ny+1)=0.0 enddo call dfft_grp( 'c','r','b',A,A,ny,nx+1,nx+2,1,1 ) do j=0,ny-1 A(0,j )=2.0A(0,j ) A(nx,j)=2.0A(nx,j) call dcosft( A(0,j),nx ) enddo endif end subroutine fft_se(A,Re_,Im_,nx,ny,isign) implicit none integer nx,ny,isign real8 A(0:nx+1,0:ny+1),Re_(0:nx+1,0:ny/2),Im_(0:nx+1,0:ny/2) integer i,j real8 vec_x(0:nx+1), vec_y(0:ny+1) if(isign.eq.1)then do j=0,ny-1 A(0,j) = 0.0 call dsinft( A(0,j),nx) enddo call dfft_grp( 'r','c','f',A(1,0),A(1,0),ny,nx-1,nx+2,1,1 ) Re_(0,0)=0.0 Im_(0,0)=0.0 do i=1,nx-1 Re_(i,0)=A(i,0)(2.0/(nxny)) Im_(i,0)=0.0 enddo Re_(nx,0)=0.0 Im_(nx,0)=0.0 do j=1,ny/2-1 Re_(0,j)=0.0 Im_(0,j)=0.0 do i=1,nx-1 Re_(i,j)=A(i,2j+0)(2.0/(nxny)) Im_(i,j)=A(i,2j+1)(2.0/(nxny)) enddo Re_(nx,j)=0.0 Im_(nx,j)=0.0 enddo Re_(0,ny/2)=0.0 Im_(0,ny/2)=0.0 do i=1,nx-1 Re_(i,ny/2)=A(i,ny)(1.0/(nxny)) Im_(i,ny/2)=0.0 enddo Re_(nx,ny/2)=0.0 Im_(nx,ny/2)=0.0 else do i=1,nx-1 A(i,0)=nyRe_(i,0) A(i,1)=0.0 enddo do j=1,ny/2-1 do i=1,nx-1 A(i,2j+0)=nyRe_(i,j) A(i,2j+1)=nyIm_(i,j) enddo enddo do i=1,nx-1 A(i,ny+0)=2.0nyRe_(i,ny/2) A(i,ny+1)=0.0 enddo call dfft_grp( 'c','r','b',A(1,0),A(1,0),ny,nx-1,nx+2,1,1 ) do j=0,ny-1 A(0,j )=0.0 call dsinft( A(0,j),nx ) A(nx,j)=0.0 enddo endif end SUBROUTINE dsinft(y,n) INTEGER n real8 y(n) INTEGER j real8 sum,y1,y2 DOUBLE PRECISION theta,wi,wpi,wpr,wr,wtemp theta=3.141592653589793d0/dble(n) wr=1.0d0 wi=0.0d0 wpr=-2.0d0dsin(0.5d0theta)2 wpi=dsin(theta) y(1)=0.0 do j=1,n/2 wtemp=wr wr=wrwpr-wiwpi+wr wi=wiwpr+wtempwpi+wi y1=wi(y(j+1)+y(n-j+1)) y2=0.5(y(j+1)-y(n-j+1)) y(j+1)=y1+y2 y(n-j+1)=y1-y2 enddo call dfft( 'r','c','f',y,y,n,1 ) sum=0.0 y(1)=0.5y(1) do j=1,n-1,2 sum=sum+y(j) y(j)=-y(j+1) y(j+1)=sum enddo return END SUBROUTINE dcosft( y,n ) INTEGER n real8 y(n+1) INTEGER j real8 sum,y1,y2 DOUBLE PRECISION theta,wi,wpi,wpr,wr,wtemp theta=3.141592653589793d0/n wr=1.0d0 wi=0.0d0 wpr=-2.0d0dsin(0.5d0theta)*2 wpi=dsin(theta) sum=0.5(y(1)-y(n+1)) y(1)=0.5(y(1)+y(n+1)) do j=1,n/2-1 wtemp=wr wr=wrwpr-wiwpi+wr wi=wiwpr+wtempwpi+wi y1=0.5(y(j+1)+y(n-j+1)) y2=(y(j+1)-y(n-j+1)) y(j+1)=y1-wiy2 y(n-j+1)=y1+wiy2 sum=sum+wr*y2 enddo call dfft( 'r','c','f',y,y,n,1 ) y(2)=sum do j=4,n,2 sum = sum - y(j) y(j) = sum enddo return END
318.5	thanks Dwight !	RTOMS::PARETIJ	`Sun Apr 06 1997 06:11`	284
	Hi Dwight, thank you so much for your code using DXML. Here's the main, using the fourier routines, as well as the include file with modified dimensions to use your code. The results are correct. The performance of this new code is somehow worse than in the original one (like 120 vs. 150 "" ) However I 've not attempted yet the matrices overlap you suggested Also, could you please post the complete reference to "Numerical REcipes" Thanks again, -Joseph program NSE_2D implicit none include 'NSE_2D.inc' real8 FLD(1:DiffDim) real8 T0,T1,alx,tstep integer nstep integer error integer loop integer loop1 integer i, j real8 t_start, elapsed real8 auxiliary(0:nx,0:ny) external RHS_0 t_start = secnds(0.0) OPEN(Unit=7,FILE='NSE_2D.dat') read(7,)nstep read(7,)FR read(7,)alx read(7,)tstep close(unit=7) Lx = 2.pi Ly = 2.pi open(Unit=7,FILE='state.dat') do loop = 1, Diffdim read(7,) fld(loop) end do close(unit=7) call InitializeRungeKutta open(Unit=8,FILE='NSE_2D.sim',status='UNKNOWN') C------ main loop ----------------------------------------------- do loop=1,nstep T1=T0+tstep write(,) 'integration time = ',T1 CALL RungeKutta(T0,T1,DiffDim,FLD,RHS_0) write (8,100) (fld(i+63), i = 1, 10) T0=T1 enddo 100 format (10F12.6) close (Unit=8) C--------------------------------------------------------------- open(Unit = 7, FILE = 'state.dat') do loop = 1, Diffdim write(7,) fld(loop) end do close (Unit=7) do loop=0,ny do loop1=0,nx-1 auxiliary(loop1,loop)=psi(loop,loop1) enddo auxiliary(nx,loop)=psi(loop,0) enddo open(Unit=7,FILE='stream.dat') do i = 0, ny do j = 0, nx-1 write(7,) auxiliary(i,j) end do end do close(unit=7) elapsed = secnds(t_start) write(,) write(,) 'elapsed time(seconds): ', elapsed STOP end subroutine RHS_0(neq,t0,fld,f_fld) implicit none include 'NSE_2D.inc' integer neq real8 t0 real8 fld(1:neq),f_fld(1:neq) integer num,loop,loop1 do loop=0,ny do loop1=0,nx/2 R_omega(loop,loop1)=0.0 I_omega(loop,loop1)=0.0 enddo enddo num=0 loop1=0 do loop=1,ny-1 R_omega(loop,loop1)=fld(num+1) num=num+1 enddo do loop1=1,nx/2 do loop=1,ny-1 R_omega(loop,loop1)=fld(num+1) I_omega(loop,loop1)=fld(num+2) if(loop1.eq.0) I_omega(loop,loop1)=0.0 num=num+2 enddo enddo call rhs num=0 loop1=0 do loop=1,ny-1 f_fld(num+1)=F_R_omega(loop,loop1) num=num+1 enddo do loop1=1,nx/2 do loop=1,ny-1 f_fld(num+1)=F_R_omega(loop,loop1) f_fld(num+2)=F_I_omega(loop,loop1) num=num+2 enddo enddo return end subroutine rhs implicit none include 'NSE_2D.inc' integer loop,loop1 real8 tpi_Lx,pi_Ly real8 kx,ky,ksq real8 v_1_x_omega(0:ny+1,0:nx+1) real8 v_2_x_omega(0:ny+1,0:nx+1) real8 R_v_1_x_omega(0:ny+1,0:nx/2) real8 I_v_1_x_omega(0:ny+1,0:nx/2) real8 R_v_2_x_omega(0:ny+1,0:nx/2) real8 I_v_2_x_omega(0:ny+1,0:nx/2) tpi_Lx=2.0pi/Lx pi_Ly=pi/Ly do loop1=0,nx/2 do loop=0,ny ky=pi_Lyloop kx=tpi_Lxloop1 ksq=kxkx+kyky if(loop.eq.0.and.loop1.eq.0) then R_v_1(loop,loop1)=0.0 I_v_1(loop,loop1)=0.0 R_v_2(loop,loop1)=0.0 I_v_2(loop,loop1)=0.0 else R_v_1(loop,loop1)=kyR_omega(loop,loop1)/ksq I_v_1(loop,loop1)=kyI_omega(loop,loop1)/ksq R_v_2(loop,loop1)=kxI_omega(loop,loop1)/ksq I_v_2(loop,loop1)=-kxR_omega(loop,loop1)/ksq endif enddo enddo call fft_ce(v_1,R_v_1,I_v_1,ny,nx,-1) call fft_se(v_2,R_v_2,I_v_2,ny,nx,-1) call fft_se(omega,R_omega,I_omega,ny,nx,-1) do loop1=0,nx-1 do loop=0,ny v_1_x_omega(loop,loop1)=v_1(loop,loop1)omega(loop,loop1) v_2_x_omega(loop,loop1)=v_2(loop,loop1)omega(loop,loop1) enddo enddo call fft_se(v_1_x_omega,R_v_1_x_omega,I_v_1_x_omega,ny,nx,1) call fft_ce(v_2_x_omega,R_v_2_x_omega,I_v_2_x_omega,ny,nx,1) do loop1=0,nx/2 kx=tpi_Lxloop1 do loop=1,ny-1 ky=pi_Lyloop ksq=kxkx+kyky F_R_omega(loop,loop1)=-ksqR_omega(loop,loop1)+ & kxI_v_1_x_omega(loop,loop1)+ & kyR_v_2_x_omega(loop,loop1) F_I_omega(loop,loop1)=-ksqI_omega(loop,loop1)- & kxR_v_1_x_omega(loop,loop1)+ & kyI_v_2_x_omega(loop,loop1) enddo enddo F_I_omega(nf,mf)= F_I_omega(nf,mf)-Fr/2. return end c real8 pi parameter(pi=3.141592653589793) C------ nx and ny are the number of grid points integer nx, ny parameter ( nx = 128, ny = 128 ) C------ Lx and Ly are the length of the container, Fr is the forcing real8 Lx, Ly, Fr C------ nf number of forced vortices in the y-direction C------ mf number of forced pairs of vortices in the x-direction integer nf, mf parameter (nf=8,mf=4) C------ DiffDim is the dimension of the resulting system of OdE's integer DiffDim parameter (DiffDim=2(ny-1)nx/2+(ny-1)) C------ omega : vorticity C------ psi : streamfunction C------ v_1, v_2: components of velocity C------ R_omega, I_omega: real and imaginary part of omega C------ R_psi, I_psi: real and imaginary part of the streamfunction C------ R_v_1 and I_v_1: real and imaginary part of the fist C component of the velocity C------ R_v_2 and I_v_2: real and imaginary part of the second C component of the velocity real*8 omega(0:ny+1,0:nx+1),psi(0:ny,0:nx-1), 1 v_1(0:ny+1,0:nx+1),v_2(0:ny+1,0:nx+1), 1 R_omega(0:ny+1,0:nx/2),I_omega(0:ny+1,0:nx/2), 1 R_psi(0:ny,0:nx/2),I_psi(0:ny,0:nx/2), 1 R_v_1(0:ny+1,0:nx/2),I_v_1(0:ny+1,0:nx/2), 1 R_v_2(0:ny+1,0:nx/2),I_v_2(0:ny+1,0:nx/2), 1 F_R_omega(0:ny,0:nx/2),F_I_omega(0:ny,0:nx/2) common/fields/ omega,psi,v_1,v_2, 1 R_omega,I_omega, 1 R_psi,I_psi, 1 R_v_1,I_v_2, 1 F_R_omega,F_I_omega common/params/ Lx, Ly, Fr
318.5	thanks Dwight	RTOMS::PARETIJ	`Mon Apr 07 1997 08:07`	285
	Hi Dwight, thank you so much for your code using DXML. Here's the main, using the fourier routines, as well as the include file with modified dimensions to use your code. The performance of this new code is somehow worse than in the original one (like 120 vs. 150 "" ) However I 've not attempted yet the matrices overlap you suggested Also, could you please post the complete reference to "Numerical REcipes" Thanks again, -Joseph program NSE_2D implicit none include 'NSE_2D.inc' real8 FLD(1:DiffDim) real8 T0,T1,alx,tstep integer nstep integer error integer loop integer loop1 integer i, j real8 t_start, elapsed real8 auxiliary(0:nx,0:ny) external RHS_0 t_start = secnds(0.0) OPEN(Unit=7,FILE='NSE_2D.dat') read(7,)nstep read(7,)FR read(7,)alx read(7,)tstep close(unit=7) Lx = 2.pi Ly = 2.pi open(Unit=7,FILE='state.dat') do loop = 1, Diffdim read(7,) fld(loop) end do close(unit=7) call InitializeRungeKutta open(Unit=8,FILE='NSE_2D.sim',status='UNKNOWN') C------ main loop ----------------------------------------------- do loop=1,nstep T1=T0+tstep write(,) 'integration time = ',T1 CALL RungeKutta(T0,T1,DiffDim,FLD,RHS_0) write (8,100) (fld(i+63), i = 1, 10) T0=T1 enddo 100 format (10F12.6) close (Unit=8) C--------------------------------------------------------------- open(Unit = 7, FILE = 'state.dat') do loop = 1, Diffdim write(7,) fld(loop) end do close (Unit=7) do loop=0,ny do loop1=0,nx-1 auxiliary(loop1,loop)=psi(loop,loop1) enddo auxiliary(nx,loop)=psi(loop,0) enddo open(Unit=7,FILE='stream.dat') do i = 0, ny do j = 0, nx-1 write(7,) auxiliary(i,j) end do end do close(unit=7) elapsed = secnds(t_start) write(,) write(,) 'elapsed time(seconds): ', elapsed STOP end subroutine RHS_0(neq,t0,fld,f_fld) implicit none include 'NSE_2D.inc' integer neq real8 t0 real8 fld(1:neq),f_fld(1:neq) integer num,loop,loop1 do loop=0,ny do loop1=0,nx/2 R_omega(loop,loop1)=0.0 I_omega(loop,loop1)=0.0 enddo enddo num=0 loop1=0 do loop=1,ny-1 R_omega(loop,loop1)=fld(num+1) num=num+1 enddo do loop1=1,nx/2 do loop=1,ny-1 R_omega(loop,loop1)=fld(num+1) I_omega(loop,loop1)=fld(num+2) if(loop1.eq.0) I_omega(loop,loop1)=0.0 num=num+2 enddo enddo call rhs num=0 loop1=0 do loop=1,ny-1 f_fld(num+1)=F_R_omega(loop,loop1) num=num+1 enddo do loop1=1,nx/2 do loop=1,ny-1 f_fld(num+1)=F_R_omega(loop,loop1) f_fld(num+2)=F_I_omega(loop,loop1) num=num+2 enddo enddo return end subroutine rhs implicit none include 'NSE_2D.inc' integer loop,loop1 real8 tpi_Lx,pi_Ly real8 kx,ky,ksq real8 v_1_x_omega(0:ny+1,0:nx+1) real8 v_2_x_omega(0:ny+1,0:nx+1) real8 R_v_1_x_omega(0:ny+1,0:nx/2) real8 I_v_1_x_omega(0:ny+1,0:nx/2) real8 R_v_2_x_omega(0:ny+1,0:nx/2) real8 I_v_2_x_omega(0:ny+1,0:nx/2) tpi_Lx=2.0pi/Lx pi_Ly=pi/Ly do loop1=0,nx/2 do loop=0,ny ky=pi_Lyloop kx=tpi_Lxloop1 ksq=kxkx+kyky if(loop.eq.0.and.loop1.eq.0) then R_v_1(loop,loop1)=0.0 I_v_1(loop,loop1)=0.0 R_v_2(loop,loop1)=0.0 I_v_2(loop,loop1)=0.0 else R_v_1(loop,loop1)=kyR_omega(loop,loop1)/ksq I_v_1(loop,loop1)=kyI_omega(loop,loop1)/ksq R_v_2(loop,loop1)=kxI_omega(loop,loop1)/ksq I_v_2(loop,loop1)=-kxR_omega(loop,loop1)/ksq endif enddo enddo call fft_ce(v_1,R_v_1,I_v_1,ny,nx,-1) call fft_se(v_2,R_v_2,I_v_2,ny,nx,-1) call fft_se(omega,R_omega,I_omega,ny,nx,-1) do loop1=0,nx-1 do loop=0,ny v_1_x_omega(loop,loop1)=v_1(loop,loop1)omega(loop,loop1) v_2_x_omega(loop,loop1)=v_2(loop,loop1)omega(loop,loop1) enddo enddo call fft_se(v_1_x_omega,R_v_1_x_omega,I_v_1_x_omega,ny,nx,1) call fft_ce(v_2_x_omega,R_v_2_x_omega,I_v_2_x_omega,ny,nx,1) do loop1=0,nx/2 kx=tpi_Lxloop1 do loop=1,ny-1 ky=pi_Lyloop ksq=kxkx+kyky F_R_omega(loop,loop1)=-ksqR_omega(loop,loop1)+ & kxI_v_1_x_omega(loop,loop1)+ & kyR_v_2_x_omega(loop,loop1) F_I_omega(loop,loop1)=-ksqI_omega(loop,loop1)- & kxR_v_1_x_omega(loop,loop1)+ & kyI_v_2_x_omega(loop,loop1) enddo enddo F_I_omega(nf,mf)= F_I_omega(nf,mf)-Fr/2. return end c real8 pi parameter(pi=3.141592653589793) C------ nx and ny are the number of grid points integer nx, ny parameter ( nx = 128, ny = 128 ) C------ Lx and Ly are the length of the container, Fr is the forcing real8 Lx, Ly, Fr C------ nf number of forced vortices in the y-direction C------ mf number of forced pairs of vortices in the x-direction integer nf, mf parameter (nf=8,mf=4) C------ DiffDim is the dimension of the resulting system of OdE's integer DiffDim parameter (DiffDim=2(ny-1)nx/2+(ny-1)) C------ omega : vorticity C------ psi : streamfunction C------ v_1, v_2: components of velocity C------ R_omega, I_omega: real and imaginary part of omega C------ R_psi, I_psi: real and imaginary part of the streamfunction C------ R_v_1 and I_v_1: real and imaginary part of the fist C component of the velocity C------ R_v_2 and I_v_2: real and imaginary part of the second C component of the velocity real*8 omega(0:ny+1,0:nx+1),psi(0:ny,0:nx-1), 1 v_1(0:ny+1,0:nx+1),v_2(0:ny+1,0:nx+1), 1 R_omega(0:ny+1,0:nx/2),I_omega(0:ny+1,0:nx/2), 1 R_psi(0:ny,0:nx/2),I_psi(0:ny,0:nx/2), 1 R_v_1(0:ny+1,0:nx/2),I_v_1(0:ny+1,0:nx/2), 1 R_v_2(0:ny+1,0:nx/2),I_v_2(0:ny+1,0:nx/2), 1 F_R_omega(0:ny,0:nx/2),F_I_omega(0:ny,0:nx/2) common/fields/ omega,psi,v_1,v_2, 1 R_omega,I_omega, 1 R_psi,I_psi, 1 R_v_1,I_v_2, 1 F_R_omega,F_I_omega common/params/ Lx, Ly, Fr
318.6		HPCGRP::MANLEY	`Mon Apr 07 1997 10:00`	11
	> The performance of this new code > is somehow worse than in the original one (like 120 vs. 150 "" ) This comes as no surprise, one step ffts are not efficient. Thank you for the include file. I will fix the program to use the versions of fftce and fftse I sent you via e-mail last week. I will also change the program to use three step fast sine transforms, three step fast cosine transforms, and three step ffts.