#include "w3macros.h" !/ ------------------------------------------------------------------- / MODULE W3PROFSMD !/ !/ +-----------------------------------+ !/ | WAVEWATCH III NOAA/NCEP | !/ | Aron Roland | !/ | Fabrice Ardhuin | !/ | FORTRAN 90 | !/ | Last update : 18-Aug-2016 | !/ +-----------------------------------+ !/ !/ XX-Nov-2007 : Origination. ( version 3.10 ) !/ 03-Nov-2011 : Adding shoreline reflection ( version 4.04 ) !/ 03-Jun-2013 : Removed assign statements ( version 4.10 ) !/ 20-Jun-2013 : Update test output for time steps ( version 4.10 ) !/ 17-Oct-2013 : Removes boundary nodes from CFL ( version 4.12 ) !/ 15-Dec-2013 : Bug fix for implicit scheme ( version 4.16 ) !/ 18-Aug-2016 : Corrected boundary treatment ( version 4.16 ) ! ! 1. Purpose : ! ! Propagation schemes for unstructured grids using fluctuation splitting ! ! 2. Variables and types : ! ! Name Type Scope Description ! ---------------------------------------------------------------- ! ---------------------------------------------------------------- ! ! 3. Subroutines and functions : ! ! Name Type Scope Description ! ---------------------------------------------------------------- ! W3XYPUG Subr. Public Generic fluctuation splitting operations ! W3XYPFSN2 Subr. Public advection with N scheme (Csik et al. 2002) ! W3XYPFSPSI Subr. Public advection with FCT scheme ! W3XYPFSFCT2 Subr. Public advection with FCT scheme ! ---------------------------------------------------------------- ! ! 4. Subroutines and functions used : ! ! Name Type Module Description ! ---------------------------------------------------------------- ! ---------------------------------------------------------------- ! ! 5. Remarks : ! For a detailed description of the schemes and their properties, see ! Roland (2008), Ph.D. Thesis, T. U. Darmstadt. ! ! 6. Switches : ! ! 7. Source code : !/ !/ ------------------------------------------------------------------- / !/ PUBLIC !/ CONTAINS !/ ------------------------------------------------------------------- / SUBROUTINE W3XYPUG ( ISP, FACX, FACY, DTG, VQ, VGX, VGY, LCALC ) !/ !/ +-----------------------------------+ !/ | WAVEWATCH III NOAA/NCEP | !/ | Aron Roland | !/ | FORTRAN 90 | !/ | Last update : 10-Jan-2011 | !/ +-----------------------------------+ !/ !/ 10-Jan-2008 : Origination. ( version 3.13 ) !/ 10-Jan-2011 : Addition of implicit scheme ( version 3.14.4 ) !/ ! 1. Purpose : ! ! Propagation in physical space for a given spectral component. ! Gives the choice of scheme on unstructured grid ! ! 2. Method : ! ! 3. Parameters : ! ! Parameter list ! ---------------------------------------------------------------- ! ISP Int. I Number of spectral bin (IK-1)*NTH+ITH ! FACX/Y Real I Factor in propagation velocity. ! ( 1 or 0 * DT / DX ) ! DTG Real I Total time step. ! VQ R.A. I/O Field to propagate. ! VGX/Y Real I Speed of grid. ! ---------------------------------------------------------------- ! ! Local variables. ! ---------------------------------------------------------------- ! VCFL0X R.A. Local courant numbers for absolute group vel. ! using local X-grid step. ! VCFL0Y R.A. Id. in Y. ! ---------------------------------------------------------------- ! ! 4. Subroutines used : ! ! 5. Called by : ! ! W3WAVE Wave model routine. ! ! 6. Error messages : ! ! None. ! ! 7. Remarks : ! make the interface between the WAVEWATCH and the WWM code. ! ! 8. Structure : ! ! 9. Switches : ! ! !/S Enable subroutine tracing. ! ! 10. Source code : !/ ------------------------------------------------------------------- / !/ ! USE CONSTANTS ! USE W3TIMEMD, ONLY: DSEC21 ! USE W3GDATMD, ONLY: NX, NY, NSEA, MAPSF, MAPFS, DTCFL, CLATS, & FLCX, FLCY, NK, NTH, DTH, XFR, & ECOS, ESIN, SIG, PFMOVE,IEN, & NTRI, TRIGP, CCON , & IE_CELL, POS_CELL, IOBP, IOBPD, & FSN, FSPSI, FSFCT, FSNIMP, GTYPE, UNGTYPE USE W3WDATMD, ONLY: TIME USE W3ODATMD, ONLY: TBPI0, TBPIN, FLBPI USE W3ADATMD, ONLY: CG, CX, CY, ATRNX, ATRNY, ITIME, CFLXYMAX USE W3IDATMD, ONLY: FLCUR ! USE W3ODATMD, ONLY: NDSE, NDST, FLBPI, NBI, TBPI0, TBPIN, & ! ISBPI, BBPI0, BBPIN IMPLICIT NONE !/ ------------------------------------------------------------------- / !/ Parameter list !/ INTEGER, INTENT(IN) :: ISP REAL, INTENT(IN) :: FACX, FACY, DTG, VGX, VGY REAL, INTENT(INOUT) :: VQ(1-NY:NY*(NX+2)) LOGICAL, INTENT(IN) :: LCALC !/ !/ ------------------------------------------------------------------- / !/ Local parameters !/ INTEGER :: ITH, IK, ISEA, IXY INTEGER :: IX REAL :: CCOS, CSIN, CCURX, CCURY REAL :: C(NX,2) REAL :: RD1, RD2 !/ !/ Automatic work arrays !/ REAL :: VLCFLX((NX+1)*NY), VLCFLY(NX*NY) DOUBLE PRECISION :: AC(NX) !/ ------------------------------------------------------------------- / ! ! 1. Preparations --------------------------------------------------- * ! 1.a Set constants ! ITH = 1 + MOD(ISP-1,NTH) IK = 1 + (ISP-1)/NTH CCOS = FACX * ECOS(ITH) CSIN = FACY * ESIN(ITH) CCURX = FACX CCURY = FACY ! ! 1.b Initialize arrays ! VLCFLX = 0. VLCFLY = 0. ! ! 1.c Set depth ! CALL SETDEPTH ! ! 2. Calculate velocities ---------------- * ! DO ISEA=1, NSEA IXY = MAPSF(ISEA,3) VQ(IXY) = VQ(IXY) / CG(IK,ISEA) * CLATS(ISEA) VLCFLX(IXY) = CCOS * CG(IK,ISEA) / CLATS(ISEA) VLCFLY(IXY) = CSIN * CG(IK,ISEA) END DO IF ( FLCUR ) THEN DO ISEA=1, NSEA IXY = MAPSF(ISEA,3) ! ! Currents are not included on coastal boundaries (IOBP(IXY).EQ.0) ! IF (IOBP(IXY) .EQ. 1) THEN VLCFLX(IXY) = VLCFLX(IXY) + CCURX*CX(ISEA)/CLATS(ISEA) VLCFLY(IXY) = VLCFLY(IXY) + CCURY*CY(ISEA) END IF END DO END IF ! ! 3. initialize fluctuation splitting arrays ( to fit with WWM notations) ! AC(1:NX) = DBLE(VQ(1:NX)) C(1:NX,1) = VLCFLX(1:NX) C(1:NX,2) = VLCFLY(1:NX) ! ! 4. Prepares boundary update ! IF ( FLBPI ) THEN RD1 = DSEC21 ( TBPI0, TIME ) RD2 = DSEC21 ( TBPI0, TBPIN ) ELSE RD1=1. RD2=0. END IF ! ! 4. propagate using the selected scheme ! IF (FSN) THEN CALL W3XYPFSN2 (ISP, C, LCALC, RD1, RD2, DTG, AC) ELSE IF (FSPSI) THEN CALL W3XYPFSPSI2 (ISP, C, LCALC, RD1, RD2, DTG, AC) ELSE IF (FSFCT) THEN CALL W3XYPFSFCT2 (ISP, C, LCALC, RD1, RD2, DTG, AC) ELSE IF (FSNIMP) THEN CALL W3XYPFSNIMP(ISP, C, LCALC, RD1, RD2, DTG, AC) ENDIF ! DO IX=1,NX ISEA=MAPFS(1,IX) VQ(IX)=REAL(AC(IX)) ENDDO ! 6. Store results in VQ in proper format --------------------------- * ! DO ISEA=1, NSEA IXY = MAPSF(ISEA,3) VQ(IXY) = MAX ( 0. , CG(IK,ISEA)/CLATS(ISEA)*VQ(IXY) ) END DO END SUBROUTINE W3XYPUG !/ ------------------------------------------------------------------- / SUBROUTINE W3CFLUG ( ISEA, NKCFL, FACX, FACY, DT, MAPFS, CFLXYMAX, & VGX, VGY ) !/ !/ +-----------------------------------+ !/ | WAVEWATCH III NOAA/NCEP | !/ | Fabrice Ardhuin | !/ | FORTRAN 90 | !/ | Last update : 20-Jun-2013 | !/ +-----------------------------------+ !/ !/ 01-Mar-2011 : Origination. ( version 3.14 ) !/ 20-Jun-2013 : Computes only up to NKCFL for tests ( version 4.10 ) !/ 1-Jun-2017 : Rewrite routine for performance ( version 5.xx ) !/ ! 1. Purpose : ! ! Computes the max CFL number for output purposes ! ! 2. Method : ! ! 3. Parameters : ! ! Parameter list ! ---------------------------------------------------------------- ! ISEA Int. I Index of sea point ! NKCFL Int. I Maximum frequency index ! FACX/Y Real I Factor in propagation velocity. ! ( 1 or 0 * DT / DX ) ! DT Real I Time step. ! MAPFS I.A. I Storage map. ! CFLXYMAX Real Maxmimum CFL for spatial advection ! VGX/Y Real I Speed of grid. ! ---------------------------------------------------------------- ! ! Local variables. ! ---------------------------------------------------------------- ! VCFL0X R.A. Local courant numbers for absolute group vel. ! using local X-grid step. ! VCFL0Y R.A. Id. in Y. ! ---------------------------------------------------------------- ! ! 4. Subroutines used : ! ! 5. Called by : ! ! W3WAVE Wave model routine. ! ! 6. Error messages : ! ! None. ! ! 7. Remarks : ! make the interface between the WAVEWATCH and the WWM code. ! ! 8. Structure : ! ! 9. Switches : ! ! !/S Enable subroutine tracing. ! ! 10. Source code : !/ ------------------------------------------------------------------- / !/ ! USE CONSTANTS ! USE W3TIMEMD, ONLY: DSEC21 ! USE W3GDATMD, ONLY: NX, NY, NSEA, MAPSF, DTCFL, CLATS, & FLCX, FLCY, NK, NTH, DTH, XFR, & ECOS, ESIN, SIG, PFMOVE,IEN, INDEX_CELL, & NTRI, TRIGP, CCON , & IE_CELL, POS_CELL, COUNTRI, SI, IOBP, XYB USE W3ADATMD, ONLY: CG, CX, CY, ATRNX, ATRNY, ITIME, DW USE W3IDATMD, ONLY: FLCUR IMPLICIT NONE !/ ------------------------------------------------------------------- / !/ Parameter list !/ INTEGER, INTENT(IN) :: ISEA, NKCFL, MAPFS(NY*NX) REAL, INTENT(IN) :: FACX, FACY, DT, VGX, VGY REAL, INTENT(INOUT) :: CFLXYMAX !/ !/ ------------------------------------------------------------------- / !/ Local parameters !/ INTEGER :: ITH, IK INTEGER :: IP, IP2, ISEA2, I, J, IE, IV, I1, I2, I3 REAL :: CCOS, CSIN, CCURX, CCURY REAL :: C(NX,2) REAL*8 :: KELEM(3), KTMP(3), LAMBDA(2) REAL*8 :: KKSUM, DTMAXEXP !/ !/ Velocities !/ REAL*8, PARAMETER :: ONESIXTH = 1.0d0/6.0d0 REAL*8, PARAMETER :: THR8 = TINY(1.0d0) REAL, PARAMETER :: THR = TINY(1.0) !/ ------------------------------------------------------------------- / ! ! 1. Preparations --------------------------------------------------- * ! 1.a Set constants ! CFLXYMAX=1E-10 IP = MAPSF(ISEA,3) ! ! CFL no important on boundary ! IF (IOBP(IP).EQ.1) THEN CCURX = FACX CCURY = FACY ! ! Loop over spectral components ! DO IK=1,NKCFL DO ITH=1,NTH CCOS = FACX * ECOS(ITH) CSIN = FACY * ESIN(ITH) C(:,:)=0. ! ! 2. Calculate advection velocities: group speed ---------------- * ! !AR: needs to be rewritten for speed ... single node computation is costly ... !MA: you are right but now it is only called if CFX and UNST for CFL profiling DO I = INDEX_CELL(IP), INDEX_CELL(IP+1)-1 IE=IE_CELL(I) ! TRIGP(IE,IV)=IP with IV=POS_CELL(I) DO J=1,3 IP2=TRIGP(IE,J) ISEA2=MAPFS(IP2) C(IP2,1) = CCOS * CG(IK,ISEA2) / CLATS(ISEA2) C(IP2,2) = CSIN * CG(IK,ISEA2) IF ( FLCUR ) THEN IF (IOBP(IP2) .EQ. 1) THEN C(IP2,1) = C(IP2,1) + CCURX*CX(ISEA2)/CLATS(ISEA2) C(IP2,2) = C(IP2,2) + CCURY*CY(ISEA2) END IF END IF ! end of ( FLCUR ) END DO END DO ! !3. Calculate K-Values and contour based quantities ... ! KKSUM = 0.d0 DO I = INDEX_CELL(IP), INDEX_CELL(IP+1)-1 IE=IE_CELL(I) ! TRIGP(IE,IV)=IP IV=POS_CELL(I) I1 = TRIGP(IE,1) I2 = TRIGP(IE,2) I3 = TRIGP(IE,3) LAMBDA(1) = ONESIXTH *(C(I1,1)+C(I2,1)+C(I3,1)) ! Advection speed in X direction LAMBDA(2) = ONESIXTH *(C(I1,2)+C(I2,2)+C(I3,2)) ! Advection speed in Y direction KELEM(1) = LAMBDA(1) * IEN(IE,1) + LAMBDA(2) * IEN(IE,2) ! K-Values - so called Flux Jacobians KELEM(2) = LAMBDA(1) * IEN(IE,3) + LAMBDA(2) * IEN(IE,4) ! K-Values - so called Flux Jacobians KELEM(3) = LAMBDA(1) * IEN(IE,5) + LAMBDA(2) * IEN(IE,6) ! K-Values - so called Flux Jacobians KTMP = KELEM ! Copy KELEM = MAX(0.d0,KTMP) KKSUM = KKSUM + KELEM(IV) END DO ! COUNTRI ! DTMAXEXP = SI(IP)/MAX(DBLE(10.E-10),KKSUM) CFLXYMAX = MAX(DBLE(DT)/DTMAXEXP,DBLE(CFLXYMAX)) END DO END DO END IF ! RETURN END SUBROUTINE W3CFLUG !/ ------------------------------------------------------------------- / SUBROUTINE W3XYPFSN2 ( ISP, C, LCALC, RD10, RD20, DT, AC) !/ !/ !/ +-----------------------------------+ !/ | WWIII Version of the WWM FS Code | !/ | by Aron Roland | !/ | and Fabrice Ardhuin | !/ | for use in WWIII | !/ | GPL License | !/ | Last update : 03-Nov-2011 | !/ +-----------------------------------+ !/ !/ 19-Dec-2007 : Origination. ( version 3.13 ) !/ 25-Aug-2011 : Change of method for IOBPD ( version 4.04 ) !/ 03-Nov-2011 : Addition of shoreline reflection ( version 4.04 ) !/ !/ ! 1. Purpose : ! Advection of a scalar in a arbitary velocity field on unstructured meshes ! for the conservative hyperbolic equation N,t + (c*N),xy = 0 in spatial space ! This is the standard explicit N-Scheme from Roe as formulated in Abgrall ! ! 2. Method : ! ! 3. Parameters : ! ! Parameter list ! ---------------------------------------------------------------- ! ---------------------------------------------------------------- ! ! 4. Subroutines used : ! ! STRACE Subroutine tracing (!/S switch) ! ! 5. Called by : ! ! W3XYPUG Routine for advection on unstructured grid ! ! 6. Error messages : ! ! None. ! ! 7. Remarks : ! ! 8. Structure : ! ! See source code. ! ! 9. Switches : ! ! !/S Enable subroutine tracing. ! ! 10. Source code : ! !/ ------------------------------------------------------------------- / !/ USE W3GDATMD, ONLY : NK, NTH, NTRI, NX, CCON, IE_CELL,POS_CELL, SI, & IEN, TRIGP, CLATS, MAPSF, IOBPD, IOBP, IOBDP, IOBPA, XYB USE W3GDATMD, ONLY : REFPARS USE W3WDATMD, ONLY: TIME USE W3ADATMD, ONLY: CG, ITER USE W3ODATMD, ONLY: NDSE, NDST, FLBPI, NBI, TBPI0, TBPIN, ISBPI, BBPI0, BBPIN USE W3TIMEMD, ONLY: DSEC21 IMPLICIT NONE INTEGER, INTENT(IN) :: ISP ! Actual Frequency/Wavenumber, actual Wave Direction REAL, INTENT(IN) :: DT ! Time intervall for which the advection should be computed for the given vel REAL, INTENT(IN) :: C(:,:) ! Velocity field in it's X- and Y- Components, DOUBLE PRECISION, INTENT(INOUT):: AC(:) ! Wave Action before and after advection REAL, INTENT(IN) :: RD10, RD20 ! Time interpolation coefficients for boundary conditions LOGICAL, INTENT(IN) :: LCALC ! Switch for the calculation of the max. Global Time step !/ !/ ------------------------------------------------------------------- / !/ Parameter list !/ !/ !/ ------------------------------------------------------------------- / !/ Local parameters !/ REAL*8, PARAMETER :: ONESIXTH = 1.0d0/6.0d0 REAL*8, PARAMETER :: THR8 = TINY(1.0d0) REAL, PARAMETER :: THR = TINY(1.0) !/ !/ ------------------------------------------------------------------- / !/ ! ! local integer ! INTEGER :: IP, IE, IT, I1, I2, I3, ITH, IK INTEGER :: IBI, NI(3) ! ! local real ! REAL :: RD1, RD2 !: ! local double ! REAL*8 :: UTILDE, BOUNDARY_FORCING REAL*8 :: CFLXY REAL*8 :: FL11, FL12, FL21, FL22, FL31, FL32 REAL*8 :: FL111, FL112, FL211, FL212, FL311, FL312 REAL*8 :: DTSI(NX), U(NX) REAL*8 :: DTMAXGL, DTMAXEXP, REST REAL*8 :: LAMBDA(2), KTMP(3) REAL*8 :: KELEM(3,NTRI), FLALL(3,NTRI) REAL*8 :: KKSUM(NX), ST(NX) REAL*8 :: NM(NTRI) ! 1. initialisation ITH = 1 + MOD(ISP-1,NTH) IK = 1 + (ISP-1)/NTH DTMAXGL = DBLE(10.E10) ! !2 Propagation !2.a Calculate K-Values and contour based quantities ... ! DO IE = 1, NTRI ! I precacalculate this arrays below as I assume that current velocity changes continusly ... I1 = TRIGP(IE,1) ! Index of the Element Nodes (TRIGP) I2 = TRIGP(IE,2) I3 = TRIGP(IE,3) LAMBDA(1) = ONESIXTH *(C(I1,1)+C(I2,1)+C(I3,1)) ! Linearized advection speed in X and Y direction LAMBDA(2) = ONESIXTH *(C(I1,2)+C(I2,2)+C(I3,2)) KELEM(1,IE) = LAMBDA(1) * IEN(IE,1) + LAMBDA(2) * IEN(IE,2) ! K-Values - so called Flux Jacobians KELEM(2,IE) = LAMBDA(1) * IEN(IE,3) + LAMBDA(2) * IEN(IE,4) KELEM(3,IE) = LAMBDA(1) * IEN(IE,5) + LAMBDA(2) * IEN(IE,6) ! KTMP = KELEM(:,IE) ! Copy NM(IE) = - 1.D0/MIN(-THR8,SUM(MIN(0.d0,KTMP))) ! N-Values KELEM(:,IE) = MAX(0.d0,KTMP) FL11 = C(I2,1) * IEN(IE,1) + C(I2,2) * IEN(IE,2) ! Weights for Simpson Integration FL12 = C(I3,1) * IEN(IE,1) + C(I3,2) * IEN(IE,2) FL21 = C(I3,1) * IEN(IE,3) + C(I3,2) * IEN(IE,4) FL22 = C(I1,1) * IEN(IE,3) + C(I1,2) * IEN(IE,4) FL31 = C(I1,1) * IEN(IE,5) + C(I1,2) * IEN(IE,6) FL32 = C(I2,1) * IEN(IE,5) + C(I2,2) * IEN(IE,6) FL111 = 2.d0*FL11+FL12 FL112 = 2.d0*FL12+FL11 FL211 = 2.d0*FL21+FL22 FL212 = 2.d0*FL22+FL21 FL311 = 2.d0*FL31+FL32 FL312 = 2.d0*FL32+FL31 FLALL(1,IE) = (FL311 + FL212) * ONESIXTH + KELEM(1,IE) FLALL(2,IE) = (FL111 + FL312) * ONESIXTH + KELEM(2,IE) FLALL(3,IE) = (FL211 + FL112) * ONESIXTH + KELEM(3,IE) ! IF (I1.EQ.1.OR.I2.EQ.1.OR.I3.EQ.1) WRITE(6,*) 'TEST N1 :',IK,ITH,IP,IE,KELEM(:,IE),'##',LAMBDA END DO ! NTRI IF (LCALC) THEN ! If the current field or water level changes estimate the iteration number based on the new flow field a KKSUM = 0.d0 DO IE = 1, NTRI NI = TRIGP(IE,:) KKSUM(NI) = KKSUM(NI) + KELEM(:,IE) END DO ! IE DTMAXEXP = 1E10 ! initialize to large number DO IP = 1, NX DTMAXEXP = SI(IP)/MAX(DBLE(10.E-10),KKSUM(IP)*IOBDP(IP)) DTMAXGL = MIN( DTMAXGL, DTMAXEXP) END DO ! IP CFLXY = DBLE(DT)/DTMAXGL REST = ABS(MOD(CFLXY,1.0d0)) IF (REST .LT. THR8) THEN ITER(IK,ITH) = ABS(NINT(CFLXY)) ELSE IF (REST .GT. THR8 .AND. REST .LT. 0.5d0) THEN ITER(IK,ITH) = ABS(NINT(CFLXY)) + 1 ELSE ITER(IK,ITH) = ABS(NINT(CFLXY)) END IF END IF ! LCALC DO IP = 1, NX DTSI(IP) = DBLE(DT)/DBLE(ITER(IK,ITH))/SI(IP) ! Some precalculations for the time integration. END DO DO IT = 1, ITER(IK,ITH) U = AC ST = 0.d0 DO IE = 1, NTRI NI = TRIGP(IE,:) UTILDE = NM(IE) * (DOT_PRODUCT(FLALL(:,IE),U(NI))) ST(NI) = ST(NI) + KELEM(:,IE) * (U(NI) - UTILDE) ! the 2nd term are the theta values of each node ... END DO ! IE DO IP = 1, NX ! ! IOBPD=0 : waves coming from land (or outside grid) ! Possibly set flux to zero by multiplying ST by IOBPD but also in UTILDE multiply U(NI) by IOBPD ... ! U(IP) = MAX(0.d0,U(IP)-DTSI(IP)*ST(IP)*(1-IOBPA(IP)))*DBLE(IOBPD(ITH,IP)) IF (REFPARS(3).LT.0.5.AND.IOBPD(ITH,IP).EQ.0.AND.IOBPA(IP).EQ.0) U(IP)= AC(IP) ! restores reflected boundary values END DO ! update spectrum AC = U ! ! 4 Update boundaries: performs interpolation in time as done in rect grids (e.g. w3pro1md.ftn) ! IF ( FLBPI ) THEN ! ! 4.1 In this case the boundary is read from the nest.ww3 file ! RD1=RD10 - DT * REAL(ITER(IK,ITH)-IT)/REAL(ITER(IK,ITH)) RD2=RD20 IF ( RD2 .GT. 0.001 ) THEN RD2 = MIN(1.,MAX(0.,RD1/RD2)) RD1 = 1. - RD2 ELSE RD1 = 0. RD2 = 1. END IF ! ! Overwrites only the incoming energy ( IOBPD(ITH,IP) = 0) ! DO IBI=1, NBI IP = MAPSF(ISBPI(IBI),1) AC(IP) = ( RD1*BBPI0(ISP,IBI) + RD2*BBPIN(ISP,IBI) ) & / CG(IK,ISBPI(IBI)) * CLATS(ISBPI(IBI)) END DO ENDIF ! END DO ! End of loop on time steps ! CALL EXTCDE ( 99 ) !/ !/ End of W3XYPFSN ----------------------------------------------------- / !/ END SUBROUTINE W3XYPFSN2 !/ ------------------------------------------------------------------- / SUBROUTINE W3XYPFSPSI2 ( ISP, C, LCALC, RD10, RD20, DT, AC) !/ !/ !/ +-----------------------------------+ !/ | WWIII Version of the WWM FS Code | !/ | by Aron Roland | !/ | for use in WWIII | !/ | GPL License | !/ | Last update : 19-Dec-2007 | !/ +-----------------------------------+ !/ ! 1. Purpose : ! Advection of a scalar in a arbitary velocity field on unstructured meshes ! for the conservative hyperbolic equation N,t + (c*N),xy = 0 in spatial space ! This is the standard explicit N-Scheme from Roe as formulated in Abgrall ! ! 2. Method : ! ! 3. Parameters : ! ! Parameter list ! ---------------------------------------------------------------- ! ---------------------------------------------------------------- ! ! 4. Subroutines used : ! ! STRACE Subroutine tracing (!/S switch) ! ! 5. Called by : ! ! W3XYPUG Routine for advection on unstructured grid ! ! 6. Error messages : ! ! None. ! ! 7. Remarks : ! ! 8. Structure : ! ! See source code. ! ! 9. Switches : ! ! !/S Enable subroutine tracing. ! ! 10. Source code : ! !/ ------------------------------------------------------------------- / !/ USE W3GDATMD, ONLY : NK, NTH, NTRI, NX, CCON, IE_CELL,POS_CELL, SI, & IEN, TRIGP, CLATS, MAPSF, IOBPA, IOBPD, IOBP, NNZ, IOBDP USE W3GDATMD, ONLY : REFPARS USE W3WDATMD, ONLY: TIME USE W3ADATMD, ONLY: CG, ITER USE W3ODATMD, ONLY: NDSE, NDST, FLBPI, NBI, TBPI0, TBPIN, ISBPI, BBPI0, BBPIN USE W3TIMEMD, ONLY: DSEC21 IMPLICIT NONE INTEGER, INTENT(IN) :: ISP ! Actual Frequency/Wavenumber, actual Wave Direction REAL, INTENT(IN) :: DT ! Time intervall for which the advection should be computed for the given vel REAL, INTENT(IN) :: C(:,:) ! Velocity field in it's X- and Y- Components, DOUBLE PRECISION,INTENT(INOUT) :: AC(:) ! Wave Action before and after advection REAL, INTENT(IN) :: RD10, RD20 ! Time interpolation coefficients for boundary conditions LOGICAL, INTENT(IN) :: LCALC ! Switch for the calculation of the max. Global Time step !/ !/ ------------------------------------------------------------------- / !/ Parameter list !/ !/ !/ ------------------------------------------------------------------- / !/ Local parameters !/ REAL*8, PARAMETER :: ONESIXTH = 1.0d0/6.0d0 REAL*8, PARAMETER :: THR8 = TINY(1.0d0) REAL, PARAMETER :: THR = TINY(1.0) !/ !/ ------------------------------------------------------------------- / !/ ! ! local integer ! INTEGER :: IP, IE, IT, I1, I2, I3, ITH, IK INTEGER :: IBI, NI(3) ! ! local real ! REAL :: RD1, RD2 !: ! local double ! REAL*8 :: UTILDE, BOUNDARY_FORCING REAL*8 :: FT, CFLXY REAL*8 :: FL11, FL12, FL21, FL22, FL31, FL32 REAL*8 :: FL111, FL112, FL211, FL212, FL311, FL312 REAL*8 :: DTSI(NX), U(NX) REAL*8 :: DTMAXGL, DTMAXEXP, REST REAL*8 :: LAMBDA(2), KTMP(3), TMP(3) REAL*8 :: THETA_L(3), BET1(3), BETAHAT(3) REAL*8 :: KELEM(3,NTRI), FLALL(3,NTRI) REAL*8 :: KKSUM(NX), ST(NX) REAL*8 :: NM(NTRI) ! 1. initialisation ITH = 1 + MOD(ISP-1,NTH) IK = 1 + (ISP-1)/NTH DTMAXGL = DBLE(10.E10) ! !2 Propagation !2.a Calculate K-Values and contour based quantities ... ! DO IE = 1, NTRI ! I precacalculate this arrays below as I assume that current velocity changes continusly ... I1 = TRIGP(IE,1) ! Index of the Element Nodes (TRIGP) I2 = TRIGP(IE,2) I3 = TRIGP(IE,3) LAMBDA(1) = ONESIXTH *(C(I1,1)+C(I2,1)+C(I3,1)) ! Linearized advection speed in X and Y direction LAMBDA(2) = ONESIXTH *(C(I1,2)+C(I2,2)+C(I3,2)) KELEM(1,IE) = LAMBDA(1) * IEN(IE,1) + LAMBDA(2) * IEN(IE,2) ! K-Values - so called Flux Jacobians KELEM(2,IE) = LAMBDA(1) * IEN(IE,3) + LAMBDA(2) * IEN(IE,4) KELEM(3,IE) = LAMBDA(1) * IEN(IE,5) + LAMBDA(2) * IEN(IE,6) KTMP = KELEM(:,IE) ! Copy NM(IE) = - 1.D0/MIN(-THR8,SUM(MIN(0.d0,KTMP))) ! N-Values KELEM(:,IE) = MAX(0.d0,KTMP) FL11 = C(I2,1) * IEN(IE,1) + C(I2,2) * IEN(IE,2) ! Weights for Simpson Integration FL12 = C(I3,1) * IEN(IE,1) + C(I3,2) * IEN(IE,2) FL21 = C(I3,1) * IEN(IE,3) + C(I3,2) * IEN(IE,4) FL22 = C(I1,1) * IEN(IE,3) + C(I1,2) * IEN(IE,4) FL31 = C(I1,1) * IEN(IE,5) + C(I1,2) * IEN(IE,6) FL32 = C(I2,1) * IEN(IE,5) + C(I2,2) * IEN(IE,6) FL111 = 2.d0*FL11+FL12 FL112 = 2.d0*FL12+FL11 FL211 = 2.d0*FL21+FL22 FL212 = 2.d0*FL22+FL21 FL311 = 2.d0*FL31+FL32 FL312 = 2.d0*FL32+FL31 FLALL(1,IE) = (FL311 + FL212)! * ONESIXTH + KELEM(1,IE) FLALL(2,IE) = (FL111 + FL312)! * ONESIXTH + KELEM(2,IE) FLALL(3,IE) = (FL211 + FL112)! * ONESIXTH + KELEM(3,IE) END DO ! NTRI IF (LCALC) THEN ! If the current field or water level changes estimate the iteration number based on the new flow field a KKSUM = 0.d0 DO IE = 1, NTRI NI = TRIGP(IE,:) KKSUM(NI) = KKSUM(NI) + KELEM(:,IE) END DO ! IE DTMAXEXP = 1E10 ! initialize to large number DO IP = 1, NX DTMAXEXP = SI(IP)/MAX(DBLE(10.E-10),KKSUM(IP)*IOBDP(IP)) DTMAXGL = MIN( DTMAXGL, DTMAXEXP) END DO ! IP CFLXY = DBLE(DT)/DTMAXGL REST = ABS(MOD(CFLXY,1.0d0)) IF (REST .LT. THR8) THEN ITER(IK,ITH) = ABS(NINT(CFLXY)) ELSE IF (REST .GT. THR8 .AND. REST .LT. 0.5d0) THEN ITER(IK,ITH) = ABS(NINT(CFLXY)) + 1 ELSE ITER(IK,ITH) = ABS(NINT(CFLXY)) END IF END IF ! LCALC DO IP = 1, NX DTSI(IP) = DBLE(DT)/DBLE(ITER(IK,ITH))/SI(IP) ! Some precalculations for the time integration. END DO DO IT = 1, ITER(IK,ITH) U = AC ST = 0.d0 DO IE = 1, NTRI NI = TRIGP(IE,:) FT =-ONESIXTH*DOT_PRODUCT(U(NI),FLALL(:,IE)) UTILDE = NM(IE) * ( DOT_PRODUCT(KELEM(:,IE),U(NI)) - FT ) THETA_L(:) = KELEM(:,IE) * (U(NI) - UTILDE) IF (ABS(FT) .GT. 0.0d0) THEN BET1(:) = THETA_L(:)/FT IF (ANY( BET1 .LT. 0.0d0) ) THEN BETAHAT(1) = BET1(1) + 0.5d0 * BET1(2) BETAHAT(2) = BET1(2) + 0.5d0 * BET1(3) BETAHAT(3) = BET1(3) + 0.5d0 * BET1(1) BET1(1) = MAX(0.d0,MIN(BETAHAT(1),1.d0-BETAHAT(2),1.d0)) BET1(2) = MAX(0.d0,MIN(BETAHAT(2),1.d0-BETAHAT(3),1.d0)) BET1(3) = MAX(0.d0,MIN(BETAHAT(3),1.d0-BETAHAT(1),1.d0)) THETA_L(:) = FT * BET1 END IF ELSE THETA_L(:) = 0.d0 END IF ST(NI) = ST(NI) + THETA_L ! the 2nd term are the theta values of each node ... END DO DO IP = 1, NX U(IP) = MAX(0.d0,U(IP)-DTSI(IP)*ST(IP)*(1-IOBPA(IP)))*DBLE(IOBPD(ITH,IP)) IF (REFPARS(3).LT.0.5.AND.IOBPD(ITH,IP).EQ.0.AND.IOBPA(IP).EQ.0) U(IP)= AC(IP) ! restores reflected boundary values END DO ! update spectrum AC = U ! ! 4 Update boundaries: performs interpolation in time as done in rect grids (e.g. w3pro1md.ftn) ! IF ( FLBPI ) THEN ! ! 4.1 In this case the boundary is read from the nest.ww3 file ! RD1=RD10 - DT * REAL(ITER(IK,ITH)-IT)/REAL(ITER(IK,ITH)) RD2=RD20 IF ( RD2 .GT. 0.001 ) THEN RD2 = MIN(1.,MAX(0.,RD1/RD2)) RD1 = 1. - RD2 ELSE RD1 = 0. RD2 = 1. END IF ! ! Overwrites only the incoming energy ( IOBPD(ITH,IP) = 0) ! DO IBI=1, NBI IP = MAPSF(ISBPI(IBI),1) AC(IP) = ( RD1*BBPI0(ISP,IBI) + RD2*BBPIN(ISP,IBI) ) & / CG(IK,ISBPI(IBI)) * CLATS(ISBPI(IBI)) END DO ENDIF ! END DO ! End of loop on time steps ! CALL EXTCDE ( 99 ) !/ !/ End of W3XYPFSN ----------------------------------------------------- / !/ END SUBROUTINE W3XYPFSPSI2 !/ ------------------------------------------------------------------- / SUBROUTINE W3XYPFSNIMP ( ISP, C, LCALC, RD10, RD20, DT, AC) !/ !/ !/ +-----------------------------------+ !/ | WWIII Version of the WWM FS Code | !/ | by Aron Roland | !/ | for use in WWIII | !/ | GPL License | !/ | Last update : 15-Dec-2013 | !/ +-----------------------------------+ !/ !/ 15-Dec-2013 : Bug fix for implicit scheme ( version 4.16 ) !/ ! 1. Purpose : ! Advection of a scalar in a arbitary velocity field on unstructured meshes ! for the conservative hyperbolic equation N,t + (c*N),xy = 0 in spatial space ! This is the standard explicit N-Scheme from Roe as formulated in Abgrall ! ! 2. Method : ! ! 3. Parameters : ! ! Parameter list ! ---------------------------------------------------------------- ! ---------------------------------------------------------------- ! ! 4. Subroutines used : ! ! STRACE Subroutine tracing (!/S switch) ! ! 5. Called by : ! ! W3XYPUG Routine for advection on unstructured grid ! ! 6. Error messages : ! ! None. ! ! 7. Remarks : ! ! 8. Structure : ! ! See source code. ! ! 9. Switches : ! ! !/S Enable subroutine tracing. ! ! 10. Source code : ! !/ ------------------------------------------------------------------- / !/ USE W3GDATMD, ONLY : NK, NTH, NTRI, NX, CCON, IE_CELL,POS_CELL, SI, & IEN, TRIGP, CLATS, MAPSF, IOBPD, IOBPA, IOBP, IAA, JAA, POSI, & TRIA, NNZ USE W3GDATMD, ONLY : REFPARS USE W3WDATMD, ONLY: TIME USE W3ADATMD, ONLY: CG, ITER USE W3ODATMD, ONLY: NDSE, NDST, FLBPI, NBI, TBPI0, TBPIN, ISBPI, BBPI0, BBPIN USE W3TIMEMD, ONLY: DSEC21 IMPLICIT NONE INTEGER, INTENT(IN) :: ISP ! Actual Frequency/Wavenumber, actual Wave Direction REAL, INTENT(IN) :: DT ! Time intervall for which the advection should be computed for the given vel REAL, INTENT(IN) :: C(:,:) ! Velocity field in it's X- and Y- Components, DOUBLE PRECISION,INTENT(INOUT) :: AC(:) ! Wave Action before and after advection REAL, INTENT(IN) :: RD10, RD20 ! Time interpolation coefficients for boundary conditions LOGICAL, INTENT(IN) :: LCALC ! Switch for the calculation of the max. Global Time step !/ !/ ------------------------------------------------------------------- / !/ Parameter list !/ !/ !/ ------------------------------------------------------------------- / !/ Local parameters !/ REAL*8, PARAMETER :: ONESIXTH = 1.0d0/6.0d0 REAL*8, PARAMETER :: ONETHIRD = 1.0d0/3.0d0 REAL*8, PARAMETER :: THR8 = TINY(1.0d0) REAL, PARAMETER :: THR = TINY(1.0) !/ !/ ------------------------------------------------------------------- / !/ ! ! local integer ! INTEGER :: IP, IE, POS, I1, I2, I3, I, J, ITH, IK INTEGER :: IBI ! ! local real ! REAL :: RD1, RD2 !: ! local double ! REAL*8 :: BOUNDARY_FORCING REAL*8 :: FL11, FL12, FL21, FL22, FL31, FL32 REAL*8 :: U(NX) REAL*8 :: DTMAXGL REAL*8 :: LAMBDA(2) REAL*8 :: KP(3,NTRI) REAL*8 :: K1, DTK, TMP3, KM(3), K(3) REAL*8 :: NM(NTRI), CRFS(3), DELTAL(3,NTRI) REAL*8 :: B(NX), X(NX) REAL*8 :: ASPAR(NNZ) INTEGER :: IWKSP( 20*NX ) INTEGER :: FLJU(NX) INTEGER :: FLJAU(NNZ+1) INTEGER :: POS_TRICK(3,2) INTEGER :: IPAR(16) INTEGER :: IERROR ! IWK ! ERROR Indicator and Work Array Size, INTEGER :: JAU(NNZ+1), JU(NX) REAL*8 :: FPAR(16) ! DROPTOL REAL*8 :: WKSP( 8 * NX ) ! REAL WORKSPACES REAL*8 :: AU(NNZ+1) REAL*8 :: INIU(NX) external bcgstab POS_TRICK(1,1) = 2 POS_TRICK(1,2) = 3 POS_TRICK(2,1) = 3 POS_TRICK(2,2) = 1 POS_TRICK(3,1) = 1 POS_TRICK(3,2) = 2 ! 1. initialisation ITH = 1 + MOD(ISP-1,NTH) IK = 1 + (ISP-1)/NTH DTMAXGL = DBLE(10.E10) IF (.FALSE.) THEN WRITE(*,*) 'NNZ', NNZ WRITE(*,*) 'MINVAL IAA,JAA', MINVAL(IAA), MINVAL(JAA) WRITE(*,*) 'MINVAL IAA,JAA', MAXVAL(IAA), MAXVAL(JAA) WRITE(*,*) 'MAX/MIN POSI', MAXVAL(POSI), MINVAL(POSI) WRITE(*,*) 'AC, AQ', SUM(AC) END IF ! !2 Propagation !2.a Calculate K-Values and contour based quantities ... ! DO IE = 1, NTRI ! I precacalculate this arrays below as I assume that current velocity changes continusly ... I1 = TRIGP(IE,1) ! Index of the Element Nodes (TRIGP) I2 = TRIGP(IE,2) I3 = TRIGP(IE,3) LAMBDA(1) = ONESIXTH * (C(I1,1)+C(I2,1)+C(I3,1)) LAMBDA(2) = ONESIXTH * (C(I1,2)+C(I2,2)+C(I3,2)) K(1) = LAMBDA(1) * IEN(IE,1) + LAMBDA(2) * IEN(IE,2) K(2) = LAMBDA(1) * IEN(IE,3) + LAMBDA(2) * IEN(IE,4) K(3) = LAMBDA(1) * IEN(IE,5) + LAMBDA(2) * IEN(IE,6) KP(1,IE) = MAX(0.d0,K(1)) KP(2,IE) = MAX(0.d0,K(2)) KP(3,IE) = MAX(0.d0,K(3)) KM(1) = MIN(0.d0,K(1)) KM(2) = MIN(0.d0,K(2)) KM(3) = MIN(0.d0,K(3)) FL11 = C(I2,1)*IEN(IE,1)+C(I2,2)*IEN(IE,2) FL12 = C(I3,1)*IEN(IE,1)+C(I3,2)*IEN(IE,2) FL21 = C(I3,1)*IEN(IE,3)+C(I3,2)*IEN(IE,4) FL22 = C(I1,1)*IEN(IE,3)+C(I1,2)*IEN(IE,4) FL31 = C(I1,1)*IEN(IE,5)+C(I1,2)*IEN(IE,6) FL32 = C(I2,1)*IEN(IE,5)+C(I2,2)*IEN(IE,6) CRFS(1) = - ONESIXTH * (2.0d0 *FL31 + FL32 + FL21 + 2.0d0 * FL22 ) CRFS(2) = - ONESIXTH * (2.0d0 *FL32 + 2.0d0 * FL11 + FL12 + FL31 ) CRFS(3) = - ONESIXTH * (2.0d0 *FL12 + 2.0d0 * FL21 + FL22 + FL11 ) DELTAL(:,IE) = CRFS(:)- KP(:,IE) NM(IE) = 1.d0/MIN(DBLE(THR),SUM(KM(:))) END DO ! NTRI U = DBLE(AC) J = 0 ASPAR = 0.d0 B = 0.d0 DO IP = 1, NX DO I = 1, CCON(IP) J = J + 1 IE = IE_CELL(J) POS = POS_CELL(J) K1 = KP(POS,IE) * IOBPD(ITH,IP) IF (K1 > 0.) THEN DTK = K1 * DT TMP3 = DTK * NM(IE) I1 = POSI(1,J) I2 = POSI(2,J) I3 = POSI(3,J) ASPAR(I1) = ONETHIRD * TRIA(IE) + DTK - TMP3 * DELTAL(POS,IE) + ASPAR(I1) ASPAR(I2) = - TMP3 * DELTAL(POS_TRICK(POS,1),IE) + ASPAR(I2) ASPAR(I3) = - TMP3 * DELTAL(POS_TRICK(POS,2),IE) + ASPAR(I3) B(IP) = B(IP) + ONETHIRD * TRIA(IE) * U(IP) ELSE I1 = POSI(1,J) ASPAR(I1) = ONETHIRD * TRIA(IE) + ASPAR(I1) B(IP) = B(IP) + ONETHIRD * TRIA(IE) * U(IP) END IF END DO ! End loop over connected elements ... END DO ! !2DO setup a semi-implicit integration scheme for source terms only ... ! IPAR(1) = 0 ! always 0 to start an iterative solver IPAR(2) = 1 ! right preconditioning IPAR(3) = 1 ! use convergence test scheme 1 IPAR(4) = 8*NX ! IPAR(5) = 15 IPAR(6) = 1000 ! use at most 1000 matvec's FPAR(1) = DBLE(1.0E-8) ! relative tolerance 1.0E-6 FPAR(2) = DBLE(1.0E-10) ! absolute tolerance 1.0E-10 FPAR(11) = 0.d0 ! clearing the FLOPS counter AU = 0. FLJAU = 0 FLJU = 0 JAU = 0 JU = 0 CALL ILU0 (NX, ASPAR, JAA, IAA, AU, FLJAU, FLJU, IWKSP, IERROR) ! WRITE(*,*) 'PRECONDITIONER', IERROR ! IF (SUM(AC) .GT. 0.) THEN IF (.FALSE.) THEN WRITE(*,*) SUM(AC) WRITE(*,*) 'CALL SOLVER' WRITE(*,*) 'WRITE CG', SUM(CG) WRITE(*,*) 'B, X, AC, U', SUM(B), SUM(X), SUM(AC), SUM(U) WRITE(*,*) 'IPAR, FPAR', SUM(IPAR), SUM(FPAR) WRITE(*,*) 'WKSP, INIU', SUM(WKSP), SUM(INIU) WRITE(*,*) 'ASPAR, JAA, IAA',SUM(ASPAR), SUM(IAA), SUM(JAA) WRITE(*,*) 'AU, FLJAU, FLJU',SUM(AU), SUM(FLJAU), SUM(FLJU) END IF INIU = U X = 0.d0 CALL RUNRC (NX, B, X, IPAR, FPAR, WKSP, INIU, ASPAR, JAA, IAA, AU, FLJAU, FLJU, BCGSTAB) IF (.FALSE.) THEN WRITE(*,*) 'SOLUTION' WRITE(*,*) 'B, X, AC, U', SUM(B), SUM(X), SUM(AC), SUM(U) WRITE(*,*) 'IPAR, FPAR', SUM(IPAR), SUM(FPAR) WRITE(*,*) 'WKSP, INIU', SUM(WKSP), SUM(INIU) WRITE(*,*) 'ASPAR, JAA, IAA', SUM(ASPAR), SUM(JAA), SUM(IAA) WRITE(*,*) 'AU, FLJAU, FLJU', SUM(AU), SUM(FLJAU), SUM(FLJU) END IF DO IP = 1,NX U(IP) = MAX(0.d0,X(IP)*DBLE(IOBPD(ITH,IP))) IF (REFPARS(3).LT.0.5.AND.IOBPD(ITH,IP).EQ.0.AND.IOBPA(IP).EQ.0) U(IP)= AC(IP) ! restores reflected boundary values END DO ! ! update spectrum AC = REAL(U) ! ! 4 Update boundaries: performs interpolation in time as done in rect grids (e.g. w3pro1md.ftn) ! IF ( FLBPI ) THEN RD1=RD10 RD2=RD20 IF ( RD2 .GT. 0.001 ) THEN RD2 = MIN(1.,MAX(0.,RD1/RD2)) RD1 = 1. - RD2 ELSE RD1 = 0. RD2 = 1. END IF ! ! Time interpolation as done in rect grids ! DO IBI=1, NBI IP = MAPSF(ISBPI(IBI),1) AC(IP) = ( RD1*BBPI0(ISP,IBI) + RD2*BBPIN(ISP,IBI) ) & *IOBPA(IP)*(1-IOBPD(ITH,IP)) / CG(IK,ISBPI(IBI)) * CLATS(ISBPI(IBI)) END DO END IF ! CALL EXTCDE ( 99 ) !/ !/ End of W3XYPFSNIMP------------------------------------------------- / !/ END SUBROUTINE W3XYPFSNIMP !/ ------------------------------------------------------------------- / SUBROUTINE W3XYPFSFCT2 ( ISP, C, LCALC, RD10, RD20, DT, AC) !/ !/ !/ +-----------------------------------+ !/ | WWIII Version of the WWM FS Code | !/ | by Aron Roland | !/ | for use in WWIII | !/ | GPL License | !/ | Last update : 19-Dec-2007 | !/ +-----------------------------------+ !/ ! 1. Purpose : ! Advection of a scalar in a arbitary velocity field on unstructured meshes ! for the conservative hyperbolic equation N,t + (c*N),xy = 0 in spatial space ! ! 2. Method : ! ! 3. Parameters : ! ! Parameter list ! ---------------------------------------------------------------- ! ---------------------------------------------------------------- ! ! 4. Subroutines used : ! ! STRACE Subroutine tracing (!/S switch) ! ! 5. Called by : ! ! W3XYPUG Routine for advection on unstructured grid ! ! 6. Error messages : ! ! None. ! ! 7. Remarks : ! ! 8. Structure : ! ! See source code. ! ! 9. Switches : ! ! !/S Enable subroutine tracing. ! ! 10. Source code : ! !/ ------------------------------------------------------------------- / !/ USE W3GDATMD, ONLY : NK, NTH, NTRI, NX, CCON, IE_CELL,POS_CELL, SI, & IEN, TRIGP, CLATS, MAPSF, IOBPD, IOBPA, TRIA, IOBDP USE W3GDATMD, ONLY : REFPARS USE W3WDATMD, ONLY: TIME USE W3ADATMD, ONLY: CG, ITER USE W3ODATMD, ONLY: NDSE, NDST, FLBPI, NBI, TBPI0, TBPIN, ISBPI, BBPI0, BBPIN USE W3TIMEMD, ONLY: DSEC21 IMPLICIT NONE INTEGER, INTENT(IN) :: ISP ! Actual Frequency/Wavenumber, actual Wave Direction REAL, INTENT(IN) :: DT ! Time intervall for which the advection should be computed for the given vel REAL, INTENT(IN) :: C(:,:) ! Velocity field in it's X- and Y- Components, DOUBLE PRECISION, INTENT(INOUT) :: AC(:) ! Wave Action before and after advection REAL, INTENT(IN) :: RD10, RD20 ! Time interpolation coefficients for boundary condition LOGICAL, INTENT(IN) :: LCALC ! Switch for the calculation of the max. Global Time step !/ !/ ------------------------------------------------------------------- / !/ Parameter list !/ !/ !/ ------------------------------------------------------------------- / !/ Local parameters !/ REAL*8, PARAMETER :: ONESIXTH = 1.0d0/6.0d0 REAL*8, PARAMETER :: ONETHIRD = 1.0d0/3.0d0 REAL*8, PARAMETER :: THR8 = TINY(1.0d0) REAL, PARAMETER :: THR = TINY(1.0) !/ !/ ------------------------------------------------------------------- / !/ ! ! local integer ! INTEGER :: IP, IE, IT, I1, I2, I3, I, ITH, IK INTEGER :: IBI, NI(3) ! ! local real ! REAL :: RD1, RD2 !: ! local double ! REAL*8 :: UTILDE, BOUNDARY_FORCING REAL*8 :: FT, CFLXY REAL*8 :: FL11, FL12, FL21, FL22, FL31, FL32 REAL*8 :: FL111, FL112, FL211, FL212, FL311, FL312 REAL*8 :: DTSI(NX), U(NX), DT4AI REAL*8 :: DTMAXGL, DTMAXEXP, REST REAL*8 :: LAMBDA(2), KTMP(3), TMP(3) REAL*8 :: BET1(3), BETAHAT(3) REAL*8 :: THETA_L(3,NTRI), THETA_H(3,NTRI), THETA_ACE(3,NTRI), UTMP(3) REAL*8 :: WII(2,NX), UL(NX), USTARI(2,NX) REAL*8 :: PM(NX), PP(NX), UIM(NX), UIP(NX) REAL*8 :: KELEM(3,NTRI), FLALL(3,NTRI) REAL*8 :: KKSUM(NX), ST(NX), BETA REAL*8 :: NM(NTRI) ! 1. initialisation ITH = 1 + MOD(ISP-1,NTH) IK = 1 + (ISP-1)/NTH DTMAXGL = DBLE(10.E10) ! !2 Propagation !2.a Calculate K-Values and contour based quantities ... ! DO IE = 1, NTRI ! I precacalculate this arrays below as I assume that current velocity changes continusly ... I1 = TRIGP(IE,1) ! Index of the Element Nodes (TRIGP) I2 = TRIGP(IE,2) I3 = TRIGP(IE,3) LAMBDA(1) = ONESIXTH *(C(I1,1)+C(I2,1)+C(I3,1)) ! Linearized advection speed in X and Y direction LAMBDA(2) = ONESIXTH *(C(I1,2)+C(I2,2)+C(I3,2)) KELEM(1,IE) = LAMBDA(1) * IEN(IE,1) + LAMBDA(2) * IEN(IE,2) ! K-Values - so called Flux Jacobians KELEM(2,IE) = LAMBDA(1) * IEN(IE,3) + LAMBDA(2) * IEN(IE,4) KELEM(3,IE) = LAMBDA(1) * IEN(IE,5) + LAMBDA(2) * IEN(IE,6) KTMP = KELEM(:,IE) ! Copy NM(IE) = - 1.D0/MIN(-THR8,SUM(MIN(0.d0,KTMP))) ! N-Values FL11 = C(I2,1) * IEN(IE,1) + C(I2,2) * IEN(IE,2) ! Weights for Simpson Integration FL12 = C(I3,1) * IEN(IE,1) + C(I3,2) * IEN(IE,2) FL21 = C(I3,1) * IEN(IE,3) + C(I3,2) * IEN(IE,4) FL22 = C(I1,1) * IEN(IE,3) + C(I1,2) * IEN(IE,4) FL31 = C(I1,1) * IEN(IE,5) + C(I1,2) * IEN(IE,6) FL32 = C(I2,1) * IEN(IE,5) + C(I2,2) * IEN(IE,6) FL111 = 2.d0*FL11+FL12 FL112 = 2.d0*FL12+FL11 FL211 = 2.d0*FL21+FL22 FL212 = 2.d0*FL22+FL21 FL311 = 2.d0*FL31+FL32 FL312 = 2.d0*FL32+FL31 FLALL(1,IE) = (FL311 + FL212)! * ONESIXTH + KELEM(1,IE) FLALL(2,IE) = (FL111 + FL312)! * ONESIXTH + KELEM(2,IE) FLALL(3,IE) = (FL211 + FL112)! * ONESIXTH + KELEM(3,IE) END DO ! NTRI IF (LCALC) THEN ! If the current field or water level changes estimate the iteration number based on the new flow field and KKSUM = 0.d0 DO IE = 1, NTRI NI = TRIGP(IE,:) KKSUM(NI) = KKSUM(NI) + KELEM(:,IE) END DO ! IE DTMAXEXP = 1E10 ! initialize to large number DO IP = 1, NX DTMAXEXP = SI(IP)/MAX(DBLE(10.E-10),KKSUM(IP)*IOBDP(IP)) DTMAXGL = MIN( DTMAXGL, DTMAXEXP) END DO ! IP CFLXY = DBLE(DT)/DTMAXGL REST = ABS(MOD(CFLXY,1.0d0)) IF (REST .LT. THR8) THEN ITER(IK,ITH) = ABS(NINT(CFLXY)) ELSE IF (REST .GT. THR8 .AND. REST .LT. 0.5d0) THEN ITER(IK,ITH) = ABS(NINT(CFLXY)) + 1 ELSE ITER(IK,ITH) = ABS(NINT(CFLXY)) END IF END IF ! LCALC DT4AI = DBLE(DT)/DBLE(ITER(IK,ITH)) DTSI(:) = DT4AI/SI(:) ! Some precalculations for the time integration. U = AC UL = U ! ! Now loop on sub-timesteps ! DO IT = 1, ITER(IK,ITH) ST = 0.d0 DO IE = 1, NTRI NI = TRIGP(IE,:) UTMP = U(NI) FT = - ONESIXTH*DOT_PRODUCT(UTMP,FLALL(:,IE)) TMP = MAX(0.d0,KELEM(:,IE)) UTILDE = NM(IE) * ( DOT_PRODUCT(TMP,UTMP) - FT ) THETA_L(:,IE) = TMP * ( UTMP - UTILDE ) IF (ABS(FT) .GT. DBLE(THR)) THEN BET1(:) = THETA_L(:,IE)/FT IF (ANY( BET1 .LT. 0.0d0) ) THEN BETAHAT(1) = BET1(1) + 0.5d0 * BET1(2) BETAHAT(2) = BET1(2) + 0.5d0 * BET1(3) BETAHAT(3) = BET1(3) + 0.5d0 * BET1(1) BET1(1) = MAX(0.d0,MIN(BETAHAT(1),1.d0-BETAHAT(2),1.d0)) BET1(2) = MAX(0.d0,MIN(BETAHAT(2),1.d0-BETAHAT(3),1.d0)) BET1(3) = MAX(0.d0,MIN(BETAHAT(3),1.d0-BETAHAT(1),1.d0)) THETA_L(:,IE) = FT * BET1 END IF ELSE THETA_L(:,IE) = 0.d0 END IF ! THETA_H(:,IE) = (ONETHIRD+DT4AI/(2.d0*TRIA(IE)) * KELEM(:,IE))*FT ! LAX-WENDROFF THETA_H(:,IE) = (1./3.+2./3.* KELEM(:,IE)/SUM(ABS(KELEM(:,IE))) )*FT ! CENTRAL SCHEME ! Antidiffusive residual according to the higher order nonmonotone scheme THETA_ACE(:,IE) = ((THETA_H(:,IE) - THETA_L(:,IE))) * DT4AI/SI(NI) ST(NI) = ST(NI) + THETA_L(:,IE)*DT4AI/SI(NI) END DO ! UL = MAX(0.d0,U-ST)*DBLE(IOBPD(ITH,:))!*DBLE(IOBDP(:)) ... add for IOBDP dry/wet flag. DO IP = 1,NX UL(IP) = MAX(0.d0,U(IP)-ST(IP))*DBLE(IOBPD(ITH,IP)) END DO USTARI(1,:) = MAX(UL,U) USTARI(2,:) = MIN(UL,U) UIP = -THR8 UIM = THR8 PP = 0.d0 PM = 0.d0 DO IE = 1, NTRI NI = TRIGP(IE,:) PP(NI) = PP(NI) + MAX( THR8, -THETA_ACE(:,IE)) PM(NI) = PM(NI) + MIN( -THR8, -THETA_ACE(:,IE)) UIP(NI) = MAX (UIP(NI), MAXVAL( USTARI(1,NI) )) UIM(NI) = MIN (UIM(NI), MINVAL( USTARI(2,NI) )) END DO WII(1,:) = MIN(1.0d0,(UIP-UL)/MAX( THR8,PP)) WII(2,:) = MIN(1.0d0,(UIM-UL)/MIN(-THR8,PM)) ST = 0.d0 DO IE = 1, NTRI DO I = 1, 3 IP = TRIGP(IE,I) IF (-THETA_ACE(I,IE) .GE. 0.) THEN TMP(I) = WII(1,IP) ELSE TMP(I) = WII(2,IP) END IF END DO BETA = MINVAL(TMP) NI = TRIGP(IE,:) ST(NI) = ST(NI) + BETA * THETA_ACE(:,IE) END DO DO IP = 1,NX ! ! IOBPD is the switch for removing energy coming from the shoreline ! U(IP) = MAX(0.d0,UL(IP)-ST(IP))*DBLE(IOBPD(ITH,IP)) IF (REFPARS(3).LT.0.5.AND.IOBPD(ITH,IP).EQ.0.AND.IOBPA(IP).EQ.0) U(IP)= AC(IP) ! restores reflected boundary values END DO ! ! update spectrum AC = U ! ! 4 Update boundaries: performs interpolation in time as done in rect grids (e.g. w3pro1md.ftn) ! IF ( FLBPI ) THEN ! ! 4.1 In this case the boundary is read from the nest.ww3 file ! RD1=RD10 - DT * REAL(ITER(IK,ITH)-IT)/REAL(ITER(IK,ITH)) RD2=RD20 IF ( RD2 .GT. 0.001 ) THEN RD2 = MIN(1.,MAX(0.,RD1/RD2)) RD1 = 1. - RD2 ELSE RD1 = 0. RD2 = 1. END IF ! ! Overwrites only the incoming energy ( IOBPD(ITH,IP) = 0) ! DO IBI=1, NBI IP = MAPSF(ISBPI(IBI),1) AC(IP) = ( RD1*BBPI0(ISP,IBI) + RD2*BBPIN(ISP,IBI) ) & / CG(IK,ISBPI(IBI)) * CLATS(ISBPI(IBI)) END DO ENDIF ! END DO ! End of loop on time steps ! CALL EXTCDE ( 99 ) !/ !/ End of W3XYPFSN ----------------------------------------------------- / !/ END SUBROUTINE W3XYPFSFCT2 !/ ------------------------------------------------------------------- / SUBROUTINE SETDEPTH !/ !/ +-----------------------------------+ !/ | WAVEWATCH III NOAA/NCEP | !/ | | !/ | Aron Roland (BGS IT&E GmbH) | !/ | Mathieu Dutour-Sikiric (IRB) | !/ | | !/ | FORTRAN 90 | !/ | Last update : 01-June-2018 | !/ +-----------------------------------+ !/ !/ 01-June-2018 : Origination. ( version 6.04 ) !/ ! 1. Purpose : Init pdlib part ! 2. Method : ! 3. Parameters : ! ! Parameter list ! ---------------------------------------------------------------- ! ---------------------------------------------------------------- ! ! 4. Subroutines used : ! ! Name Type Module Description ! ---------------------------------------------------------------- ! STRACE Subr. W3SERVMD Subroutine tracing. ! ---------------------------------------------------------------- ! ! 5. Called by : ! ! Name Type Module Description ! ---------------------------------------------------------------- ! ---------------------------------------------------------------- ! ! 6. Error messages : ! 7. Remarks ! 8. Structure : ! 9. Switches : ! ! !/S Enable subroutine tracing. ! ! 10. Source code : ! !/ ------------------------------------------------------------------- / ! USE CONSTANTS, ONLY : LPDLIB USE W3GDATMD, ONLY: MAPSF, NSEAL, DMIN, IOBDP, MAPSTA, IOBP, MAPFS, NX USE W3ADATMD, ONLY: DW IMPLICIT NONE !/ !/ ------------------------------------------------------------------- / !/ Parameter list !/ !/ ------------------------------------------------------------------- / !/ Local PARAMETERs !/ !/ !/ ------------------------------------------------------------------- / ! INTEGER :: JSEA, ISEA, IX, IP REAL*8, PARAMETER :: DTHR = 10E-6 IOBDP = 1 DO IP=1,NX IF (DW(IP) .LT. DMIN + DTHR) IOBDP(IP) = 0 !WRITE(*,*) ip, ip_glob, MAPSTA(1,IP_glob), IOBP(IP_glob), DW(ISEA), DMIN END DO END SUBROUTINE !/ ------------------------------------------------------------------- / END MODULE W3PROFSMD !-------------------------------------------------------------------------- !-------------------------------------------------------------------------- !-------------------------------------------------------------------------- !------------------iterative solver --------------------------------------- !----------------------------------------------------------------------c ! S P A R S K I T c !----------------------------------------------------------------------c ! Basic Iterative Solvers with Reverse Communication c !----------------------------------------------------------------------c ! This file currently has several basic iterative linear system c ! solvers. They are: c ! CG -- Conjugate Gradient Method c ! CGNR -- Conjugate Gradient Method (Normal Residual equation) c ! BCG -- Bi-Conjugate Gradient Method c ! DBCG -- BCG with partial pivoting c ! BCGSTAB -- BCG stabilized c ! TFQMR -- Transpose-Free Quasi-Minimum Residual method c ! FOM -- Full Orthogonalization Method c ! GMRES -- Generalized Minimum RESidual method (no longer available) c ! FGMRES -- Flexible version of Generalized Minimum c ! RESidual method c ! DQGMRES -- Direct versions of Quasi Generalize Minimum c ! Residual method c !----------------------------------------------------------------------c ! They all have the following calling sequence: ! subroutine solver(n, rhs, sol, ipar, fpar, w) ! integer n, ipar(16) ! real*8 rhs(n), sol(n), fpar(16), w(*) ! Where ! (1) 'n' is the size of the linear system, ! (2) 'rhs' is the right-hand side of the linear system, ! (3) 'sol' is the solution to the linear system, ! (4) 'ipar' is an integer parameter array for the reverse ! communication protocol, ! (5) 'fpar' is an floating-point parameter array storing ! information to and from the iterative solvers. ! (6) 'w' is the work space (size is specified in ipar) ! ! They are preconditioned iterative solvers with reverse ! communication. The preconditioners can be applied from either ! from left or right or both (specified by ipar(2), see below). ! ! Author: Kesheng John Wu (kewu@mail.cs.umn.edu) 1993 ! ! NOTES: ! ! (1) Work space required by each of the iterative solver ! routines is as follows: ! CG == 5 * n ! CGNR == 5 * n ! BCG == 7 * n ! DBCG == 11 * n ! BCGSTAB == 8 * n ! TFQMR == 11 * n ! FOM == (n+3)*(m+2) + (m+1)*m/2 (m = ipar(5), default m=15) ! GMRES == (n+3)*(m+2) + (m+1)*m/2 (m = ipar(5), default m=15) ! GMRES is no longer available ! FGMRES == 2*n*(m+1) + (m+1)*m/2 + 3*m + 2 (m = ipar(5), ! default m=15) ! DQGMRES == n + lb * (2*n+4) (lb=ipar(5)+1, default lb = 16) ! ! (2) ALL iterative solvers require a user-supplied DOT-product ! routine named ddot. The prototype of ddot is ! ! real*8 function ddot(n,x,y) ! integer n, ix, iy ! real*8 x(1+(n-1)*ix), y(1+(n-1)*iy) ! ! This interface of ddot is exactly the same as that of ! DDOT (or SDOT if real == real*8) from BLAS-1. It should have ! same functionality as DDOT on a single processor machine. On a ! parallel/distributed environment, each processor can perform ! DDOT on the data it has, then perform a summation on all the ! partial results. ! ! (3) To use this set of routines under SPMD/MIMD program paradigm, ! several things are to be noted: (a) 'n' should be the number of ! vector elements of 'rhs' that is present on the local processor. ! (b) if RHS(i) is on processor j, it is expected that SOL(i) ! will be on the same processor, i.e. the vectors are distributed ! to each processor in the same way. (c) the preconditioning and ! stopping criteria specifications have to be the same on all ! processor involved, ipar and fpar have to be the same on each ! processor. (d) ddot should be replaced by a distributed ! dot-product function. ! ! .................................................................. ! Reverse Communication Protocols ! ! When a reverse-communication routine returns, it could be either ! that the routine has terminated or it simply requires the caller ! to perform one matrix-vector multiplication. The possible matrices ! that involve in the matrix-vector multiplications are: ! A (the matrix of the linear system), ! A^T (A transposed), ! Ml^{-1} (inverse of the left preconditioner), ! Ml^{-T} (inverse of the left preconditioner transposed), ! Mr^{-1} (inverse of the right preconditioner), ! Mr^{-T} (inverse of the right preconditioner transposed). ! For all the matrix vector multiplication, v = A u. The input and ! output vectors are supposed to be part of the work space 'w', and ! the starting positions of them are stored in ipar(8:9), see below. ! ! The array 'ipar' is used to store the information about the solver. ! Here is the list of what each element represents: ! ! ipar(1) -- status of the call/return. ! A call to the solver with ipar(1) == 0 will initialize the ! iterative solver. On return from the iterative solver, ipar(1) ! carries the status flag which indicates the condition of the ! return. The status information is divided into two categories, ! (1) a positive value indicates the solver requires a matrix-vector ! multiplication, ! (2) a non-positive value indicates termination of the solver. ! Here is the current definition: ! 1 == request a matvec with A, ! 2 == request a matvec with A^T, ! 3 == request a left preconditioner solve (Ml^{-1}), ! 4 == request a left preconditioner transposed solve (Ml^{-T}), ! 5 == request a right preconditioner solve (Mr^{-1}), ! 6 == request a right preconditioner transposed solve (Mr^{-T}), ! 10 == request the caller to perform stopping test, ! 0 == normal termination of the solver, satisfied the stopping ! criteria, ! -1 == termination because iteration number is greater than the ! preset limit, ! -2 == return due to insufficient work space, ! -3 == return due to anticipated break-down / divide by zero, ! in the case where Arnoldi procedure is used, additional ! error code can be found in ipar(12), where ipar(12) is ! the error code of orthogonalization procedure MGSRO: ! -1: zero input vector ! -2: input vector contains abnormal numbers ! -3: input vector is a linear combination of others ! -4: trianguler system in GMRES/FOM/etc. has nul rank ! -4 == the values of fpar(1) and fpar(2) are both <= 0, the valid ! ranges are 0 <= fpar(1) < 1, 0 <= fpar(2), and they can ! not be zero at the same time ! -9 == while trying to detect a break-down, an abnormal number is ! detected. ! -10 == return due to some non-numerical reasons, e.g. invalid ! floating-point numbers etc. ! ! ipar(2) -- status of the preconditioning: ! 0 == no preconditioning ! 1 == left preconditioning only ! 2 == right preconditioning only ! 3 == both left and right preconditioning ! ! ipar(3) -- stopping criteria (details of this will be ! discussed later). ! ! ipar(4) -- number of elements in the array 'w'. if this is less ! than the desired size, it will be over-written with the minimum ! requirement. In which case the status flag ipar(1) = -2. ! ! ipar(5) -- size of the Krylov subspace (used by GMRES and its ! variants), e.g. GMRES(ipar(5)), FGMRES(ipar(5)), ! DQGMRES(ipar(5)). ! ! ipar(6) -- maximum number of matrix-vector multiplies, if not a ! positive number the iterative solver will run till convergence ! test is satisfied. ! ! ipar(7) -- current number of matrix-vector multiplies. It is ! incremented after each matrix-vector multiplication. If there ! is preconditioning, the counter is incremented after the ! preconditioning associated with each matrix-vector multiplication. ! ! ipar(8) -- pointer to the input vector to the requested matrix- ! vector multiplication. ! ! ipar(9) -- pointer to the output vector of the requested matrix- ! vector multiplication. ! ! To perform v = A * u, it is assumed that u is w(ipar(8):ipar(8)+n-1) ! and v is stored as w(ipar(9):ipar(9)+n-1). ! ! ipar(10) -- the return address (used to determine where to go to ! inside the iterative solvers after the caller has performed the ! requested services). ! ! ipar(11) -- the result of the external convergence test ! On final return from the iterative solvers, this value ! will be reflected by ipar(1) = 0 (details discussed later) ! ! ipar(12) -- error code of MGSRO, it is ! 1 if the input vector to MGSRO is linear combination ! of others, ! 0 if MGSRO was successful, ! -1 if the input vector to MGSRO is zero, ! -2 if the input vector contains invalid number. ! ! ipar(13) -- number of initializations. During each initilization ! residual norm is computed directly from M_l(b - A x). ! ! ipar(14) to ipar(16) are NOT defined, they are NOT USED by ! any iterative solver at this time. ! ! Information about the error and tolerance are stored in the array ! FPAR. So are some internal variables that need to be saved from ! one iteration to the next one. Since the internal variables are ! not the same for each routine, we only define the common ones. ! ! The first two are input parameters: ! fpar(1) -- the relative tolerance, ! fpar(2) -- the absolute tolerance (details discussed later), ! ! When the iterative solver terminates, ! fpar(3) -- initial residual/error norm, ! fpar(4) -- target residual/error norm, ! fpar(5) -- current residual norm (if available), ! fpar(6) -- current residual/error norm, ! fpar(7) -- convergence rate, ! ! fpar(8:10) are used by some of the iterative solvers to save some ! internal information. ! ! fpar(11) -- number of floating-point operations. The iterative ! solvers will add the number of FLOPS they used to this variable, ! but they do NOT initialize it, nor add the number of FLOPS due to ! matrix-vector multiplications (since matvec is outside of the ! iterative solvers). To insure the correct FLOPS count, the ! caller should set fpar(11) = 0 before invoking the iterative ! solvers and account for the number of FLOPS from matrix-vector ! multiplications and preconditioners. ! ! fpar(12:16) are not used in current implementation. ! ! Whether the content of fpar(3), fpar(4) and fpar(6) are residual ! norms or error norms depends on ipar(3). If the requested ! convergence test is based on the residual norm, they will be ! residual norms. If the caller want to test convergence based the ! error norms (estimated by the norm of the modifications applied ! to the approximate solution), they will be error norms. ! Convergence rate is defined by (Fortran 77 statement) ! fpar(7) = log10(fpar(3) / fpar(6)) / (ipar(7)-ipar(13)) ! If fpar(7) = 0.5, it means that approximately every 2 (= 1/0.5) ! steps the residual/error norm decrease by a factor of 10. ! ! .................................................................. ! Stopping criteria, ! ! An iterative solver may be terminated due to (1) satisfying ! convergence test; (2) exceeding iteration limit; (3) insufficient ! work space; (4) break-down. Checking of the work space is ! only done in the initialization stage, i.e. when it is called with ! ipar(1) == 0. A complete convergence test is done after each ! update of the solutions. Other conditions are monitored ! continuously. ! ! With regard to the number of iteration, when ipar(6) is positive, ! the current iteration number will be checked against it. If ! current iteration number is greater the ipar(6) than the solver ! will return with status -1. If ipar(6) is not positive, the ! iteration will continue until convergence test is satisfied. ! ! Two things may be used in the convergence tests, one is the ! residual 2-norm, the other one is 2-norm of the change in the ! approximate solution. The residual and the change in approximate ! solution are from the preconditioned system (if preconditioning ! is applied). The DQGMRES and TFQMR use two estimates for the ! residual norms. The estimates are not accurate, but they are ! acceptable in most of the cases. Generally speaking, the error ! of the TFQMR's estimate is less accurate. ! ! The convergence test type is indicated by ipar(3). There are four ! type convergence tests: (1) tests based on the residual norm; ! (2) tests based on change in approximate solution; (3) caller ! does not care, the solver choose one from above two on its own; ! (4) caller will perform the test, the solver should simply continue. ! Here is the complete definition: ! -2 == || dx(i) || <= rtol * || rhs || + atol ! -1 == || dx(i) || <= rtol * || dx(1) || + atol ! 0 == solver will choose test 1 (next) ! 1 == || residual || <= rtol * || initial residual || + atol ! 2 == || residual || <= rtol * || rhs || + atol ! 999 == caller will perform the test ! where dx(i) denote the change in the solution at the ith update. ! ||.|| denotes 2-norm. rtol = fpar(1) and atol = fpar(2). ! ! If the caller is to perform the convergence test, the outcome ! should be stored in ipar(11). ! ipar(11) = 0 -- failed the convergence test, iterative solver ! should continue ! ipar(11) = 1 -- satisfied convergence test, iterative solver ! should perform the clean up job and stop. ! ! Upon return with ipar(1) = 10, ! ipar(8) points to the starting position of the change in ! solution Sx, where the actual solution of the step is ! x_j = x_0 + M_r^{-1} Sx. ! Exception: ipar(8) < 0, Sx = 0. It is mostly used by ! GMRES and variants to indicate (1) Sx was not necessary, ! (2) intermediate result of Sx is not computed. ! ipar(9) points to the starting position of a work vector that ! can be used by the caller. ! ! NOTE: the caller should allow the iterative solver to perform ! clean up job after the external convergence test is satisfied, ! since some of the iterative solvers do not directly ! update the 'sol' array. A typical clean-up stage includes ! performing the final update of the approximate solution and ! computing the convergence information (e.g. values of fpar(3:7)). ! ! NOTE: fpar(4) and fpar(6) are not set by the accelerators (the ! routines implemented here) if ipar(3) = 999. ! ! .................................................................. ! Usage: ! ! To start solving a linear system, the user needs to specify ! first 6 elements of the ipar, and first 2 elements of fpar. ! The user may optionally set fpar(11) = 0 if one wants to count ! the number of floating-point operations. (Note: the iterative ! solvers will only add the floating-point operations inside ! themselves, the caller will have to add the FLOPS from the ! matrix-vector multiplication routines and the preconditioning ! routines in order to account for all the arithmetic operations.) ! ! Here is an example: ! ipar(1) = 0 ! always 0 to start an iterative solver ! ipar(2) = 2 ! right preconditioning ! ipar(3) = 1 ! use convergence test scheme 1 ! ipar(4) = 10000 ! the 'w' has 10,000 elements ! ipar(5) = 10 ! use *GMRES(10) (e.g. FGMRES(10)) ! ipar(6) = 100 ! use at most 100 matvec's ! fpar(1) = 1.0E-6 ! relative tolerance 1.0E-6 ! fpar(2) = 1.0E-10 ! absolute tolerance 1.0E-10 ! fpar(11) = 0.0 ! clearing the FLOPS counter ! ! After the above specifications, one can start to call an iterative ! solver, say BCG. Here is a piece of pseudo-code showing how it can ! be done, ! ! 10 call bcg(n,rhs,sol,ipar,fpar,w) ! if (ipar(1).eq.1) then ! call amux(n,w(ipar(8)),w(ipar(9)),a,ja,ia) ! goto 10 ! else if (ipar(1).eq.2) then ! call atmux(n,w(ipar(8)),w(ipar(9)),a,ja,ia) ! goto 10 ! else if (ipar(1).eq.3) then ! left preconditioner solver ! goto 10 ! else if (ipar(1).eq.4) then ! left preconditioner transposed solve ! goto 10 ! else if (ipar(1).eq.5) then ! right preconditioner solve ! goto 10 ! else if (ipar(1).eq.6) then ! right preconditioner transposed solve ! goto 10 ! else if (ipar(1).eq.10) then ! call my own stopping test routine ! goto 10 ! else if (ipar(1).gt.0) then ! ipar(1) is an unspecified code ! else ! the iterative solver terminated with code = ipar(1) ! endif ! ! This segment of pseudo-code assumes the matrix is in CSR format, ! AMUX and ATMUX are two routines from the SPARSKIT MATVEC module. ! They perform matrix-vector multiplications for CSR matrices, ! where w(ipar(8)) is the first element of the input vectors to the ! two routines, and w(ipar(9)) is the first element of the output ! vectors from them. For simplicity, we did not show the name of ! the routine that performs the preconditioning operations or the ! convergence tests. !----------------------------------------------------------------------- subroutine bcgstab(n, rhs, sol, ipar, fpar, w) implicit none integer n, ipar(16) real*8 rhs(n), sol(n), fpar(16), w(n,8) !----------------------------------------------------------------------- ! BCGSTAB --- Bi Conjugate Gradient stabilized (BCGSTAB) ! This is an improved BCG routine. (1) no matrix transpose is ! involved. (2) the convergence is smoother. ! ! Algorithm: ! Initialization - r = b - A x, r0 = r, p = r, rho = (r0, r), ! Iterate - ! (1) v = A p ! (2) alpha = rho / (r0, v) ! (3) s = r - alpha v ! (4) t = A s ! (5) omega = (t, s) / (t, t) ! (6) x = x + alpha * p + omega * s ! (7) r = s - omega * t ! convergence test goes here ! (8) beta = rho, rho = (r0, r), beta = rho * alpha / (beta * omega) ! p = r + beta * (p - omega * v) ! ! in this routine, before successful return, the fpar's are ! fpar(3) == initial (preconditionied-)residual norm ! fpar(4) == target (preconditionied-)residual norm ! fpar(5) == current (preconditionied-)residual norm ! fpar(6) == current residual norm or error ! fpar(7) == current rho (rhok = ) ! fpar(8) == alpha ! fpar(9) == omega ! ! Usage of the work space W ! w(:, 1) = r0, the initial residual vector ! w(:, 2) = r, current residual vector ! w(:, 3) = s ! w(:, 4) = t ! w(:, 5) = v ! w(:, 6) = p ! w(:, 7) = tmp, used in preconditioning, etc. ! w(:, 8) = delta x, the correction to the answer is accumulated ! here, so that the right-preconditioning may be applied ! at the end !----------------------------------------------------------------------- ! external routines used ! real*8 ddot logical stopbis, brkdn external ddot, stopbis, brkdn ! real*8 one parameter(one=1.0D0) ! ! local variables ! integer i real*8 alpha,beta,rho,omega logical lp, rp save lp, rp ! ! where to go ! if (ipar(1).gt.0) then goto (10, 20, 40, 50, 60, 70, 80, 90, 100, 110) ipar(10) else if (ipar(1).lt.0) then goto 900 endif ! ! call the initialization routine ! call bisinit(ipar,fpar,8*n,1,lp,rp,w) if (ipar(1).lt.0) return ! ! perform a matvec to compute the initial residual ! ipar(1) = 1 ipar(8) = 1 ipar(9) = 1 + n do i = 1, n w(i,1) = sol(i) enddo ipar(10) = 1 return 10 ipar(7) = ipar(7) + 1 ipar(13) = ipar(13) + 1 do i = 1, n w(i,1) = rhs(i) - w(i,2) enddo fpar(11) = fpar(11) + n if (lp) then ipar(1) = 3 ipar(10) = 2 return endif ! 20 if (lp) then do i = 1, n w(i,1) = w(i,2) w(i,6) = w(i,2) enddo else do i = 1, n w(i,2) = w(i,1) w(i,6) = w(i,1) enddo endif ! fpar(7) = ddot(n,w,w) fpar(11) = fpar(11) + 2 * n fpar(5) = sqrt(fpar(7)) fpar(3) = fpar(5) if (abs(ipar(3)).eq.2) then fpar(4) = fpar(1) * sqrt(ddot(n,rhs,rhs)) + fpar(2) fpar(11) = fpar(11) + 2 * n else if (ipar(3).ne.999) then fpar(4) = fpar(1) * fpar(3) + fpar(2) endif if (ipar(3).ge.0) fpar(6) = fpar(5) if (ipar(3).ge.0 .and. fpar(5).le.fpar(4) .and. ipar(3).ne.999) then goto 900 endif ! ! beginning of the iterations ! ! Step (1), v = A p 30 if (rp) then ipar(1) = 5 ipar(8) = 5*n+1 if (lp) then ipar(9) = 4*n + 1 else ipar(9) = 6*n + 1 endif ipar(10) = 3 return endif ! 40 ipar(1) = 1 if (rp) then ipar(8) = ipar(9) else ipar(8) = 5*n+1 endif if (lp) then ipar(9) = 6*n + 1 else ipar(9) = 4*n + 1 endif ipar(10) = 4 return 50 if (lp) then ipar(1) = 3 ipar(8) = ipar(9) ipar(9) = 4*n + 1 ipar(10) = 5 return endif ! 60 ipar(7) = ipar(7) + 1 ! ! step (2) alpha = ddot(n,w(1,1),w(1,5)) fpar(11) = fpar(11) + 2 * n if (brkdn(alpha, ipar)) goto 900 alpha = fpar(7) / alpha fpar(8) = alpha ! ! step (3) do i = 1, n w(i,3) = w(i,2) - alpha * w(i,5) enddo fpar(11) = fpar(11) + 2 * n ! ! Step (4): the second matvec -- t = A s ! if (rp) then ipar(1) = 5 ipar(8) = n+n+1 if (lp) then ipar(9) = ipar(8)+n else ipar(9) = 6*n + 1 endif ipar(10) = 6 return endif ! 70 ipar(1) = 1 if (rp) then ipar(8) = ipar(9) else ipar(8) = n+n+1 endif if (lp) then ipar(9) = 6*n + 1 else ipar(9) = 3*n + 1 endif ipar(10) = 7 return 80 if (lp) then ipar(1) = 3 ipar(8) = ipar(9) ipar(9) = 3*n + 1 ipar(10) = 8 return endif 90 ipar(7) = ipar(7) + 1 ! ! step (5) omega = ddot(n,w(1,4),w(1,4)) fpar(11) = fpar(11) + n + n if (brkdn(omega,ipar)) goto 900 omega = ddot(n,w(1,4),w(1,3)) / omega fpar(11) = fpar(11) + n + n if (brkdn(omega,ipar)) goto 900 fpar(9) = omega alpha = fpar(8) ! ! step (6) and (7) do i = 1, n w(i,7) = alpha * w(i,6) + omega * w(i,3) w(i,8) = w(i,8) + w(i,7) w(i,2) = w(i,3) - omega * w(i,4) enddo fpar(11) = fpar(11) + 6 * n + 1 ! ! convergence test if (ipar(3).eq.999) then ipar(1) = 10 ipar(8) = 7*n + 1 ipar(9) = 6*n + 1 ipar(10) = 9 return endif if (stopbis(n,ipar,2,fpar,w(1,2),w(1,7),one)) goto 900 100 if (ipar(3).eq.999.and.ipar(11).eq.1) goto 900 ! ! step (8): computing new p and rho ! rho = fpar(7) fpar(7) = ddot(n,w(1,2),w(1,1)) omega = fpar(9) beta = fpar(7) * fpar(8) / (fpar(9) * rho) do i = 1, n w(i,6) = w(i,2) + beta * (w(i,6) - omega * w(i,5)) enddo fpar(11) = fpar(11) + 6 * n + 3 if (brkdn(fpar(7),ipar)) goto 900 ! ! end of an iteration ! goto 30 ! ! some clean up job to do ! 900 if (rp) then if (ipar(1).lt.0) ipar(12) = ipar(1) ipar(1) = 5 ipar(8) = 7*n + 1 ipar(9) = ipar(8) - n ipar(10) = 10 return endif 110 if (rp) then call tidycg(n,ipar,fpar,sol,w(1,7)) else call tidycg(n,ipar,fpar,sol,w(1,8)) endif ! return !-----end-of-bcgstab end !----------------------------------------------------------------------- subroutine implu(np,umm,beta,ypiv,u,permut,full) real*8 umm,beta,ypiv(*),u(*),x, xpiv logical full, perm, permut(*) integer np,k,npm1 !----------------------------------------------------------------------- ! performs implicitly one step of the lu factorization of a ! banded hessenberg matrix. !----------------------------------------------------------------------- if (np .le. 1) goto 12 npm1 = np - 1 ! ! -- perform previous step of the factorization- ! do 6 k=1,npm1 if (.not. permut(k)) goto 5 x=u(k) u(k) = u(k+1) u(k+1) = x 5 u(k+1) = u(k+1) - ypiv(k)*u(k) 6 continue !----------------------------------------------------------------------- ! now determine pivotal information to be used in the next call !----------------------------------------------------------------------- 12 umm = u(np) perm = (beta .gt. abs(umm)) if (.not. perm) goto 4 xpiv = umm / beta u(np) = beta goto 8 4 xpiv = beta/umm 8 permut(np) = perm ypiv(np) = xpiv if (.not. full) return ! shift everything up if full... do 7 k=1,npm1 ypiv(k) = ypiv(k+1) permut(k) = permut(k+1) 7 continue return !-----end-of-implu end !----------------------------------------------------------------------- subroutine uppdir(n,p,np,lbp,indp,y,u,usav,flops) implicit none integer :: k,np,n,npm1,j,ju,indp,lbp real*8 :: p(n,lbp), y(*), u(*), usav(*), x, flops !----------------------------------------------------------------------- ! updates the conjugate directions p given the upper part of the ! banded upper triangular matrix u. u contains the non zero ! elements of the column of the triangular matrix.. !----------------------------------------------------------------------- real*8 zero parameter(zero=0.0D0) ! npm1=np-1 if (np .le. 1) goto 12 j=indp ju = npm1 10 if (j .le. 0) j=lbp x = u(ju) /usav(j) if (x .eq. zero) goto 115 do 11 k=1,n y(k) = y(k) - x*p(k,j) 11 continue flops = flops + 2*n 115 j = j-1 ju = ju -1 if (ju .ge. 1) goto 10 12 indp = indp + 1 if (indp .gt. lbp) indp = 1 usav(indp) = u(np) do 13 k=1,n p(k,indp) = y(k) 13 continue return !----------------------------------------------------------------------- !-------end-of-uppdir--------------------------------------------------- end subroutine givens(x,y,c,s) implicit none real*8 :: x,y,c,s !----------------------------------------------------------------------- ! Given x and y, this subroutine generates a Givens' rotation c, s. ! And apply the rotation on (x,y) ==> (sqrt(x**2 + y**2), 0). ! (See P 202 of "matrix computation" by Golub and van Loan.) !----------------------------------------------------------------------- real*8 :: t,one,zero parameter (zero=0.0D0,one=1.0D0) ! if (x.eq.zero .and. y.eq.zero) then c = one s = zero else if (abs(y).gt.abs(x)) then t = x / y x = sqrt(one+t*t) s = sign(one / x, y) c = t*s else if (abs(y).le.abs(x)) then t = y / x y = sqrt(one+t*t) c = sign(one / y, x) s = t*c else ! ! X or Y must be an invalid floating-point number, set both to zero ! x = zero y = zero c = one s = zero endif x = abs(x*y) ! ! end of givens ! return end !-----end-of-givens !----------------------------------------------------------------------- logical function stopbis(n,ipar,mvpi,fpar,r,delx,sx) implicit none integer n,mvpi,ipar(16) real*8 fpar(16), r(n), delx(n), sx, ddot external ddot !----------------------------------------------------------------------- ! function for determining the stopping criteria. return value of ! true if the stopbis criteria is satisfied. !----------------------------------------------------------------------- if (ipar(11) .eq. 1) then stopbis = .true. else stopbis = .false. endif if (ipar(6).gt.0 .and. ipar(7).ge.ipar(6)) then ipar(1) = -1 stopbis = .true. endif if (stopbis) return ! ! computes errors ! fpar(5) = sqrt(ddot(n,r,r)) fpar(11) = fpar(11) + 2 * n if (ipar(3).lt.0) then ! ! compute the change in the solution vector ! fpar(6) = sx * sqrt(ddot(n,delx,delx)) fpar(11) = fpar(11) + 2 * n if (ipar(7).lt.mvpi+mvpi+1) then ! ! if this is the end of the first iteration, set fpar(3:4) ! fpar(3) = fpar(6) if (ipar(3).eq.-1) then fpar(4) = fpar(1) * fpar(3) + fpar(2) endif endif else fpar(6) = fpar(5) endif ! ! .. the test is struct this way so that when the value in fpar(6) ! is not a valid number, STOPBIS is set to .true. ! if (fpar(6).gt.fpar(4)) then stopbis = .false. ipar(11) = 0 else stopbis = .true. ipar(11) = 1 endif ! return end !-----end-of-stopbis !----------------------------------------------------------------------- subroutine tidycg(n,ipar,fpar,sol,delx) implicit none integer i,n,ipar(16) real*8 fpar(16),sol(n),delx(n) !----------------------------------------------------------------------- ! Some common operations required before terminating the CG routines !----------------------------------------------------------------------- real*8 zero parameter(zero=0.0D0) ! if (ipar(12).ne.0) then ipar(1) = ipar(12) else if (ipar(1).gt.0) then if ((ipar(3).eq.999 .and. ipar(11).eq.1) .or. & & fpar(6).le.fpar(4)) then ipar(1) = 0 else if (ipar(7).ge.ipar(6) .and. ipar(6).gt.0) then ipar(1) = -1 else ipar(1) = -10 endif endif if (fpar(3).gt.zero .and. fpar(6).gt.zero .and. ipar(7).gt.ipar(13)) then fpar(7) = log10(fpar(3) / fpar(6)) / dble(ipar(7)-ipar(13)) else fpar(7) = zero endif do i = 1, n sol(i) = sol(i) + delx(i) enddo return end !-----end-of-tidycg !----------------------------------------------------------------------- logical function brkdn(alpha, ipar) implicit none integer ipar(16) real*8 alpha, beta, zero, one parameter (zero=0.0D0, one=1.0D0) !----------------------------------------------------------------------- ! test whether alpha is zero or an abnormal number, if yes, ! this routine will return .true. ! ! If alpha == 0, ipar(1) = -3, ! if alpha is an abnormal number, ipar(1) = -9. !----------------------------------------------------------------------- brkdn = .false. if (alpha.gt.zero) then beta = one / alpha if (.not. beta.gt.zero) then brkdn = .true. ipar(1) = -9 endif else if (alpha.lt.zero) then beta = one / alpha if (.not. beta.lt.zero) then brkdn = .true. ipar(1) = -9 endif else if (alpha.eq.zero) then brkdn = .true. ipar(1) = -3 else brkdn = .true. ipar(1) = -9 endif return end !-----end-of-brkdn !----------------------------------------------------------------------- subroutine bisinit(ipar,fpar,wksize,dsc,lp,rp,wk) implicit none integer i,ipar(16),wksize,dsc logical lp,rp real*8 fpar(16),wk(*) !----------------------------------------------------------------------- ! some common initializations for the iterative solvers !----------------------------------------------------------------------- real*8 zero, one parameter(zero=0.0D0, one=1.0D0) ! ! ipar(1) = -2 inidcate that there are not enough space in the work ! array ! if (ipar(4).lt.wksize) then ipar(1) = -2 ipar(4) = wksize return endif ! if (ipar(2).gt.2) then lp = .true. rp = .true. else if (ipar(2).eq.2) then lp = .false. rp = .true. else if (ipar(2).eq.1) then lp = .true. rp = .false. else lp = .false. rp = .false. endif if (ipar(3).eq.0) ipar(3) = dsc ! .. clear the ipar elements used ipar(7) = 0 ipar(8) = 0 ipar(9) = 0 ipar(10) = 0 ipar(11) = 0 ipar(12) = 0 ipar(13) = 0 ! ! fpar(1) must be between (0, 1), fpar(2) must be positive, ! fpar(1) and fpar(2) can NOT both be zero ! Normally return ipar(1) = -4 to indicate any of above error ! if (fpar(1).lt.zero .or. fpar(1).ge.one .or. fpar(2).lt.zero .or. & & (fpar(1).eq.zero .and. fpar(2).eq.zero)) then if (ipar(1).eq.0) then ipar(1) = -4 return else fpar(1) = 1.0D-6 fpar(2) = 1.0D-16 endif endif ! .. clear the fpar elements do i = 3, 10 fpar(i) = zero enddo if (fpar(11).lt.zero) fpar(11) = zero ! .. clear the used portion of the work array to zero do i = 1, wksize wk(i) = zero enddo ! return !-----end-of-bisinit end !----------------------------------------------------------------------- subroutine mgsro(full,lda,n,m,ind,ops,vec,hh,ierr) implicit none logical full integer lda,m,n,ind,ierr real*8 ops,hh(m),vec(lda,m) !----------------------------------------------------------------------- ! MGSRO -- Modified Gram-Schmidt procedure with Selective Re- ! Orthogonalization ! The ind'th vector of VEC is orthogonalized against the rest of ! the vectors. ! ! The test for performing re-orthogonalization is performed for ! each indivadual vectors. If the cosine between the two vectors ! is greater than 0.99 (REORTH = 0.99**2), re-orthogonalization is ! performed. The norm of the 'new' vector is kept in variable NRM0, ! and updated after operating with each vector. ! ! full -- .ture. if it is necessary to orthogonalize the ind'th ! against all the vectors vec(:,1:ind-1), vec(:,ind+2:m) ! .false. only orthogonalize againt vec(:,1:ind-1) ! lda -- the leading dimension of VEC ! n -- length of the vector in VEC ! m -- number of vectors can be stored in VEC ! ind -- index to the vector to be changed ! ops -- operation counts ! vec -- vector of LDA X M storing the vectors ! hh -- coefficient of the orthogonalization ! ierr -- error code ! 0 : successful return ! -1: zero input vector ! -2: input vector contains abnormal numbers ! -3: input vector is a linear combination of others ! ! External routines used: real*8 ddot !----------------------------------------------------------------------- integer i,k real*8 nrm0, nrm1, fct, thr, ddot, zero, one, reorth parameter (zero=0.0D0, one=1.0D0, reorth=0.98D0) external ddot ! ! compute the norm of the input vector ! nrm0 = ddot(n,vec(1,ind),vec(1,ind)) ops = ops + n + n thr = nrm0 * reorth if (nrm0.le.zero) then ierr = - 1 return else if (nrm0.gt.zero .and. one/nrm0.gt.zero) then ierr = 0 else ierr = -2 return endif ! ! Modified Gram-Schmidt loop ! if (full) then do 40 i = ind+1, m fct = ddot(n,vec(1,ind),vec(1,i)) hh(i) = fct do 20 k = 1, n vec(k,ind) = vec(k,ind) - fct * vec(k,i) 20 continue ops = ops + 4 * n + 2 if (fct*fct.gt.thr) then fct = ddot(n,vec(1,ind),vec(1,i)) hh(i) = hh(i) + fct do 30 k = 1, n vec(k,ind) = vec(k,ind) - fct * vec(k,i) 30 continue ops = ops + 4*n + 1 endif nrm0 = nrm0 - hh(i) * hh(i) if (nrm0.lt.zero) nrm0 = zero thr = nrm0 * reorth 40 continue endif ! do 70 i = 1, ind-1 fct = ddot(n,vec(1,ind),vec(1,i)) hh(i) = fct do 50 k = 1, n vec(k,ind) = vec(k,ind) - fct * vec(k,i) 50 continue ops = ops + 4 * n + 2 if (fct*fct.gt.thr) then fct = ddot(n,vec(1,ind),vec(1,i)) hh(i) = hh(i) + fct do 60 k = 1, n vec(k,ind) = vec(k,ind) - fct * vec(k,i) 60 continue ops = ops + 4*n + 1 endif nrm0 = nrm0 - hh(i) * hh(i) if (nrm0.lt.zero) nrm0 = zero thr = nrm0 * reorth 70 continue ! ! test the resulting vector ! nrm1 = sqrt(ddot(n,vec(1,ind),vec(1,ind))) ops = ops + n + n hh(ind) = nrm1 ! statement label 75 if (nrm1.le.zero) then ierr = -3 return endif ! ! scale the resulting vector ! fct = one / nrm1 do 80 k = 1, n vec(k,ind) = vec(k,ind) * fct 80 continue ops = ops + n + 1 ! ! normal return ! ierr = 0 return ! end surbotine mgsro end !----------------------------------------------------------------------c ! S P A R S K I T c !----------------------------------------------------------------------c ! BASIC MATRIX-VECTOR OPERATIONS - MATVEC MODULE c ! Matrix-vector Mulitiplications and Triang. Solves c !----------------------------------------------------------------------c ! contents: (as of Nov 18, 1991) c !---------- c ! 1) Matrix-vector products: c !--------------------------- c ! amux : A times a vector. Compressed Sparse Row (CSR) format. c ! amuxms: A times a vector. Modified Compress Sparse Row format. c ! atmux : Transp(A) times a vector. CSR format. c ! atmuxr: Transp(A) times a vector. CSR format. A rectangular. c ! amuxe : A times a vector. Ellpack/Itpack (ELL) format. c ! amuxd : A times a vector. Diagonal (DIA) format. c ! amuxj : A times a vector. Jagged Diagonal (JAD) format. c ! vbrmv : Sparse matrix-full vector product, in VBR format c ! c ! 2) Triangular system solutions: c !------------------------------- c ! lsol : Unit Lower Triang. solve. Compressed Sparse Row (CSR) format.c ! ldsol : Lower Triang. solve. Modified Sparse Row (MSR) format. c ! lsolc : Unit Lower Triang. solve. Comp. Sparse Column (CSC) format. c ! ldsolc: Lower Triang. solve. Modified Sparse Column (MSC) format. c ! ldsoll: Lower Triang. solve with level scheduling. MSR format. c ! usol : Unit Upper Triang. solve. Compressed Sparse Row (CSR) format.c ! udsol : Upper Triang. solve. Modified Sparse Row (MSR) format. c ! usolc : Unit Upper Triang. solve. Comp. Sparse Column (CSC) format. c ! udsolc: Upper Triang. solve. Modified Sparse Column (MSC) format. c !----------------------------------------------------------------------c ! 1) M A T R I X B Y V E C T O R P R O D U C T S c !----------------------------------------------------------------------c subroutine amux (n, x, y, a,ja,ia) real*8 x(*), y(*), a(*) integer n, ja(*), ia(*) !----------------------------------------------------------------------- ! A times a vector !----------------------------------------------------------------------- ! multiplies a matrix by a vector using the dot product form ! Matrix A is stored in compressed sparse row storage. ! ! on entry: !---------- ! n = row dimension of A ! x = real array of length equal to the column dimension of ! the A matrix. ! a, ja, ! ia = input matrix in compressed sparse row format. ! ! on return: !----------- ! y = real array of length n, containing the product y=Ax ! !----------------------------------------------------------------------- ! local variables ! real*8 t integer i, k !----------------------------------------------------------------------- do 100 i = 1,n ! ! compute the inner product of row i with vector x ! t = 0.0d0 do 99 k=ia(i), ia(i+1)-1 t = t + a(k)*x(ja(k)) 99 continue ! ! store result in y(i) ! y(i) = t 100 continue ! return !---------end-of-amux--------------------------------------------------- !----------------------------------------------------------------------- end !----------------------------------------------------------------------- subroutine amuxms (n, x, y, a,ja) real*8 x(*), y(*), a(*) integer n, ja(*) !----------------------------------------------------------------------- ! A times a vector in MSR format !----------------------------------------------------------------------- ! multiplies a matrix by a vector using the dot product form ! Matrix A is stored in Modified Sparse Row storage. ! ! on entry: !---------- ! n = row dimension of A ! x = real array of length equal to the column dimension of ! the A matrix. ! a, ja,= input matrix in modified compressed sparse row format. ! ! on return: !----------- ! y = real array of length n, containing the product y=Ax ! !----------------------------------------------------------------------- ! local variables ! integer i, k !----------------------------------------------------------------------- do 10 i=1, n y(i) = a(i)*x(i) 10 continue do 100 i = 1,n ! ! compute the inner product of row i with vector x ! do 99 k=ja(i), ja(i+1)-1 y(i) = y(i) + a(k) *x(ja(k)) 99 continue 100 continue ! return !---------end-of-amuxm-------------------------------------------------- !----------------------------------------------------------------------- end !----------------------------------------------------------------------- subroutine atmux (n, x, y, a, ja, ia) real*8 x(*), y(*), a(*) integer n, ia(*), ja(*) !----------------------------------------------------------------------- ! transp( A ) times a vector !----------------------------------------------------------------------- ! multiplies the transpose of a matrix by a vector when the original ! matrix is stored in compressed sparse row storage. Can also be ! viewed as the product of a matrix by a vector when the original ! matrix is stored in the compressed sparse column format. !----------------------------------------------------------------------- ! ! on entry: !---------- ! n = row dimension of A ! x = real array of length equal to the column dimension of ! the A matrix. ! a, ja, ! ia = input matrix in compressed sparse row format. ! ! on return: !----------- ! y = real array of length n, containing the product y=transp(A)*x ! !----------------------------------------------------------------------- ! local variables ! integer i, k !----------------------------------------------------------------------- ! ! zero out output vector ! do 1 i=1,n y(i) = 0.0 1 continue ! ! loop over the rows ! do 100 i = 1,n do 99 k=ia(i), ia(i+1)-1 y(ja(k)) = y(ja(k)) + x(i)*a(k) 99 continue 100 continue ! return !-------------end-of-atmux---------------------------------------------- !----------------------------------------------------------------------- end !----------------------------------------------------------------------- subroutine atmuxr (m, n, x, y, a, ja, ia) real*8 x(*), y(*), a(*) integer m, n, ia(*), ja(*) !----------------------------------------------------------------------- ! transp( A ) times a vector, A can be rectangular !----------------------------------------------------------------------- ! See also atmux. The essential difference is how the solution vector ! is initially zeroed. If using this to multiply rectangular CSC ! matrices by a vector, m number of rows, n is number of columns. !----------------------------------------------------------------------- ! ! on entry: !---------- ! m = column dimension of A ! n = row dimension of A ! x = real array of length equal to the column dimension of ! the A matrix. ! a, ja, ! ia = input matrix in compressed sparse row format. ! ! on return: !----------- ! y = real array of length n, containing the product y=transp(A)*x ! !----------------------------------------------------------------------- ! local variables ! integer i, k !----------------------------------------------------------------------- ! ! zero out output vector ! do 1 i=1,m y(i) = 0.0 1 continue ! ! loop over the rows ! do 100 i = 1,n do 99 k=ia(i), ia(i+1)-1 y(ja(k)) = y(ja(k)) + x(i)*a(k) 99 continue 100 continue ! return !-------------end-of-atmuxr--------------------------------------------- !----------------------------------------------------------------------- end !----------------------------------------------------------------------- subroutine amuxe (n,x,y,na,ncol,a,ja) implicit none integer :: n, na, ncol, ja(na,*) real*8 :: x(n), y(n), a(na,*) !----------------------------------------------------------------------- ! A times a vector in Ellpack Itpack format (ELL) !----------------------------------------------------------------------- ! multiplies a matrix by a vector when the original matrix is stored ! in the ellpack-itpack sparse format. !----------------------------------------------------------------------- ! ! on entry: !---------- ! n = row dimension of A ! x = real array of length equal to the column dimension of ! the A matrix. ! na = integer. The first dimension of arrays a and ja ! as declared by the calling program. ! ncol = integer. The number of active columns in array a. ! (i.e., the number of generalized diagonals in matrix.) ! a, ja = the real and integer arrays of the itpack format ! (a(i,k),k=1,ncol contains the elements of row i in matrix ! ja(i,k),k=1,ncol contains their column numbers) ! ! on return: !----------- ! y = real array of length n, containing the product y=y=A*x ! !----------------------------------------------------------------------- ! local variables ! integer i, j !----------------------------------------------------------------------- do 1 i=1, n y(i) = 0.0 1 continue do 10 j=1,ncol do 25 i = 1,n y(i) = y(i)+a(i,j)*x(ja(i,j)) 25 continue 10 continue ! return !--------end-of-amuxe--------------------------------------------------- !----------------------------------------------------------------------- end !----------------------------------------------------------------------- subroutine amuxd (n,x,y,diag,ndiag,idiag,ioff) integer n, ndiag, idiag, ioff(idiag) real*8 x(n), y(n), diag(ndiag,idiag) !----------------------------------------------------------------------- ! A times a vector in Diagonal storage format (DIA) !----------------------------------------------------------------------- ! multiplies a matrix by a vector when the original matrix is stored ! in the diagonal storage format. !----------------------------------------------------------------------- ! ! on entry: !---------- ! n = row dimension of A ! x = real array of length equal to the column dimension of ! the A matrix. ! ndiag = integer. The first dimension of array adiag as declared in ! the calling program. ! idiag = integer. The number of diagonals in the matrix. ! diag = real array containing the diagonals stored of A. ! idiag = number of diagonals in matrix. ! diag = real array of size (ndiag x idiag) containing the diagonals ! ! ioff = integer array of length idiag, containing the offsets of the ! diagonals of the matrix: ! diag(i,k) contains the element a(i,i+ioff(k)) of the matrix. ! ! on return: !----------- ! y = real array of length n, containing the product y=A*x ! !----------------------------------------------------------------------- ! local variables ! integer j, k, io, i1, i2 !----------------------------------------------------------------------- do 1 j=1, n y(j) = 0.0d0 1 continue do 10 j=1, idiag io = ioff(j) i1 = max0(1,1-io) i2 = min0(n,n-io) do 9 k=i1, i2 y(k) = y(k)+diag(k,j)*x(k+io) 9 continue 10 continue ! return !----------end-of-amuxd------------------------------------------------- !----------------------------------------------------------------------- end !----------------------------------------------------------------------- subroutine amuxj (n, x, y, jdiag, a, ja, ia) integer n, jdiag, ja(*), ia(*) real*8 x(n), y(n), a(*) !----------------------------------------------------------------------- ! A times a vector in Jagged-Diagonal storage format (JAD) !----------------------------------------------------------------------- ! multiplies a matrix by a vector when the original matrix is stored ! in the jagged diagonal storage format. !----------------------------------------------------------------------- ! ! on entry: !---------- ! n = row dimension of A ! x = real array of length equal to the column dimension of ! the A matrix. ! jdiag = integer. The number of jadded-diagonals in the data-structure. ! a = real array containing the jadded diagonals of A stored ! in succession (in decreasing lengths) ! j = integer array containing the colum indices of the ! corresponding elements in a. ! ia = integer array containing the lengths of the jagged diagonals ! ! on return: !----------- ! y = real array of length n, containing the product y=A*x ! ! Note: !------- ! Permutation related to the JAD format is not performed. ! this can be done by: ! call permvec (n,y,y,iperm) ! after the call to amuxj, where iperm is the permutation produced ! by csrjad. !----------------------------------------------------------------------- ! local variables ! integer i, ii, k1, ilen, j !----------------------------------------------------------------------- do 1 i=1, n y(i) = 0.0d0 1 continue do 70 ii=1, jdiag k1 = ia(ii)-1 ilen = ia(ii+1)-k1-1 do 60 j=1,ilen y(j)= y(j)+a(k1+j)*x(ja(k1+j)) 60 continue 70 continue ! return !----------end-of-amuxj------------------------------------------------- !----------------------------------------------------------------------- end !----------------------------------------------------------------------- subroutine vbrmv(nr, nc, ia, ja, ka, a, kvstr, kvstc, x, b) !----------------------------------------------------------------------- integer nr, nc, ia(nr+1), ja(*), ka(*), kvstr(nr+1), kvstc(*) real*8 a(*), x(*), b(*) !----------------------------------------------------------------------- ! Sparse matrix-full vector product, in VBR format. !----------------------------------------------------------------------- ! On entry: !-------------- ! nr, nc = number of block rows and columns in matrix A ! ia,ja,ka,a,kvstr,kvstc = matrix A in variable block row format ! x = multiplier vector in full format ! ! On return: !--------------- ! b = product of matrix A times vector x in full format ! ! Algorithm: !--------------- ! Perform multiplication by traversing a in order. ! !----------------------------------------------------------------------- !-----local variables integer n, i, j, ii, jj, k, istart, istop real*8 xjj !--------------------------------- n = kvstc(nc+1)-1 do i = 1, n b(i) = 0.d0 enddo !--------------------------------- k = 1 do i = 1, nr istart = kvstr(i) istop = kvstr(i+1)-1 do j = ia(i), ia(i+1)-1 do jj = kvstc(ja(j)), kvstc(ja(j)+1)-1 xjj = x(jj) do ii = istart, istop b(ii) = b(ii) + xjj*a(k) k = k + 1 enddo enddo enddo enddo !--------------------------------- return end !----------------------------------------------------------------------- !----------------------end-of-vbrmv------------------------------------- !----------------------------------------------------------------------- !----------------------------------------------------------------------c ! 2) T R I A N G U L A R S Y S T E M S O L U T I O N S c !----------------------------------------------------------------------c subroutine lsol (n,x,y,al,jal,ial) integer n, jal(*),ial(n+1) real*8 x(n), y(n), al(*) !----------------------------------------------------------------------- ! solves L x = y ; L = lower unit triang. / CSR format !----------------------------------------------------------------------- ! solves a unit lower triangular system by standard (sequential ) ! forward elimination - matrix stored in CSR format. !----------------------------------------------------------------------- ! ! On entry: !---------- ! n = integer. dimension of problem. ! y = real array containg the right side. ! ! al, ! jal, ! ial, = Lower triangular matrix stored in compressed sparse row ! format. ! ! On return: !----------- ! x = The solution of L x = y. !-------------------------------------------------------------------- ! local variables ! integer k, j real*8 t !----------------------------------------------------------------------- x(1) = y(1) do 150 k = 2, n t = y(k) do 100 j = ial(k), ial(k+1)-1 t = t-al(j)*x(jal(j)) 100 continue x(k) = t 150 continue ! return !----------end-of-lsol-------------------------------------------------- !----------------------------------------------------------------------- end !----------------------------------------------------------------------- subroutine ldsol (n,x,y,al,jal) integer n, jal(*) real*8 x(n), y(n), al(*) !----------------------------------------------------------------------- ! Solves L x = y L = triangular. MSR format !----------------------------------------------------------------------- ! solves a (non-unit) lower triangular system by standard (sequential) ! forward elimination - matrix stored in MSR format ! with diagonal elements already inverted (otherwise do inversion, ! al(1:n) = 1.0/al(1:n), before calling ldsol). !----------------------------------------------------------------------- ! ! On entry: !---------- ! n = integer. dimension of problem. ! y = real array containg the right hand side. ! ! al, ! jal, = Lower triangular matrix stored in Modified Sparse Row ! format. ! ! On return: !----------- ! x = The solution of L x = y . !-------------------------------------------------------------------- ! local variables ! integer k, j real*8 t !----------------------------------------------------------------------- x(1) = y(1)*al(1) do 150 k = 2, n t = y(k) do 100 j = jal(k), jal(k+1)-1 t = t - al(j)*x(jal(j)) 100 continue x(k) = al(k)*t 150 continue return !----------end-of-ldsol------------------------------------------------- !----------------------------------------------------------------------- end !----------------------------------------------------------------------- subroutine lsolc (n,x,y,al,jal,ial) integer n, jal(*),ial(*) real*8 x(n), y(n), al(*) !----------------------------------------------------------------------- ! SOLVES L x = y ; where L = unit lower trang. CSC format !----------------------------------------------------------------------- ! solves a unit lower triangular system by standard (sequential ) ! forward elimination - matrix stored in CSC format. !----------------------------------------------------------------------- ! ! On entry: !---------- ! n = integer. dimension of problem. ! y = real*8 array containg the right side. ! ! al, ! jal, ! ial, = Lower triangular matrix stored in compressed sparse column ! format. ! ! On return: !----------- ! x = The solution of L x = y. !----------------------------------------------------------------------- ! local variables ! integer k, j real*8 t !----------------------------------------------------------------------- do 140 k=1,n x(k) = y(k) 140 continue do 150 k = 1, n-1 t = x(k) do 100 j = ial(k), ial(k+1)-1 x(jal(j)) = x(jal(j)) - t*al(j) 100 continue 150 continue ! return !----------end-of-lsolc------------------------------------------------- !----------------------------------------------------------------------- end !----------------------------------------------------------------------- subroutine ldsolc (n,x,y,al,jal) integer n, jal(*) real*8 x(n), y(n), al(*) !----------------------------------------------------------------------- ! Solves L x = y ; L = nonunit Low. Triang. MSC format !----------------------------------------------------------------------- ! solves a (non-unit) lower triangular system by standard (sequential) ! forward elimination - matrix stored in Modified Sparse Column format ! with diagonal elements already inverted (otherwise do inversion, ! al(1:n) = 1.0/al(1:n), before calling ldsol). !----------------------------------------------------------------------- ! ! On entry: !---------- ! n = integer. dimension of problem. ! y = real array containg the right hand side. ! ! al, ! jal, ! ial, = Lower triangular matrix stored in Modified Sparse Column ! format. ! ! On return: !----------- ! x = The solution of L x = y . !-------------------------------------------------------------------- ! local variables ! integer k, j real*8 t !----------------------------------------------------------------------- do 140 k=1,n x(k) = y(k) 140 continue do 150 k = 1, n x(k) = x(k)*al(k) t = x(k) do 100 j = jal(k), jal(k+1)-1 x(jal(j)) = x(jal(j)) - t*al(j) 100 continue 150 continue ! return !----------end-of-lsolc------------------------------------------------ !----------------------------------------------------------------------- end !----------------------------------------------------------------------- subroutine ldsoll (n,x,y,al,jal,nlev,lev,ilev) integer n, nlev, jal(*), ilev(nlev+1), lev(n) real*8 x(n), y(n), al(*) !----------------------------------------------------------------------- ! Solves L x = y L = triangular. Uses LEVEL SCHEDULING/MSR format !----------------------------------------------------------------------- ! ! On entry: !---------- ! n = integer. dimension of problem. ! y = real array containg the right hand side. ! ! al, ! jal, = Lower triangular matrix stored in Modified Sparse Row ! format. ! nlev = number of levels in matrix ! lev = integer array of length n, containing the permutation ! that defines the levels in the level scheduling ordering. ! ilev = pointer to beginning of levels in lev. ! the numbers lev(i) to lev(i+1)-1 contain the row numbers ! that belong to level number i, in the level shcheduling ! ordering. ! ! On return: !----------- ! x = The solution of L x = y . !-------------------------------------------------------------------- integer ii, jrow, i, k real*8 t ! ! outer loop goes through the levels. (SEQUENTIAL loop) ! do 150 ii=1, nlev ! ! next loop executes within the same level. PARALLEL loop ! do 100 i=ilev(ii), ilev(ii+1)-1 jrow = lev(i) ! ! compute inner product of row jrow with x ! t = y(jrow) do 130 k=jal(jrow), jal(jrow+1)-1 t = t - al(k)*x(jal(k)) 130 continue x(jrow) = t*al(jrow) 100 continue 150 continue return !----------------------------------------------------------------------- end !----------------------------------------------------------------------- subroutine usol (n,x,y,au,jau,iau) integer n, jau(*),iau(n+1) real*8 x(n), y(n), au(*) !----------------------------------------------------------------------- ! Solves U x = y U = unit upper triangular. !----------------------------------------------------------------------- ! solves a unit upper triangular system by standard (sequential ) ! backward elimination - matrix stored in CSR format. !----------------------------------------------------------------------- ! ! On entry: !---------- ! n = integer. dimension of problem. ! y = real array containg the right side. ! ! au, ! jau, ! iau, = Lower triangular matrix stored in compressed sparse row ! format. ! ! On return: !----------- ! x = The solution of U x = y . !-------------------------------------------------------------------- ! local variables ! integer k, j real*8 t !----------------------------------------------------------------------- x(n) = y(n) do 150 k = n-1,1,-1 t = y(k) do 100 j = iau(k), iau(k+1)-1 t = t - au(j)*x(jau(j)) 100 continue x(k) = t 150 continue ! return !----------end-of-usol-------------------------------------------------- !----------------------------------------------------------------------- end !----------------------------------------------------------------------- subroutine udsol (n,x,y,au,jau) integer n, jau(*) real*8 x(n), y(n),au(*) !----------------------------------------------------------------------- ! Solves U x = y ; U = upper triangular in MSR format !----------------------------------------------------------------------- ! solves a non-unit upper triangular matrix by standard (sequential ) ! backward elimination - matrix stored in MSR format. ! with diagonal elements already inverted (otherwise do inversion, ! au(1:n) = 1.0/au(1:n), before calling). !----------------------------------------------------------------------- ! ! On entry: !---------- ! n = integer. dimension of problem. ! y = real array containg the right side. ! ! au, ! jau, = Lower triangular matrix stored in modified sparse row ! format. ! ! On return: !----------- ! x = The solution of U x = y . !-------------------------------------------------------------------- ! local variables ! integer k, j real*8 t !----------------------------------------------------------------------- x(n) = y(n)*au(n) do 150 k = n-1,1,-1 t = y(k) do 100 j = jau(k), jau(k+1)-1 t = t - au(j)*x(jau(j)) 100 continue x(k) = au(k)*t 150 continue ! return !----------end-of-udsol------------------------------------------------- !----------------------------------------------------------------------- end !----------------------------------------------------------------------- subroutine usolc (n,x,y,au,jau,iau) real*8 x(*), y(*), au(*) integer n, jau(*),iau(*) !----------------------------------------------------------------------- ! SOUVES U x = y ; where U = unit upper trang. CSC format !----------------------------------------------------------------------- ! solves a unit upper triangular system by standard (sequential ) ! forward elimination - matrix stored in CSC format. !----------------------------------------------------------------------- ! ! On entry: !---------- ! n = integer. dimension of problem. ! y = real*8 array containg the right side. ! ! au, ! jau, ! iau, = Uower triangular matrix stored in compressed sparse column ! format. ! ! On return: !----------- ! x = The solution of U x = y. !----------------------------------------------------------------------- ! local variables ! integer k, j real*8 t !----------------------------------------------------------------------- do 140 k=1,n x(k) = y(k) 140 continue do 150 k = n,1,-1 t = x(k) do 100 j = iau(k), iau(k+1)-1 x(jau(j)) = x(jau(j)) - t*au(j) 100 continue 150 continue ! return !----------end-of-usolc------------------------------------------------- !----------------------------------------------------------------------- end !----------------------------------------------------------------------- subroutine udsolc (n,x,y,au,jau) integer n, jau(*) real*8 x(n), y(n), au(*) !----------------------------------------------------------------------- ! Solves U x = y ; U = nonunit Up. Triang. MSC format !----------------------------------------------------------------------- ! solves a (non-unit) upper triangular system by standard (sequential) ! forward elimination - matrix stored in Modified Sparse Column format ! with diagonal elements already inverted (otherwise do inversion, ! auuuul(1:n) = 1.0/au(1:n), before calling ldsol). !----------------------------------------------------------------------- ! ! On entry: !---------- ! n = integer. dimension of problem. ! y = real*8 array containg the right hand side. ! ! au, ! jau, = Upper triangular matrix stored in Modified Sparse Column ! format. ! ! On return: !----------- ! x = The solution of U x = y . !-------------------------------------------------------------------- ! local variables ! integer k, j real*8 t !----------------------------------------------------------------------- do 140 k=1,n x(k) = y(k) 140 continue do 150 k = n,1,-1 x(k) = x(k)*au(k) t = x(k) do 100 j = jau(k), jau(k+1)-1 x(jau(j)) = x(jau(j)) - t*au(j) 100 continue 150 continue ! return !----------end-of-udsolc------------------------------------------------ !----------------------------------------------------------------------- end !----------------------------------------------------------------------- subroutine lusol(n, y, x, alu, jlu, ju) implicit none integer :: n, jlu(*), ju(*) real*8 :: x(n), y(n), alu(*) !----------------------------------------------------------------------- integer :: i,k ! ! forward solve ! do 40 i = 1, n x(i) = y(i) do 41 k=jlu(i),ju(i)-1 x(i) = x(i) - alu(k)* x(jlu(k)) 41 continue 40 continue do 90 i = n, 1, -1 do 91 k=ju(i),jlu(i+1)-1 x(i) = x(i) - alu(k)*x(jlu(k)) 91 continue x(i) = alu(i)*x(i) 90 continue ! return !----------------end of lusol ------------------------------------------ end !----------------------------------------------------------------------- subroutine lutsol(n, y, x, alu, jlu, ju) implicit none integer :: n, jlu(*), ju(*) real*8 :: x(n), y(n), alu(*) !----------------------------------------------------------------------- ! local variables ! integer :: i,k ! do 10 i = 1, n x(i) = y(i) 10 continue ! ! forward solve (with U^T) ! do 20 i = 1, n x(i) = x(i) * alu(i) do 30 k=ju(i),jlu(i+1)-1 x(jlu(k)) = x(jlu(k)) - alu(k)* x(i) 30 continue 20 continue ! ! backward solve (with L^T) ! do 40 i = n, 1, -1 do 50 k=jlu(i),ju(i)-1 x(jlu(k)) = x(jlu(k)) - alu(k)*x(i) 50 continue 40 continue ! return !----------------end of lutsol ----------------------------------------- !----------------------------------------------------------------------- end !----------------------------------------------------------------------- subroutine qsplit(a,ind,n,ncut) implicit none integer :: n, ind(n), ncut real*8 :: a(n) !----------------------------------------------------------------------- ! does a quick-sort split of a real array. ! on input a(1:n). is a real array ! on output a(1:n) is permuted such that its elements satisfy: ! ! abs(a(i)) .ge. abs(a(ncut)) for i .lt. ncut and ! abs(a(i)) .le. abs(a(ncut)) for i .gt. ncut ! ! ind(1:n) is an integer array which permuted in the same way as a(*). !----------------------------------------------------------------------- real*8 :: tmp, abskey integer :: itmp, first, last, j, mid !----- first = 1 last = n if (ncut .lt. first .or. ncut .gt. last) return ! ! outer loop -- while mid .ne. ncut do ! 1 mid = first abskey = abs(a(mid)) do 2 j=first+1, last if (abs(a(j)) .gt. abskey) then mid = mid+1 ! interchange tmp = a(mid) itmp = ind(mid) a(mid) = a(j) ind(mid) = ind(j) a(j) = tmp ind(j) = itmp endif 2 continue ! ! interchange ! tmp = a(mid) a(mid) = a(first) a(first) = tmp ! itmp = ind(mid) ind(mid) = ind(first) ind(first) = itmp ! ! test for while loop ! if (mid .eq. ncut) return if (mid .gt. ncut) then last = mid-1 else first = mid+1 endif goto 1 !----------------end-of-qsplit------------------------------------------ !----------------------------------------------------------------------- end subroutine runrc(n,rhs,sol,ipar,fpar,wk,guess,a,ja,ia,au,jau,ju,solver) implicit none integer n,ipar(16),ia(n+1),ja(*),ju(*),jau(*) real*8 fpar(16),rhs(n),sol(n),guess(n),wk(*),a(*),au(*) external solver !----------------------------------------------------------------------- ! the actual tester. It starts the iterative linear system solvers ! with a initial guess suppied by the user. ! ! The structure {au, jau, ju} is assumed to have the output from ! the ILU* routines in ilut.f. ! !----------------------------------------------------------------------- ! local variables ! integer :: i, its ! real :: dtime, dt(2), time ! external dtime save its ! ! ipar(2) can be 0, 1, 2, please don't use 3 ! if (ipar(2).gt.2) then WRITE(*,*) 'I can not do both left and right preconditioning.' return endif its = 0 ! do i = 1, n sol(i) = guess(i) enddo ! ipar(1) = 0 ! time = dtime(dt) 10 call solver(n,rhs,sol,ipar,fpar,wk) if (ipar(7).ne.its) then its = ipar(7) endif if (ipar(1).eq.1) then call amux(n, wk(ipar(8)), wk(ipar(9)), a, ja, ia) goto 10 else if (ipar(1).eq.2) then call atmux(n, wk(ipar(8)), wk(ipar(9)), a, ja, ia) goto 10 else if (ipar(1).eq.3 .or. ipar(1).eq.5) then call lusol(n,wk(ipar(8)),wk(ipar(9)),au,jau,ju) goto 10 else if (ipar(1).eq.4 .or. ipar(1).eq.6) then call lutsol(n,wk(ipar(8)),wk(ipar(9)),au,jau,ju) goto 10 else if (ipar(1).le.0) then if (ipar(1).eq.0) then ! WRITE(*,*) 'Iterative sovler has satisfied convergence test.' else if (ipar(1).eq.-1) then WRITE(*,*) 'Iterative solver has iterated too many times.' else if (ipar(1).eq.-2) then WRITE(*,*) 'Iterative solver was not given enough work space.' WRITE(*,*) 'The work space should at least have ', ipar(4), & & ' elements.' else if (ipar(1).eq.-3) then WRITE(*,*) 'Iterative sovler is facing a break-down.' else WRITE(*,*) 'Iterative solver terminated. code =', ipar(1) endif endif end !-----end-of-runrc !----------------------------------------------------------------------c ! S P A R S K I T c !----------------------------------------------------------------------c ! ITERATIVE SOLVERS MODULE c !----------------------------------------------------------------------c ! This Version Dated: August 13, 1996. Warning: meaning of some c ! ============ arguments have changed w.r.t. earlier versions. Some c ! Calling sequences may also have changed c ! subroutine ilut(n,a,ja,ia,lfil,droptol,alu,jlu,ju,iwk,w,jw,ierr) !----------------------------------------------------------------------- implicit none integer n real*8 a(*),alu(*),w(n+1),droptol integer ja(*),ia(n+1),jlu(*),ju(n),jw(2*n),lfil,iwk,ierr !----------------------------------------------------------------------* ! *** ILUT preconditioner *** * ! incomplete LU factorization with dual truncation mechanism * !----------------------------------------------------------------------* ! Author: Yousef Saad *May, 5, 1990, Latest revision, August 1996 * !----------------------------------------------------------------------* ! PARAMETERS !----------- ! ! on entry: !========== ! n = integer. The row dimension of the matrix A. The matrix ! ! a,ja,ia = matrix stored in Compressed Sparse Row format. ! ! lfil = integer. The fill-in parameter. Each row of L and each row ! of U will have a maximum of lfil elements (excluding the ! diagonal element). lfil must be .ge. 0. ! ** WARNING: THE MEANING OF LFIL HAS CHANGED WITH RESPECT TO ! EARLIER VERSIONS. ! ! droptol = real*8. Sets the threshold for dropping small terms in the ! factorization. See below for details on dropping strategy. ! ! iwk = integer. The lengths of arrays alu and jlu. If the arrays ! are not big enough to store the ILU factorizations, ilut ! will stop with an error message. ! ! On return: !=========== ! ! alu,jlu = matrix stored in Modified Sparse Row (MSR) format containing ! the L and U factors together. The diagonal (stored in ! alu(1:n) ) is inverted. Each i-th row of the alu,jlu matrix ! contains the i-th row of L (excluding the diagonal entry=1) ! followed by the i-th row of U. ! ! ju = integer array of length n containing the pointers to ! the beginning of each row of U in the matrix alu,jlu. ! ! ierr = integer. Error message with the following meaning. ! ierr = 0 --> successful return. ! ierr .gt. 0 --> zero pivot encountered at step number ierr. ! ierr = -1 --> Error. input matrix may be wrong. ! (The elimination process has generated a ! row in L or U whose length is .gt. n.) ! ierr = -2 --> The matrix L overflows the array al. ! ierr = -3 --> The matrix U overflows the array alu. ! ierr = -4 --> Illegal value for lfil. ! ierr = -5 --> zero row encountered. ! ! work arrays: !============= ! jw = integer work array of length 2*n. ! w = real work array of length n+1. ! !---------------------------------------------------------------------- ! w, ju (1:n) store the working array [1:ii-1 = L-part, ii:n = u] ! jw(n+1:2n) stores nonzero indicators ! ! Notes: ! ------ ! The diagonal elements of the input matrix must be nonzero (at least ! 'structurally'). ! !----------------------------------------------------------------------* !---- Dual drop strategy works as follows. * ! * ! 1) Theresholding in L and U as set by droptol. Any element whose * ! magnitude is less than some tolerance (relative to the abs * ! value of diagonal element in u) is dropped. * ! * ! 2) Keeping only the largest lfil elements in the i-th row of L * ! and the largest lfil elements in the i-th row of U (excluding * ! diagonal elements). * ! * ! Flexibility: one can use droptol=0 to get a strategy based on * ! keeping the largest elements in each row of L and U. Taking * ! droptol .ne. 0 but lfil=n will give the usual threshold strategy * ! (however, fill-in is then mpredictible). * !----------------------------------------------------------------------* ! locals integer ju0,k,j1,j2,j,ii,i,lenl,lenu,jj,jrow,jpos,lenn real*8 tnorm, t, abs, s, fact if (lfil .lt. 0) goto 998 !----------------------------------------------------------------------- ! initialize ju0 (points to next element to be added to alu,jlu) ! and pointer array. !----------------------------------------------------------------------- ju0 = n+2 jlu(1) = ju0 ! ! initialize nonzero indicator array. ! do 1 j=1,n jw(n+j) = 0 1 continue !----------------------------------------------------------------------- ! beginning of main loop. !----------------------------------------------------------------------- do 500 ii = 1, n j1 = ia(ii) j2 = ia(ii+1) - 1 tnorm = 0.0d0 do 501 k=j1,j2 tnorm = tnorm+abs(a(k)) 501 continue if (abs(tnorm) .lt. tiny(1.)) goto 999 tnorm = tnorm/real(j2-j1+1) ! ! unpack L-part and U-part of row of A in arrays w ! lenu = 1 lenl = 0 jw(ii) = ii w(ii) = 0.0 jw(n+ii) = ii ! do 170 j = j1, j2 k = ja(j) t = a(j) if (k .lt. ii) then lenl = lenl+1 jw(lenl) = k w(lenl) = t jw(n+k) = lenl else if (k .eq. ii) then w(ii) = t else lenu = lenu+1 jpos = ii+lenu-1 jw(jpos) = k w(jpos) = t jw(n+k) = jpos endif 170 continue jj = 0 lenn = 0 ! ! eliminate previous rows ! 150 jj = jj+1 if (jj .gt. lenl) goto 160 !----------------------------------------------------------------------- ! in order to do the elimination in the correct order we must select ! the smallest column index among jw(k), k=jj+1, ..., lenl. !----------------------------------------------------------------------- jrow = jw(jj) k = jj ! ! determine smallest column index ! do 151 j=jj+1,lenl if (jw(j) .lt. jrow) then jrow = jw(j) k = j endif 151 continue ! if (k .ne. jj) then ! exchange in jw j = jw(jj) jw(jj) = jw(k) jw(k) = j ! exchange in jr jw(n+jrow) = jj jw(n+j) = k ! exchange in w s = w(jj) w(jj) = w(k) w(k) = s endif ! ! zero out element in row by setting jw(n+jrow) to zero. ! jw(n+jrow) = 0 ! ! get the multiplier for row to be eliminated (jrow). ! fact = w(jj)*alu(jrow) if (abs(fact) .le. droptol) goto 150 ! ! combine current row and row jrow ! do 203 k = ju(jrow), jlu(jrow+1)-1 s = fact*alu(k) j = jlu(k) jpos = jw(n+j) if (j .ge. ii) then ! ! dealing with upper part. ! if (jpos .eq. 0) then ! ! this is a fill-in element ! lenu = lenu+1 if (lenu .gt. n) goto 995 i = ii+lenu-1 jw(i) = j jw(n+j) = i w(i) = - s else ! ! this is not a fill-in element ! w(jpos) = w(jpos) - s endif else ! ! dealing with lower part. ! if (jpos .eq. 0) then ! ! this is a fill-in element ! lenl = lenl+1 if (lenl .gt. n) goto 995 jw(lenl) = j jw(n+j) = lenl w(lenl) = - s else ! ! this is not a fill-in element ! w(jpos) = w(jpos) - s endif endif 203 continue ! ! store this pivot element -- (from left to right -- no danger of ! overlap with the working elements in L (pivots). ! lenn = lenn+1 w(lenn) = fact jw(lenn) = jrow goto 150 160 continue ! ! reset double-pointer to zero (U-part) ! do 308 k=1, lenu jw(n+jw(ii+k-1)) = 0 308 continue ! ! update L-matrix ! lenl = lenn lenn = min0(lenl,lfil) ! ! sort by quick-split ! call qsplit (w,jw,lenl,lenn) ! ! store L-part ! do 204 k=1, lenn if (ju0 .gt. iwk) goto 996 alu(ju0) = w(k) jlu(ju0) = jw(k) ju0 = ju0+1 204 continue ! ! save pointer to beginning of row ii of U ! ju(ii) = ju0 ! ! update U-matrix -- first apply dropping strategy ! lenn = 0 do k=1, lenu-1 if (abs(w(ii+k)) .gt. droptol*tnorm) then lenn = lenn+1 w(ii+lenn) = w(ii+k) jw(ii+lenn) = jw(ii+k) endif enddo lenu = lenn+1 lenn = min0(lenu,lfil) ! call qsplit (w(ii+1), jw(ii+1), lenu-1,lenn) ! ! copy ! t = abs(w(ii)) if (lenn + ju0 .gt. iwk) goto 997 do 302 k=ii+1,ii+lenn-1 jlu(ju0) = jw(k) alu(ju0) = w(k) t = t + abs(w(k) ) ju0 = ju0+1 302 continue ! ! store inverse of diagonal element of u ! !2do check if it works ... after correction ... if (abs(w(ii)) .lt. tiny(1.d0)) w(ii) = (0.0001d0 + droptol)*tnorm ! alu(ii) = 1.0d0/ w(ii) ! ! update pointer to beginning of next row of U. ! jlu(ii+1) = ju0 !----------------------------------------------------------------------- ! end main loop !----------------------------------------------------------------------- 500 continue ierr = 0 return ! ! incomprehensible error. Matrix must be wrong. ! 995 ierr = -1 return ! ! insufficient storage in L. ! 996 ierr = -2 return ! ! insufficient storage in U. ! 997 ierr = -3 return ! ! illegal lfil entered. ! 998 ierr = -4 return ! ! zero row encountered ! 999 ierr = -5 return !----------------end-of-ilut-------------------------------------------- !----------------------------------------------------------------------- end !---------------------------------------------------------------------- ! subroutine ilu0(n, a, ja, ia, alu, jlu, ju, iw, ipoint1, ipoint2, ierr) subroutine ilu0(n, a, ja, ia, alu, jlu, ju, iw, ierr) implicit real*8 (a-h,o-z) real*8 a(*), alu(*), tl integer n, ju0, ii, jj, i, j, jcol, js, jf, jm, jrow, jw, ierr integer ja(*), ia(*), ju(*), jlu(*), iw(n) ! !----------------------------------------------------------------------- ju0 = n+2 jlu(1) = ju0 !!! iw = 0 do ii = 1, n js = ju0 do j=ia(ii),ia(ii+1)-1 jcol = ja(j) if (jcol .eq. ii) then alu(ii) = a(j) iw(jcol) = ii ju(ii) = ju0 !!! else alu(ju0) = a(j) jlu(ju0) = ja(j) iw(jcol) = ju0 ju0 = ju0+1 endif end do jlu(ii+1) = ju0 !!! jf = ju0-1 jm = ju(ii)-1 ! exit if diagonal element is reached. do j=js, jm jrow = jlu(j) tl = alu(j)*alu(jrow) alu(j) = tl ! perform linear combination do jj = ju(jrow), jlu(jrow+1)-1 jw = iw(jlu(jj)) if (jw .ne. 0) then alu(jw) = alu(jw) - tl*alu(jj) ! write(*,*) ii, jw, jj end if end do end do ! invert and store diagonal element. if (abs(alu(ii)) .lt. tiny(1.)) goto 600 alu(ii) = 1.0d0/alu(ii) ! reset pointer iw to zero iw(ii) = 0 do i = js, jf iw(jlu(i)) = 0 end do end do ierr = 0 return ! zero pivot : 600 ierr = ii return end !----------------------------------------------------------------------- ! subroutine pgmres(n, im, rhs, sol, eps, maxits, ierr) ! subroutine pgmres(n, im, rhs, sol, eps, maxits, aspar, ierr) subroutine pgmres(n, im, rhs, sol, eps, maxits, aspar, nnz, ia, ja, alu, jlu, ju, vv, ierr) !----------------------------------------------------------------------- ! use datapool, only : nnz, ia, ja, alu, jlu, ju, vv, aspar!, rhs, sol implicit none integer :: n, im, maxits, ierr, nnz integer :: ja(nnz), ia(n+1) integer :: jlu(nnz+1), ju(n) real*8 :: vv(n,im+1), alu(nnz+1) real*8 :: aspar(nnz) real*8 :: rhs(*), sol(*) real*8 :: eps real*8 :: eps1, epsmac, gam, t, ddot, dnrm2, ro, tl integer :: i,i1,j,jj,k,k1,iii,ii,ju0 integer :: its,jrow,jcol,jf,jm,js,jw real*8 :: hh(im+1,im), c(im), s(im), rs(im+1) real*8 :: iw(n) logical :: lblas = .false. ! use sparskit matvec and external blas libs (true), don't use them (false) logical :: lilu = .true. ! use simple ilu preconditioner data epsmac/1.d-16/ ! ilu0 preconditioner if (lilu) then ju0 = n+2 jlu(1) = ju0 !!! iw = 0 do ii = 1, n js = ju0 do j=ia(ii),ia(ii+1)-1 jcol = ja(j) if (jcol .eq. ii) then alu(ii) = aspar(j) iw(jcol) = ii ju(ii) = ju0 !!! else alu(ju0) = aspar(j) jlu(ju0) = ja(j) iw(jcol) = ju0 ju0 = ju0+1 endif end do jlu(ii+1) = ju0 !!! jf = ju0-1 jm = ju(ii)-1 ! exit if diagonal element is reached. do j=js, jm jrow = jlu(j) tl = alu(j)*alu(jrow) alu(j) = tl ! perform linear combination do jj = ju(jrow), jlu(jrow+1)-1 jw = int(iw(jlu(jj))) if (jw .ne. 0) then alu(jw) = alu(jw) - tl*alu(jj) ! write(*,*) ii, jw, jj end if end do end do ! invert and store diagonal element. if (abs(alu(ii)) .lt. epsmac) then write (*,*) 'zero pivot' stop end if alu(ii) = 1.0d0/alu(ii) ! reset pointer iw to zero iw(ii) = 0 do i = js, jf iw(jlu(i)) = 0 end do end do ! end preconditioner end if !------------------------------------------------------------- its = 0 ! outer loop starts here.. if (lblas) then call amux (n, sol, vv, aspar, ja, ia) else do iii = 1, n t = 0.0d0 do k = ia(iii), ia(iii+1)-1 t = t + aspar(k) * sol(ja(k)) end do vv(iii,1) = t end do end if do j=1,n vv(j,1) = rhs(j) - vv(j,1) end do 20 if (lblas) then ro = dnrm2(n, vv) else ro = sqrt(sum(vv(:,1)*vv(:,1))) end if if (abs(ro) .lt. epsmac) goto 999 t = 1.0d0 / ro do j=1, n vv(j,1) = vv(j,1)*t end do if (its .eq. 0) eps1=eps*ro ! initialize 1-st term of rhs of hessenberg system.. rs(1) = ro i = 0 4 i=i+1 its = its + 1 i1 = i + 1 if (lblas) then call lusol (n, vv(1,i), rhs, alu, jlu, ju) call amux (n, rhs, vv(1,i1), aspar, ja, ia) else do iii = 1, n !- lusol rhs(iii) = vv(iii,i) do k=jlu(iii),ju(iii)-1 rhs(iii) = rhs(iii) - alu(k)* rhs(jlu(k)) end do end do do iii = n, 1, -1 do k=ju(iii),jlu(iii+1)-1 rhs(iii) = rhs(iii) - alu(k)*rhs(jlu(k)) end do rhs(iii) = alu(iii)*rhs(iii) end do do iii = 1, n !- amux t = 0.0d0 do k = ia(iii), ia(iii+1)-1 t = t + aspar(k) * rhs(ja(k)) end do vv(iii,i1) = t end do end if ! modified gram - schmidt... if (lblas) then do j=1, i t = ddot(n, vv(1,j),vv(1,i1)) hh(j,i) = t call daxpy(n, -t, vv(1,j), 1, vv(1,i1), 1) t = dnrm2(n, vv(1,i1)) end do else do j=1, i t = 0.d0 do iii = 1,n t = t + vv(iii,j)*vv(iii,i1) end do hh(j,i) = t vv(:,i1) = vv(:,i1) - t * vv(:,j) t = sqrt(sum(vv(:,i1)*vv(:,i1))) end do end if hh(i1,i) = t if ( abs(t) .lt. epsmac) goto 58 t = 1.0d0/t do k=1,n vv(k,i1) = vv(k,i1)*t end do ! done with modified gram schimd and arnoldi step.. now update factorization of hh 58 if (i .eq. 1) goto 121 do k=2,i k1 = k-1 t = hh(k1,i) hh(k1,i) = c(k1)*t + s(k1)*hh(k,i) hh(k,i) = -s(k1)*t + c(k1)*hh(k,i) end do 121 gam = sqrt(hh(i,i)**2 + hh(i1,i)**2) if (abs(gam) .lt. epsmac) gam = epsmac ! get next plane rotation c(i) = hh(i,i)/gam s(i) = hh(i1,i)/gam rs(i1) = -s(i)*rs(i) rs(i) = c(i)*rs(i) ! detrermine residual norm and test for convergence- hh(i,i) = c(i)*hh(i,i) + s(i)*hh(i1,i) ro = abs(rs(i1)) if (i .lt. im .and. (ro .gt. eps1)) goto 4 ! now compute solution. first solve upper triangular system. rs(i) = rs(i)/hh(i,i) do ii=2,i k=i-ii+1 k1 = k+1 t=rs(k) do j=k1,i t = t-hh(k,j)*rs(j) end do rs(k) = t/hh(k,k) end do ! form linear combination of v(*,i)'s to get solution t = rs(1) do k=1, n rhs(k) = vv(k,1)*t end do do j = 2, i t = rs(j) do k=1, n rhs(k) = rhs(k)+t*vv(k,j) end do end do ! call preconditioner. if (lblas) then call lusol (n, rhs, rhs, alu, jlu, ju) else do iii = 1, n do k=jlu(iii),ju(iii)-1 rhs(iii) = rhs(iii) - alu(k)* rhs(jlu(k)) end do end do do iii = n, 1, -1 do k=ju(iii),jlu(iii+1)-1 rhs(iii) = rhs(iii) - alu(k)*rhs(jlu(k)) end do rhs(iii) = alu(iii)*rhs(iii) end do end if do k=1, n sol(k) = sol(k) + rhs(k) end do ! restart outer loop when necessary if (ro .le. eps1) goto 990 if (its .ge. maxits) goto 991 ! else compute residual vector and continue.. do j=1,i jj = i1-j+1 rs(jj-1) = -s(jj-1)*rs(jj) rs(jj) = c(jj-1)*rs(jj) end do do j=1,i1 t = rs(j) if (j .eq. 1) t = t-1.0d0 if (lblas) then call daxpy (n, t, vv(1,j), 1, vv, 1) else vv(:,j) = vv(:,j) + t * vv(:,1) end if end do ! 199 format(' its =', i4, ' res. norm =', d20.6) ! restart outer loop. goto 20 990 ierr = 0 return 991 ierr = 1 return 999 continue ierr = -1 return !--------------------------------------------------------------------- end !----------------------------------------------------------------------- !----------------------------------------------------------------------- !----------------------------------------------------------------------- ! subroutine from blas1.f90 !----------------------------------------------------------------------- DOUBLE PRECISION FUNCTION DNRM2(N,X) ! .. Scalar Arguments .. INTEGER N ! .. ! .. Array Arguments .. DOUBLE PRECISION X(*) ! .. ! ! Purpose ! ======= ! ! DNRM2 returns the euclidean norm of a vector via the function ! name, so that ! ! DNRM2 := sqrt( x'*x ) ! ! Further Details ! =============== ! ! -- This version written on 25-October-1982. ! Modified on 14-October-1993 to inline the call to DLASSQ. ! Sven Hammarling, Nag Ltd. ! ! ===================================================================== ! ! .. Parameters .. DOUBLE PRECISION ONE,ZERO PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) ! .. ! .. Local Scalars .. DOUBLE PRECISION ABSXI,NORM,SCALE,SSQ INTEGER IX ! .. ! .. Intrinsic Functions .. INTRINSIC ABS,SQRT ! .. IF (N.LT.1 ) THEN NORM = ZERO ELSE IF (N.EQ.1) THEN NORM = ABS(X(1)) ELSE SCALE = ZERO SSQ = ONE ! The following loop is equivalent to this call to the LAPACK ! auxiliary routine: ! CALL DLASSQ( N, X, SCALE, SSQ ) ! DO 10 IX = 1,1 + (N-1) IF (X(IX).NE.ZERO) THEN ABSXI = ABS(X(IX)) IF (SCALE.LT.ABSXI) THEN SSQ = ONE + SSQ* (SCALE/ABSXI)**2 SCALE = ABSXI ELSE SSQ = SSQ + (ABSXI/SCALE)**2 END IF END IF 10 CONTINUE NORM = SCALE*SQRT(SSQ) END IF ! DNRM2 = NORM RETURN ! ! End of DNRM2. ! END !----------------------------------------------------------------------- SUBROUTINE DLASSQ( N, X, SCALE, SUMSQ ) ! ! -- LAPACK auxiliary routine (version 3.1) -- ! Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. ! November 2006 INTEGER N DOUBLE PRECISION SCALE, SUMSQ DOUBLE PRECISION X( * ) ! ! DLASSQ returns the values scl and smsq such that ! ! ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, ! ! where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is ! assumed to be non-negative and scl returns the value ! ! scl = max( scale, abs( x( i ) ) ). ! ! SCALE (input/output) DOUBLE PRECISION ! On entry, the value scale in the equation above. ! On exit, SCALE is overwritten with scl , the scaling factor ! for the sum of squares. DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D+0 ) INTEGER IX DOUBLE PRECISION ABSXI INTRINSIC ABS ! IF( N.GT.0 ) THEN DO IX = 1, 1 + ( N-1 ) IF( X( IX ).NE.ZERO ) THEN ABSXI = ABS( X( IX ) ) IF( SCALE.LT.ABSXI ) THEN SUMSQ = 1 + SUMSQ*( SCALE / ABSXI )**2 SCALE = ABSXI ELSE SUMSQ = SUMSQ + ( ABSXI / SCALE )**2 END IF END IF END DO END IF RETURN END !------------------------------------------------------------------------- double precision function ddot(n,dx,dy) ! ! forms the dot product of two vectors. ! uses unrolled loops for increments equal to one. ! jack dongarra, linpack, 3/11/78. ! double precision dx(*),dy(*),dtemp integer i,m,mp1,n ! ddot = 0.0d0 dtemp = 0.0d0 if(n.le.0)return 20 m = mod(n,5) if( m .eq. 0 ) go to 40 do 30 i = 1,m dtemp = dtemp + dx(i)*dy(i) 30 continue if( n .lt. 5 ) go to 60 40 mp1 = m + 1 do 50 i = mp1,n,5 dtemp = dtemp + dx(i)*dy(i) + dx(i + 1)*dy(i + 1) + & & dx(i + 2)*dy(i + 2) + dx(i + 3)*dy(i + 3) + dx(i + 4)*dy(i + 4) 50 continue 60 ddot = dtemp return end !---------------------------------------------------------------------- subroutine daxpy(n,da,dx,incx,dy,incy) ! ! constant times a vector plus a vector. ! uses unrolled loops for increments equal to one. ! jack dongarra, linpack, 3/11/78. ! double precision dx(1),dy(1),da integer i,incx,incy,ix,iy,m,mp1,n ! if(n.le.0)return if (abs(da) .lt. tiny(1.d0)) return if(incx.eq.1.and.incy.eq.1)go to 20 ! ! code for unequal increments or equal increments ! not equal to 1 ! ix = 1 iy = 1 if(incx.lt.0)ix = (-n+1)*incx + 1 if(incy.lt.0)iy = (-n+1)*incy + 1 do 10 i = 1,n dy(iy) = dy(iy) + da*dx(ix) ix = ix + incx iy = iy + incy 10 continue return ! ! code for both increments equal to 1 ! ! clean-up loop ! 20 m = mod(n,4) if( m .eq. 0 ) go to 40 do 30 i = 1,m dy(i) = dy(i) + da*dx(i) 30 continue if( n .lt. 4 ) return 40 mp1 = m + 1 do 50 i = mp1,n,4 dy(i) = dy(i) + da*dx(i) dy(i + 1) = dy(i + 1) + da*dx(i + 1) dy(i + 2) = dy(i + 2) + da*dx(i + 2) dy(i + 3) = dy(i + 3) + da*dx(i + 3) 50 continue return end