subroutine trans_z2x ( np, comm, dir, r_wordsize, i_wordsize, memorder, & a, & sd1, ed1, sd2, ed2, sd3, ed3, & sp1, ep1, sp2, ep2, sp3, ep3, & sm1, em1, sm2, em2, sm3, em3, & ax, & sp1x, ep1x, sp2x, ep2x, sp3x, ep3x, & sm1x, em1x, sm2x, em2x, sm3x, em3x ) USE duplicate_of_driver_constants implicit none integer, intent(in) :: sd1, ed1, sd2, ed2, sd3, ed3, & sp1, ep1, sp2, ep2, sp3, ep3, & sm1, em1, sm2, em2, sm3, em3, & sp1x, ep1x, sp2x, ep2x, sp3x, ep3x, & sm1x, em1x, sm2x, em2x, sm3x, em3x integer, intent(in) :: np, comm, r_wordsize, i_wordsize integer, intent(in) :: dir ! 1 is a->ax, otherwise ax->a integer, intent(in) :: memorder integer, dimension((ep1-sp1+1)*(ep2-sp2+1)*(ep3-sp3+1)*max(1,(r_wordsize/i_wordsize))) :: a integer, dimension((ep1x-sp1x+1)*(ep2x-ep2x+1)*(ep3x-sp3x+1)*max(1,(r_wordsize/i_wordsize))) :: ax include 'mpif.h' !local integer :: ids, ide, jds, jde, kds, kde, & ips, ipe, jps, jpe, kps, kpe, & ims, ime, jms, jme, kms, kme, & ipsx, ipex, jpsx, jpex, kpsx, kpex, & imsx, imex, jmsx, jmex, kmsx, kmex integer, dimension(0:(ep1-sp1+1)*(ep2-sp2+1)*(ep3-sp3+1)*max(1,(r_wordsize/i_wordsize))) :: zbuf integer, dimension(0:(ep1x-sp1x+1)*(ep2x-sp2x+1)*(ep3x-sp3x+1)*max(1,(r_wordsize/i_wordsize))) :: xbuf integer pencil(4), allpencils(4,np) integer sendcnts(np), sdispls(np), recvcnts(np), rdispls(np) integer allsendcnts(np+2,np), is(np), ie(np), ks(np),ke(np) integer sendcurs(np), recvcurs(np) integer i,j,k,p,sc,sp,rp,yp,zp,curs,zbufsz,cells,nkcells,ivectype,ierr SELECT CASE ( memorder ) CASE ( DATA_ORDER_XYZ ) ids = sd1 ; ide = ed1 ; jds = sd2 ; jde = ed2 ; kds = sd3 ; kde = ed3 ips = sp1 ; ipe = ep1 ; jps = sp2 ; jpe = ep2 ; kps = sp3 ; kpe = ep3 ims = sm1 ; ime = em1 ; jms = sm2 ; jme = em2 ; kms = sm3 ; kme = em3 ipsx = sp1x ; ipex = ep1x ; jpsx = sp2x ; jpex = ep2x ; kpsx = sp3x ; kpex = ep3x imsx = sm1x ; imex = em1x ; jmsx = sm2x ; jmex = em2x ; kmsx = sm3x ; kmex = em3x CASE ( DATA_ORDER_YXZ ) ids = sd2 ; ide = ed2 ; jds = sd1 ; jde = ed1 ; kds = sd3 ; kde = ed3 ips = sp2 ; ipe = ep2 ; jps = sp1 ; jpe = ep1 ; kps = sp3 ; kpe = ep3 ims = sm2 ; ime = em2 ; jms = sm1 ; jme = em1 ; kms = sm3 ; kme = em3 ipsx = sp2x ; ipex = ep2x ; jpsx = sp1x ; jpex = ep1x ; kpsx = sp3x ; kpex = ep3x imsx = sm2x ; imex = em2x ; jmsx = sm1x ; jmex = em1x ; kmsx = sm3x ; kmex = em3x CASE ( DATA_ORDER_XZY ) ids = sd1 ; ide = ed1 ; jds = sd3 ; jde = ed3 ; kds = sd2 ; kde = ed2 ips = sp1 ; ipe = ep1 ; jps = sp3 ; jpe = ep3 ; kps = sp2 ; kpe = ep2 ims = sm1 ; ime = em1 ; jms = sm3 ; jme = em3 ; kms = sm2 ; kme = em2 ipsx = sp1x ; ipex = ep1x ; jpsx = sp3x ; jpex = ep3x ; kpsx = sp2x ; kpex = ep2x imsx = sm1x ; imex = em1x ; jmsx = sm3x ; jmex = em3x ; kmsx = sm2x ; kmex = em2x CASE ( DATA_ORDER_YZX ) ids = sd3 ; ide = ed3 ; jds = sd1 ; jde = ed1 ; kds = sd2 ; kde = ed2 ips = sp3 ; ipe = ep3 ; jps = sp1 ; jpe = ep1 ; kps = sp2 ; kpe = ep2 ims = sm3 ; ime = em3 ; jms = sm1 ; jme = em1 ; kms = sm2 ; kme = em2 ipsx = sp3x ; ipex = ep3x ; jpsx = sp1x ; jpex = ep1x ; kpsx = sp2x ; kpex = ep2x imsx = sm3x ; imex = em3x ; jmsx = sm1x ; jmex = em1x ; kmsx = sm2x ; kmex = em2x CASE ( DATA_ORDER_ZXY ) ids = sd2 ; ide = ed2 ; jds = sd3 ; jde = ed3 ; kds = sd1 ; kde = ed1 ips = sp2 ; ipe = ep2 ; jps = sp3 ; jpe = ep3 ; kps = sp1 ; kpe = ep1 ims = sm2 ; ime = em2 ; jms = sm3 ; jme = em3 ; kms = sm1 ; kme = em1 ipsx = sp2x ; ipex = ep2x ; jpsx = sp3x ; jpex = ep3x ; kpsx = sp1x ; kpex = ep1x imsx = sm2x ; imex = em2x ; jmsx = sm3x ; jmex = em3x ; kmsx = sm1x ; kmex = em1x CASE ( DATA_ORDER_ZYX ) ids = sd3 ; ide = ed3 ; jds = sd2 ; jde = ed2 ; kds = sd1 ; kde = ed1 ips = sp3 ; ipe = ep3 ; jps = sp2 ; jpe = ep2 ; kps = sp1 ; kpe = ep1 ims = sm3 ; ime = em3 ; jms = sm2 ; jme = em2 ; kms = sm1 ; kme = em1 ipsx = sp3x ; ipex = ep3x ; jpsx = sp2x ; jpex = ep2x ; kpsx = sp1x ; kpex = ep1x imsx = sm3x ; imex = em3x ; jmsx = sm2x ; jmex = em2x ; kmsx = sm1x ; kmex = em1x END SELECT sendcnts = 0 ; recvcnts = 0 xbuf = 0 zbuf = 0 ! work out send/recv sizes to each processor in X dimension pencil(1) = ips pencil(2) = ipe pencil(3) = kpsx pencil(4) = kpex call mpi_allgather( pencil, 4, MPI_INTEGER, allpencils, 4, MPI_INTEGER, comm, ierr ) do p = 1, np is(p) = allpencils(1,p) ie(p) = allpencils(2,p) ks(p) = allpencils(3,p) ke(p) = allpencils(4,p) enddo ! pack send buffer sendcurs = 0 sdispls = 0 sc = 0 do p = 1, np if ( r_wordsize .eq. i_wordsize ) then if ( dir .eq. 1 ) then call f_pack_int ( a, zbuf(sc), memorder, & & jps, jpe, ks(p), ke(p), ips, ipe, & & jms, jme, kms, kme, ims, ime, sendcurs(p) ) else call f_pack_int ( ax, xbuf(sc), memorder, & & jpsx, jpex, kpsx, kpex, is(p), ie(p), & & jmsx, jmex, kmsx, kmex, imsx, imex, sendcurs(p) ) endif else if ( r_wordsize .eq. 8 ) THEN if ( dir .eq. 1 ) then call f_pack_lint ( a, zbuf(sc), memorder, & & jps, jpe, ks(p), ke(p), ips, ipe, & & jms, jme, kms, kme, ims, ime, sendcurs(p) ) else call f_pack_lint ( ax, xbuf(sc), memorder, & & jpsx, jpex, kpsx, kpex, is(p), ie(p), & & jmsx, jmex, kmsx, kmex, imsx, imex, sendcurs(p) ) endif sendcurs(p) = sendcurs(p) * max(1,(r_wordsize/i_wordsize)) else write(0,*)'RSL_LITE internal error: type size mismatch ',"f_xpose.F90",123 call mpi_abort(ierr) endif sc = sc + sendcurs(p) sendcnts(p) = sendcurs(p) if ( p .GT. 1 ) sdispls(p) = sdispls(p-1) + sendcnts(p-1) enddo ! work out receive counts and displs rdispls = 0 recvcnts = 0 do p = 1, np if ( dir .eq. 1 ) then recvcnts(p) = (ie(p)-is(p)+1)*(kpex-kpsx+1)*(jpex-jpsx+1) * max(1,(r_wordsize/i_wordsize)) else recvcnts(p) = (ke(p)-ks(p)+1)*(ipe-ips+1)*(jpe-jps+1) * max(1,(r_wordsize/i_wordsize)) endif if ( p .GT. 1 ) rdispls(p) = rdispls(p-1) + recvcnts(p-1) enddo ! do the transpose if ( dir .eq. 1 ) then call mpi_alltoallv(zbuf, sendcnts, sdispls, MPI_INTEGER, & xbuf, recvcnts, rdispls, MPI_INTEGER, comm, ierr ) else call mpi_alltoallv(xbuf, sendcnts, sdispls, MPI_INTEGER, & zbuf, recvcnts, rdispls, MPI_INTEGER, comm, ierr ) endif ! unpack do p = 1, np if ( r_wordsize .eq. i_wordsize ) then if ( dir .eq. 1 ) then call f_unpack_int ( xbuf(rdispls(p)), ax, memorder, & & jpsx, jpex, kpsx, kpex, is(p), ie(p), & & jmsx, jmex, kmsx, kmex, imsx, imex, curs ) else call f_unpack_int ( zbuf(rdispls(p)), a, memorder, & & jps, jpe, ks(p), ke(p), ips, ipe, & & jms, jme, kms, kme, ims, ime, curs ) endif else if ( r_wordsize .eq. 8 ) THEN if ( dir .eq. 1 ) then call f_unpack_lint ( xbuf(rdispls(p)), ax, memorder, & & jpsx, jpex, kpsx, kpex, is(p), ie(p), & & jmsx, jmex, kmsx, kmex, imsx, imex, curs ) else call f_unpack_lint ( zbuf(rdispls(p)), a, memorder, & & jps, jpe, ks(p), ke(p), ips, ipe, & & jms, jme, kms, kme, ims, ime, curs ) endif else write(0,*)'RSL_LITE internal error: type size mismatch ',"f_xpose.F90",172 call mpi_abort(ierr) endif enddo return end subroutine trans_z2x subroutine trans_x2y ( np, comm, dir, r_wordsize, i_wordsize, memorder, & ax, & sd1, ed1, sd2, ed2, sd3, ed3, & sp1x, ep1x, sp2x, ep2x, sp3x, ep3x, & sm1x, em1x, sm2x, em2x, sm3x, em3x, & ay, & sp1y, ep1y, sp2y, ep2y, sp3y, ep3y, & sm1y, em1y, sm2y, em2y, sm3y, em3y ) USE duplicate_of_driver_constants implicit none integer, intent(in) :: memorder integer, intent(in) :: sd1, ed1, sd2, ed2, sd3, ed3, & sp1x, ep1x, sp2x, ep2x, sp3x, ep3x, & sm1x, em1x, sm2x, em2x, sm3x, em3x, & sp1y, ep1y, sp2y, ep2y, sp3y, ep3y, & sm1y, em1y, sm2y, em2y, sm3y, em3y integer, intent(in) :: np, comm, r_wordsize, i_wordsize integer, intent(in) :: dir ! 1 is a->ax, otherwise ax->a integer, dimension((ep1x-sp1x+1)*(ep2x-ep2x+1)*(ep3x-sp3x+1)*max(1,(r_wordsize/i_wordsize))) :: ax integer, dimension((ep1y-sp1y+1)*(ep2y-sp2y+1)*(ep3y-sp3y+1)*max(1,(r_wordsize/i_wordsize))) :: ay include 'mpif.h' integer, dimension(0:(ep1x-sp1x+1)*(ep2x-sp2x+1)*(ep3x-sp3x+1)*max(1,(r_wordsize/i_wordsize))) :: xbuf integer, dimension(0:(ep1y-sp1y+1)*(ep2y-sp2y+1)*(ep3y-sp3y+1)*max(1,(r_wordsize/i_wordsize))) :: ybuf !local integer ids, ide, jds, jde, kds, kde, & ipsx, ipex, jpsx, jpex, kpsx, kpex, & imsx, imex, jmsx, jmex, kmsx, kmex, & ipsy, ipey, jpsy, jpey, kpsy, kpey, & imsy, imey, jmsy, jmey, kmsy, kmey integer pencil(4), allpencils(4,np) integer sendcnts(np), sdispls(np), recvcnts(np), rdispls(np) integer allsendcnts(np+2,np), is(np), ie(np), js(np), je(np) integer sendcurs(np), recvcurs(np) integer i,j,k,p,sc,sp,rp,yp,zp,curs,xbufsz,cells,nkcells,ivectype,ierr SELECT CASE ( memorder ) CASE ( DATA_ORDER_XYZ ) ids = sd1 ; ide = ed1 ; jds = sd2 ; jde = ed2 ; kds = sd3 ; kde = ed3 ipsx = sp1x ; ipex = ep1x ; jpsx = sp2x ; jpex = ep2x ; kpsx = sp3x ; kpex = ep3x imsx = sm1x ; imex = em1x ; jmsx = sm2x ; jmex = em2x ; kmsx = sm3x ; kmex = em3x ipsy = sp1y ; ipey = ep1y ; jpsy = sp2y ; jpey = ep2y ; kpsy = sp3y ; kpey = ep3y imsy = sm1y ; imey = em1y ; jmsy = sm2y ; jmey = em2y ; kmsy = sm3y ; kmey = em3y CASE ( DATA_ORDER_YXZ ) ids = sd2 ; ide = ed2 ; jds = sd1 ; jde = ed1 ; kds = sd3 ; kde = ed3 ipsx = sp2x ; ipex = ep2x ; jpsx = sp1x ; jpex = ep1x ; kpsx = sp3x ; kpex = ep3x imsx = sm2x ; imex = em2x ; jmsx = sm1x ; jmex = em1x ; kmsx = sm3x ; kmex = em3x ipsy = sp2y ; ipey = ep2y ; jpsy = sp1y ; jpey = ep1y ; kpsy = sp3y ; kpey = ep3y imsy = sm2y ; imey = em2y ; jmsy = sm1y ; jmey = em1y ; kmsy = sm3y ; kmey = em3y CASE ( DATA_ORDER_XZY ) ids = sd1 ; ide = ed1 ; jds = sd3 ; jde = ed3 ; kds = sd2 ; kde = ed2 ipsx = sp1x ; ipex = ep1x ; jpsx = sp3x ; jpex = ep3x ; kpsx = sp2x ; kpex = ep2x imsx = sm1x ; imex = em1x ; jmsx = sm3x ; jmex = em3x ; kmsx = sm2x ; kmex = em2x ipsy = sp1y ; ipey = ep1y ; jpsy = sp3y ; jpey = ep3y ; kpsy = sp2y ; kpey = ep2y imsy = sm1y ; imey = em1y ; jmsy = sm3y ; jmey = em3y ; kmsy = sm2y ; kmey = em2y CASE ( DATA_ORDER_YZX ) ids = sd3 ; ide = ed3 ; jds = sd1 ; jde = ed1 ; kds = sd2 ; kde = ed2 ipsx = sp3x ; ipex = ep3x ; jpsx = sp1x ; jpex = ep1x ; kpsx = sp2x ; kpex = ep2x imsx = sm3x ; imex = em3x ; jmsx = sm1x ; jmex = em1x ; kmsx = sm2x ; kmex = em2x ipsy = sp3y ; ipey = ep3y ; jpsy = sp1y ; jpey = ep1y ; kpsy = sp2y ; kpey = ep2y imsy = sm3y ; imey = em3y ; jmsy = sm1y ; jmey = em1y ; kmsy = sm2y ; kmey = em2y CASE ( DATA_ORDER_ZXY ) ids = sd2 ; ide = ed2 ; jds = sd3 ; jde = ed3 ; kds = sd1 ; kde = ed1 ipsx = sp2x ; ipex = ep2x ; jpsx = sp3x ; jpex = ep3x ; kpsx = sp1x ; kpex = ep1x imsx = sm2x ; imex = em2x ; jmsx = sm3x ; jmex = em3x ; kmsx = sm1x ; kmex = em1x ipsy = sp2y ; ipey = ep2y ; jpsy = sp3y ; jpey = ep3y ; kpsy = sp1y ; kpey = ep1y imsy = sm2y ; imey = em2y ; jmsy = sm3y ; jmey = em3y ; kmsy = sm1y ; kmey = em1y CASE ( DATA_ORDER_ZYX ) ids = sd3 ; ide = ed3 ; jds = sd2 ; jde = ed2 ; kds = sd1 ; kde = ed1 ipsx = sp3x ; ipex = ep3x ; jpsx = sp2x ; jpex = ep2x ; kpsx = sp1x ; kpex = ep1x imsx = sm3x ; imex = em3x ; jmsx = sm2x ; jmex = em2x ; kmsx = sm1x ; kmex = em1x ipsy = sp3y ; ipey = ep3y ; jpsy = sp2y ; jpey = ep2y ; kpsy = sp1y ; kpey = ep1y imsy = sm3y ; imey = em3y ; jmsy = sm2y ; jmey = em2y ; kmsy = sm1y ; kmey = em1y END SELECT sendcnts = 0 ; recvcnts = 0 xbuf = 0 ybuf = 0 ! work out send/recv sizes to each processor in X dimension pencil(1) = jpsx pencil(2) = jpex pencil(3) = ipsy pencil(4) = ipey call mpi_allgather( pencil, 4, MPI_INTEGER, allpencils, 4, MPI_INTEGER, comm, ierr ) do p = 1, np js(p) = allpencils(1,p) je(p) = allpencils(2,p) is(p) = allpencils(3,p) ie(p) = allpencils(4,p) enddo ! pack send buffer sendcurs = 0 sdispls = 0 sc = 0 do p = 1, np if ( r_wordsize .eq. i_wordsize ) then if ( dir .eq. 1 ) then call f_pack_int ( ax, xbuf(sc), memorder, & & jpsx, jpex, kpsx, kpex, is(p), ie(p), & & jmsx, jmex, kmsx, kmex, imsx, imex, sendcurs(p) ) else call f_pack_int ( ay, ybuf(sc), memorder, & & js(p), je(p), kpsy, kpey, ipsy, ipey, & & jmsy, jmey, kmsy, kmey, imsy, imey, sendcurs(p) ) endif else if ( r_wordsize .eq. 8 ) THEN if ( dir .eq. 1 ) then call f_pack_lint ( ax, xbuf(sc), memorder, & & jpsx, jpex, kpsx, kpex, is(p), ie(p), & & jmsx, jmex, kmsx, kmex, imsx, imex, sendcurs(p) ) else call f_pack_lint ( ay, ybuf(sc), memorder, & & js(p), je(p), kpsy, kpey, ipsy, ipey, & & jmsy, jmey, kmsy, kmey, imsy, imey, sendcurs(p) ) endif sendcurs(p) = sendcurs(p) * max(1,(r_wordsize/i_wordsize)) else write(0,*)'RSL_LITE internal error: type size mismatch ',"f_xpose.F90",305 call mpi_abort(ierr) endif sc = sc + sendcurs(p) sendcnts(p) = sendcurs(p) if ( p .GT. 1 ) sdispls(p) = sdispls(p-1) + sendcnts(p-1) enddo ! work out receive counts and displs rdispls = 0 recvcnts = 0 do p = 1, np if ( dir .eq. 1 ) then recvcnts(p) = (je(p)-js(p)+1)*(kpey-kpsy+1)*(ipey-ipsy+1) * max(1,(r_wordsize/i_wordsize)) else recvcnts(p) = (ie(p)-is(p)+1)*(kpex-kpsx+1)*(jpex-jpsx+1) * max(1,(r_wordsize/i_wordsize)) endif if ( p .GT. 1 ) rdispls(p) = rdispls(p-1) + recvcnts(p-1) enddo ! do the transpose if ( dir .eq. 1 ) then call mpi_alltoallv(xbuf, sendcnts, sdispls, MPI_INTEGER, & ybuf, recvcnts, rdispls, MPI_INTEGER, comm, ierr ) else call mpi_alltoallv(ybuf, sendcnts, sdispls, MPI_INTEGER, & xbuf, recvcnts, rdispls, MPI_INTEGER, comm, ierr ) endif ! unpack do p = 1, np if ( r_wordsize .eq. i_wordsize ) then if ( dir .eq. 1 ) then call f_unpack_int ( ybuf(rdispls(p)), ay, memorder, & & js(p), je(p), kpsy, kpey, ipsy, ipey, & & jmsy, jmey, kmsy, kmey, imsy, imey, curs ) else call f_unpack_int ( xbuf(rdispls(p)), ax, memorder, & & jpsx, jpex, kpsx, kpex, is(p), ie(p), & & jmsx, jmex, kmsx, kmex, imsx, imex, curs ) endif else if ( r_wordsize .eq. 8 ) THEN if ( dir .eq. 1 ) then call f_unpack_lint ( ybuf(rdispls(p)), ay, memorder, & & js(p), je(p), kpsy, kpey, ipsy, ipey, & & jmsy, jmey, kmsy, kmey, imsy, imey, curs ) else call f_unpack_lint ( xbuf(rdispls(p)), ax, memorder, & & jpsx, jpex, kpsx, kpex, is(p), ie(p), & & jmsx, jmex, kmsx, kmex, imsx, imex, curs ) endif else write(0,*)'RSL_LITE internal error: type size mismatch ',"f_xpose.F90",356 call mpi_abort(ierr) endif enddo return end subroutine trans_x2y