SUBROUTINE PDCOL2ROW( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC, $ CSRC, RDEST, CDEST, WORK)
*
* -- ScaLAPACK tools routine (version 1.5) --
* University of Tennessee, Knoxville, Oak Ridge National Laboratory, * and University of California, Berkeley. * May 1, 1997 * * .. Scalar Arguments .. INTEGER CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB, $ RDEST, RSRC * .. * .. Array Arguments .. DOUBLE PRECISION VD( LDVD, * ), VS( LDVS, * ), WORK( * ) * ..
*
* Purpose
* The BLACS context handle, indicating the global context of * the operation. The context itself is global. * * M (global input) INTEGER * The number of global rows each vector has. * * N (global input) INTEGER * The number of vectors in the vector block. * * NB (global input) INTEGER * The blocking factor used to divide the rows of the vector * amongst the processes of a column. * * VS (local input) DOUBLE PRECISION * Array of dimension (LDVS,N), the block of vectors stored on * process column CSRC to be put into memory VD, and stored * on process row RDEST.
*
* LDVS (local input) INTEGER
* The leading dimension of VS, LDVS >= MAX( 1, MP ).
*
* VD (local output) DOUBLE PRECISION * Array of dimension (LDVD,N), on output, the contents of VS * stored on process row RDEST will be here.
*
* LDVD (local input) INTEGER
* The leading dimension of VD, LDVD >= MAX( 1, MQ ).
*
* RSRC (global input) INTEGER
* The process row the distributed block of vectors VS begins * on.
*
* CSRC (global input) INTEGER
* The process column VS is distributed over.
*
* RDEST (global input) INTEGER
* The process row to distribute VD over.
*
* CDEST (global input) INTEGER
* The process column that VD begins on.
*
* WORK (local workspace) DOUBLE PRECISION
* Array of dimension (LDW), the required size of work varies: * if( nprow.eq.npcol ) then * LDW = 0; WORK not accessed. * else * lcm = least common multiple of process rows and columns. * Mp = number of rows of VS on my process. * nprow = number of process rows * CEIL = the ceiling of given operation * LDW = NB*N*CEIL( CEIL( Mp/NB )/(LCM/nprow) ) * end if
*
* .. Local Scalars .. INTEGER CBLKSKIP, ICPY, ICDEST, II, IRSRC, ISTART, JB, $ JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW, $ NBLOCKS, NPCOL, NPROW, RBLKSKIP * .. * .. External Subroutines .. EXTERNAL BLACS_GRIDINFO, DGESD2D, DGERV2D, DLACPY * .. * .. External Functions .. INTEGER ILCM, NUMROC EXTERNAL ILCM, NUMROC * .. * .. Executable Statements .. * * Get grid parameters. * CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) * * If we are not in special case for NPROW = NPCOL where there * is no copying required * IF( NPROW.NE.NPCOL ) THEN LCM = ILCM( NPROW, NPCOL ) RBLKSKIP = LCM / NPCOL CBLKSKIP = LCM / NPROW * * If I have part of VS, the source vector(s) * IF( MYCOL.EQ.CSRC ) THEN * ISTART = 1 * * Figure my distance from RSRC: the process in RDEST the same * distance from CDEST will want my first block * MYDIST = MOD( NPROW+MYROW-RSRC, NPROW ) MP = NUMROC( M, NB, MYROW, RSRC, NPROW ) ICDEST = MOD( CDEST+MYDIST, NPCOL ) * * Loop over all possible destination processes * DO 20 K = 1, CBLKSKIP JJ = 1 * * If I am not destination process * IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN * * Pack all data I own that destination needs * DO 10 II = ISTART, MP, NB*CBLKSKIP JB = MIN(NB, MP-II+1) CALL DLACPY( 'G', JB, N, VS(II,1), LDVS, $ WORK(JJ), JB ) JJ = JJ + NB*N 10 CONTINUE * * Figure how many rows are to be sent and send them if * necessary (NOTE: will send extra if NB > JB) * JJ = JJ - 1 IF( JJ.GT.0 ) $ CALL DGESD2D( ICTXT, JJ, 1, WORK, JJ, RDEST, $ ICDEST ) * ELSE * * I am both source and destination, save where to start * copying from for later use. * ICPY = ISTART END IF * ISTART = ISTART + NB ICDEST = MOD(ICDEST+NPROW, NPCOL) 20 CONTINUE END IF * * If I should receive info into VD * IF( MYROW.EQ.RDEST ) THEN * ISTART = 1 * * Figure my distance from CDEST: the process in CSRC the same * distance from RSRC will have my first block. * MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL ) IRSRC = MOD( RSRC+MYDIST, NPROW ) DO 50 K = 1, RBLKSKIP * * If I don't already possess the required data * IF( (MYCOL.NE.CSRC).OR.(MYROW.NE.IRSRC) ) THEN * * Figure how many rows to receive, and receive them * NOTE: may receive to much -- NB instead of JB * NBLOCKS = (MQ - ISTART + NB) / NB JJ = ((NBLOCKS+RBLKSKIP-1) / RBLKSKIP)*NB IF( JJ.GT.0 ) $ CALL DGERV2D( ICTXT, JJ, N, WORK, JJ, IRSRC, CSRC ) * * Copy data to destination vector * JJ = 1 DO 30 II = ISTART, MQ, NB*RBLKSKIP JB = MIN( NB, MQ-II+1 ) CALL DLACPY( 'G', JB, N, WORK(JJ), JB, $ VD(II,1), LDVD ) JJ = JJ + NB*N 30 CONTINUE * * If I am both source and destination * ELSE JJ = ICPY DO 40 II = ISTART, MQ, NB*RBLKSKIP JB = MIN( NB, MQ-II+1 ) CALL DLACPY( 'G', JB, N, VS(JJ,1), LDVS, $ VD(II,1), LDVD ) JJ = JJ + NB*CBLKSKIP 40 CONTINUE END IF ISTART = ISTART + NB IRSRC = MOD( IRSRC+NPCOL, NPROW ) 50 CONTINUE END IF * * If NPROW = NPCOL, there is a one-to-one correspondance between * process rows and columns, so no work space or copying required * ELSE * IF( MYCOL.EQ.CSRC ) THEN * * Figure my distance from RSRC: the process in RDEST the same * distance from CDEST will want my piece of the vector. * MYDIST = MOD( NPROW+MYROW-RSRC, NPROW ) MP = NUMROC( M, NB, MYROW, RSRC, NPROW ) ICDEST = MOD( CDEST+MYDIST, NPCOL ) * IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN CALL DGESD2D( ICTXT, MP, N, VS, LDVS, RDEST, ICDEST ) ELSE CALL DLACPY( 'G', MP, N, VS, LDVS, VD, LDVD ) END IF END IF * IF( MYROW.EQ.RDEST ) THEN * * Figure my distance from CDEST: the process in CSRC the same * distance from RSRC will have my piece of the vector. * MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL ) MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL ) IRSRC = MOD( RSRC+MYDIST, NPROW ) * IF( (MYROW.NE.IRSRC).OR.(MYCOL.NE.CSRC) ) $ CALL DGERV2D( ICTXT, MQ, N, VD, LDVD, IRSRC, CSRC ) * END IF * END IF * RETURN * * End of PDCOL2ROW * END