Commit 5a294ec26332b827173e0e8a166fe5c647cac8f2

Authored by James Caveen
0 parents
Exists in master

initial commit for meopar demo

lanceur_heat2D.sh 0 → 100755
  1 +++ a/lanceur_heat2D.sh
... ... @@ -0,0 +1,13 @@
  1 +#!/bin/sh
  2 +#PBS -q default
  3 +#PBS -N heat_1_6
  4 +#PBS -j oe
  5 +#PBS -l walltime=00:15:00
  6 +#PBS -l nodes=1:ppn=6
  7 +#PBS -l nice=19
  8 +module load gcc/4.9.2
  9 +module load openmpi/1.8.3_new-gcc-4.9.2
  10 +
  11 +cd $PBS_O_WORKDIR
  12 +time mpirun --report-bindings --map-by core --nooversubscribe --mca btl sm,self,tcp ./heat2d
  13 +
... ...
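Note on the mpirun options used in this launcher: --report-bindings prints each rank's CPU binding at startup, --map-by core places one rank per physical core, --nooversubscribe refuses to start more ranks than there are allocated slots, and --mca btl sm,self,tcp restricts Open MPI to the shared-memory, self, and TCP transports. Under PBS, -np is omitted because mpirun typically starts one rank per allocated slot (here nodes=1:ppn=6, i.e. one master plus five workers for mpi_heat2d.c). A minimal interactive sketch, assuming the same modules are available and heat2d has already been built in the current directory:

    module load gcc/4.9.2
    module load openmpi/1.8.3_new-gcc-4.9.2
    # launch the 6 ranks by hand instead of relying on the PBS allocation
    mpirun -np 6 --report-bindings --map-by core --nooversubscribe --mca btl sm,self,tcp ./heat2d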
lanceur_laplace.sh 0 → 100755
  1 +++ a/lanceur_laplace.sh
... ... @@ -0,0 +1,13 @@
  1 +#!/bin/sh
  2 +#PBS -q R
  3 +#PBS -N laplace_1x4
  4 +#PBS -j oe
  5 +#PBS -l walltime=05:00:00
  6 +#PBS -l nodes=1:ppn=4
  7 +#PBS -l nice=19
  8 +module load gcc/4.9.2
  9 +module load openmpi/2.0.1-gcc-4.9.2_knem
  10 +
  11 +cd $PBS_O_WORKDIR
  12 +time mpirun --report-bindings --map-by hwthread --nooversubscribe --mca btl sm,self,tcp ./laplace
  13 +
... ...
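lanceur_laplace.sh differs from the heat2D launcher only in its resources and mapping: it loads the newer openmpi/2.0.1-gcc-4.9.2_knem module, requests nodes=1:ppn=4 to match the 4 ranks that laplace.c (below) insists on, and uses --map-by hwthread, which maps ranks to individual hardware threads rather than whole cores. A sketch of the equivalent interactive launch, under the same assumptions as above:

    module load gcc/4.9.2
    module load openmpi/2.0.1-gcc-4.9.2_knem
    mpirun -np 4 --report-bindings --map-by hwthread --nooversubscribe --mca btl sm,self,tcp ./laplace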
laplace.c 0 → 100644
  1 +++ a/laplace.c
... ... @@ -0,0 +1,915 @@
  1 +# include <stdlib.h>
  2 +# include <stdio.h>
  3 +# include <math.h>
  4 +
  5 +# include "mpi.h"
  6 +
  7 +# define n 48 /* matrix is nxn, excluding boundary values */
  8 +# define nodeedge 24 /* a task works on a nodeedge x nodeedge matrix */
  9 +# define nblock (n/nodeedge) /* number of tasks per row of matrix */
  10 +# define nproc (nblock*nblock) /* total number of tasks (processors) */
  11 +//# define nproc nblock*nblock /* total number of tasks (processors) */
  12 +
  13 +int main ( int argc, char **argv );
  14 +void doblack ( double w, double M[][nodeedge+2] );
  15 +void dored ( double w, double M[][nodeedge+2] );
  16 +void exchange ( double M[][nodeedge+2], int comm[], int rank );
  17 +void iterate ( double w, double M[][nodeedge+2], double result[][n], int rank, int comm[] );
  18 +void setcomm ( int rank, int comm[] );
  19 +void setex ( double ex[], double M[][nodeedge+2], int which );
  20 +void initialize_matrix ( double M[][nodeedge+2] );
  21 +void unpack ( double M[][nodeedge+2], int where, double in[] );
  22 +
  23 +/******************************************************************************/
  24 +
  25 +int main ( int argc, char **argv )
  26 +
  27 +/******************************************************************************/
  28 +/*
  29 + Purpose:
  30 +
  31 + LAPLACE_MPI solves Laplace's equation on a rectangle, using MPI.
  32 +
  33 + Discussion:
  34 +
  35 + This program uses a finite difference scheme to solve
  36 + Laplace's equation for a square matrix distributed over a square
  37 + (logical) processor topology. A complete description of the algorithm
  38 + is found in Fox.
  39 +
  40 + This program works on the SPMD (single program, multiple data)
  41 + paradigm. It illustrates 2-d block decomposition, nodes exchanging
  42 + edge values, and convergence checking.
  43 +
  44 + Each matrix element is updated based on the values of the four
  45 + neighboring matrix elements. This process is repeated until the data
  46 + converges, that is, until the average change in any matrix element (compared
  47 + to the value 20 iterations previous) is smaller than a specified value.
  48 +
  49 + To ensure reproducible results between runs, a red/black
  50 + checkerboard algorithm is used. Each process exchanges edge values
  51 + with its four neighbors. Then new values are calculated for the upper
  52 + left and lower right corners (the "red" corners) of each node's
  53 + matrix. The processes exchange edge values again. The upper right
  54 + and lower left corners (the "black" corners) are then calculated.
  55 +
  56 + The program is currently configured for a 48x48 matrix
  57 + distributed over four processors. It can be edited to handle
  58 + different matrix sizes or number of processors, as long as the matrix
  59 + can be divided evenly between the processors.
  60 +
  61 + Modified:
  62 +
  63 + 14 November 2011
  64 +
  65 + Author:
  66 +
  67 + Sequential C version by Robb Newman.
  68 + MPI C version by Xianneng Shen.
  69 +
  70 + Reference:
  71 +
  72 + Geoffrey Fox, Mark Johnson, Gregory Lyzenga, Steve Otto, John Salmon,
  73 + David Walker,
  74 + Solving Problems on Concurrent Processors,
  75 + Volume 1: General Techniques and Regular Problems,
  76 + Prentice Hall, 1988,
  77 + ISBN: 0-13-8230226,
  78 + LC: QA76.5.F627.
  79 +
  80 + Local parameters:
  81 +
  82 + Local, int COMM[4], contains a 0 (no) or 1 (yes) if
  83 + communication is needed for the UP(0), RIGHT(1), DOWN(2)
  84 + and LEFT(3) neighbors.
  85 +
  86 + Local, FILE *fp, a pointer to the output file.
  87 +
  88 + Local, double M[nodeedge+2][nodeedge+2], the part of the results
  89 + kept by this process.
  90 +
  91 + Local, double RESULT[n][n], the results for the complete problem,
  92 + kept by process 0.
  93 +
  94 + Local, double W, the SOR factor, which must be strictly between 0 and 2.
  95 +*/
  96 +{
  97 + int comm[4];
  98 + FILE *fp;
  99 + int i;
  100 + int j;
  101 + double M[nodeedge+2][nodeedge+2];
  102 + int ntasks;
  103 + int rank;
  104 + double result[n][n];
  105 + double w;
  106 + double wtime;
  107 +
  108 + MPI_Init ( &argc, &argv );
  109 +
  110 + MPI_Comm_rank ( MPI_COMM_WORLD, &rank );
  111 +
  112 + MPI_Comm_size ( MPI_COMM_WORLD, &ntasks );
  113 +
  114 + wtime = MPI_Wtime ( );
  115 +
  116 + if ( rank == 0 )
  117 + {
  118 + printf ( "\n" );
  119 + printf ( "LAPLACE_MPI:\n" );
  120 + printf ( " C/MPI version\n" );
  121 + printf ( " Solve the Laplace equation using MPI.\n" );
  122 + }
  123 +
  124 + if ( ntasks != nproc )
  125 + {
  126 + if ( rank == 0 )
  127 + {
  128 + printf ( "\n" );
  129 + printf ( "Fatal error!\n" );
  130 + printf ( " The number of MPI processes must be exactly %i!\n", nproc );
  131 + }
  132 + MPI_Finalize ( );
  133 + exit ( 1 );
  134 + }
  135 +
  136 + if ( rank == 0 )
  137 + {
  138 + printf ( "\n" );
  139 + printf ( " MPI has been set up.\n" );
  140 + }
  141 +/*
  142 + Initialize the matrix M.
  143 +*/
  144 + if ( rank == 0 )
  145 + {
  146 + printf ( " Initialize the matrix M.\n" );
  147 + }
  148 + initialize_matrix ( M );
  149 +/*
  150 + Figure out who I communicate with.
  151 +*/
  152 + if ( rank == 0 )
  153 + {
  154 + printf ( " Set the list of neighbors.\n" );
  155 + }
  156 + setcomm ( rank, comm );
  157 +/*
  158 + Update M, using SOR value W, until convergence.
  159 +*/
  160 + if ( rank == 0 )
  161 + {
  162 + printf ( " Begin the iteration.\n" );
  163 + }
  164 + w = 1.2;
  165 + iterate ( w, M, result, rank, comm );
  166 +/*
  167 + Report timing
  168 +*/
  169 + wtime = MPI_Wtime ( ) - wtime;
  170 +
  171 + printf ( " Task %i took %6.3f seconds\n", rank, wtime );
  172 +/*
  173 + Write the solution to a file.
  174 +*/
  175 + if ( rank == 0 )
  176 + {
  177 + fp = fopen ( "laplace_solution.txt", "w" );
  178 +
  179 + for ( i = 0; i < n; i++ )
  180 + {
  181 + for ( j = 0; j < n; j++ )
  182 + {
  183 + fprintf ( fp, "%f\n ", result[i][j] );
  184 + }
  185 + fprintf ( fp, "\n");
  186 + }
  187 + fclose ( fp );
  188 + printf ( " Solution written to \"laplace_solution.txt\".\n" );
  189 + }
  190 +/*
  191 + Terminate MPI.
  192 +*/
  193 + MPI_Finalize ( );
  194 +/*
  195 + Terminate.
  196 +*/
  197 + if ( rank == 0 )
  198 + {
  199 + printf ( "\n" );
  200 + printf ( "LAPLACE_MPI:\n" );
  201 + printf ( " Normal end of execution.\n" );
  202 + }
  203 + return 0;
  204 +}
  205 +/******************************************************************************/
  206 +
  207 +void doblack ( double w, double M[][nodeedge+2] )
  208 +
  209 +/******************************************************************************/
  210 +/*
  211 + Purpose:
  212 +
  213 + DOBLACK iterates on the upper right and lower left corners of my matrix.
  214 +
  215 + Modified:
  216 +
  217 + 16 February 2013
  218 +
  219 + Author:
  220 +
  221 + Sequential C version by Robb Newman.
  222 + MPI C version by Xianneng Shen.
  223 +
  224 + Parameters:
  225 +
  226 + Input, double W, the SOR factor, which must be strictly between 0 and 2.
  227 +
  228 + Input/output, double M[nodeedge+2][nodeedge+2], the part of the results
  229 + kept by this process.
  230 +*/
  231 +{
  232 + int i;
  233 + int j;
  234 +/*
  235 + Upper right corner.
  236 +*/
  237 + for ( i = 1; i <= nodeedge / 2; i++ )
  238 + {
  239 + for ( j = nodeedge / 2 + 1; j <= nodeedge; j++ )
  240 + {
  241 + M[i][j] = w / 4.0 * ( M[i-1][j] + M[i][j-1] + M[i+1][j] + M[i][j+1] )
  242 + + ( 1.0 - w ) * M[i][j];
  243 + }
  244 + }
  245 +/*
  246 + Lower left corner.
  247 +*/
  248 + for ( i = nodeedge / 2 + 1; i <= nodeedge; i++ )
  249 + {
  250 + for ( j = 1; j <= nodeedge / 2; j++ )
  251 + {
  252 + M[i][j] = w / 4.0 * ( M[i-1][j] + M[i][j-1] + M[i+1][j] + M[i][j+1] )
  253 + + ( 1.0 - w ) * M[i][j];
  254 + }
  255 + }
  256 + return;
  257 +}
  258 +/******************************************************************************/
  259 +
  260 +void dored ( double w, double M[][nodeedge+2] )
  261 +
  262 +/******************************************************************************/
  263 +/*
  264 + Purpose:
  265 +
  266 + DORED iterates on the upper left and lower right corners of my matrix.
  267 +
  268 + Modified:
  269 +
  270 + 16 February 2013
  271 +
  272 + Author:
  273 +
  274 + Sequential C version by Robb Newman.
  275 + MPI C version by Xianneng Shen.
  276 +
  277 + Parameters:
  278 +
  279 + Input, double W, the SOR factor, which must be strictly between 0 and 2.
  280 +
  281 + Input/output, double M[nodeedge+2][nodeedge+2], the part of the results
  282 + kept by this process.
  283 +*/
  284 +{
  285 + int i;
  286 + int j;
  287 +/*
  288 + Upper left corner.
  289 +*/
  290 + for ( i = 1; i <= nodeedge / 2; i++ )
  291 + {
  292 + for ( j = 1; j <= nodeedge / 2; j++ )
  293 + {
  294 + M[i][j] = w / 4.0 * ( M[i-1][j] + M[i][j-1] + M[i+1][j] + M[i][j+1] )
  295 + + ( 1.0 - w ) * M[i][j];
  296 + }
  297 + }
  298 +/*
  299 + Lower right corner.
  300 +*/
  301 + for ( i = nodeedge / 2 + 1; i <= nodeedge; i++ )
  302 + {
  303 + for ( j = nodeedge / 2 + 1; j <= nodeedge; j++ )
  304 + {
  305 + M[i][j] = w / 4.0 * ( M[i-1][j] + M[i][j-1] + M[i+1][j] + M[i][j+1] )
  306 + + ( 1.0 - w ) * M[i][j];
  307 + }
  308 + }
  309 + return;
  310 +}
  311 +/******************************************************************************/
  312 +
  313 +void exchange ( double M[][nodeedge+2], int comm[], int rank )
  314 +
  315 +/******************************************************************************/
  316 +/*
  317 + Purpose:
  318 +
  319 + EXCHANGE trades edge values with up to four neighbors.
  320 +
  321 + Discussion:
  322 +
  323 + Up to 4 MPI sends are carried out, and up to 4 MPI receives.
  324 +
  325 + Modified:
  326 +
  327 + 14 November 2011
  328 +
  329 + Author:
  330 +
  331 + Sequential C version by Robb Newman.
  332 + MPI C version by Xianneng Shen.
  333 +
  334 + Parameters:
  335 +
  336 + Input/output, double M[nodeedge+2][nodeedge+2], the part of the results
  337 + kept by this process.
  338 +
  339 + Input, int COMM[4], contains a 0 (no) or 1 (yes) if
  340 + communication is needed for the UP(0), RIGHT(1), DOWN(2)
  341 + and LEFT(3) neighbors.
  342 +
  343 + Input, int RANK, the rank of this process.
  344 +*/
  345 +{
  346 + double ex0[nodeedge];
  347 + double ex1[nodeedge];
  348 + double ex2[nodeedge];
  349 + double ex3[nodeedge];
  350 + int i;
  351 + double in0[nodeedge];
  352 + double in1[nodeedge];
  353 + double in2[nodeedge];
  354 + double in3[nodeedge];
  355 + int partner;
  356 + MPI_Request requests[8];
  357 + MPI_Status status[8];
  358 + int tag;
  359 +/*
  360 + Initialize requests.
  361 +*/
  362 + for ( i = 0; i < 8; i++ )
  363 + {
  364 + requests[i] = MPI_REQUEST_NULL;
  365 + }
  366 +/*
  367 + Receive from UP neighbor (0).
  368 +*/
  369 + if ( comm[0] == 1 )
  370 + {
  371 + partner = rank - nblock;
  372 + tag = 0;
  373 + MPI_Irecv ( &in0, nodeedge, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD,
  374 + &requests[0] );
  375 + }
  376 +/*
  377 + Receive from RIGHT neighbor (1).
  378 +*/
  379 + if ( comm[1] == 1 )
  380 + {
  381 + partner = rank + 1;
  382 + tag = 1;
  383 + MPI_Irecv ( &in1, nodeedge, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD,
  384 + &requests[1] );
  385 + }
  386 +/*
  387 + Receive from DOWN neighbor (2).
  388 +*/
  389 + if ( comm[2] == 1 )
  390 + {
  391 + partner = rank + nblock;
  392 + tag = 2;
  393 + MPI_Irecv ( &in2, nodeedge, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD,
  394 + &requests[2] );
  395 + }
  396 +/*
  397 + Receive from LEFT neighbor (3).
  398 +*/
  399 + if ( comm[3] == 1 )
  400 + {
  401 + partner = rank - 1;
  402 + tag = 3;
  403 + MPI_Irecv ( &in3, nodeedge, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD,
  404 + &requests[3] );
  405 + }
  406 +/*
  407 + Send up to the UP neighbor (0), using tag 2: the receiver matches this as data from its DOWN neighbor.
  408 +*/
  409 + if ( comm[0] == 1 )
  410 + {
  411 + partner = rank - nblock;
  412 + tag = 2;
  413 + setex ( ex0, M, 0 );
  414 + MPI_Isend ( &ex0, nodeedge, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD,
  415 + &requests[4] );
  416 + }
  417 +/*
  418 + Send right to the RIGHT neighbor (1), using tag 3: the receiver matches this as data from its LEFT neighbor.
  419 +*/
  420 + if ( comm[1] == 1 )
  421 + {
  422 + partner = rank + 1;
  423 + tag = 3;
  424 + setex ( ex1, M, 1 );
  425 + MPI_Isend ( &ex1, nodeedge, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD,
  426 + &requests[5] );
  427 + }
  428 +/*
  429 + Send down to the DOWN neighbor (2), using tag 0: the receiver matches this as data from its UP neighbor.
  430 +*/
  431 + if ( comm[2] == 1 )
  432 + {
  433 + partner = rank + nblock;
  434 + tag = 0;
  435 + setex ( ex2, M, 2 );
  436 + MPI_Isend ( &ex2, nodeedge, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD,
  437 + &requests[6] );
  438 + }
  439 +/*
  440 + Send left to the LEFT neighbor (3), using tag 1: the receiver matches this as data from its RIGHT neighbor.
  441 +*/
  442 + if ( comm[3] == 1 )
  443 + {
  444 + partner = rank - 1;
  445 + tag = 1;
  446 + setex ( ex3, M, 3 );
  447 + MPI_Isend ( &ex3, nodeedge, MPI_DOUBLE, partner, tag, MPI_COMM_WORLD,
  448 + &requests[7] );
  449 + }
  450 +/*
  451 + Wait for all communication to complete.
  452 +*/
  453 + MPI_Waitall ( 8, requests, status );
  454 +/*
  455 + Copy boundary values, sent by neighbors, into M.
  456 +*/
  457 + if ( comm[0] == 1 )
  458 + {
  459 + unpack ( M, 0, in0 );
  460 + }
  461 + if ( comm[1] == 1 )
  462 + {
  463 + unpack ( M, 1, in1 );
  464 + }
  465 + if ( comm[2] == 1 )
  466 + {
  467 + unpack ( M, 2, in2 );
  468 + }
  469 + if ( comm[3] == 1 )
  470 + {
  471 + unpack ( M, 3, in3 );
  472 + }
  473 +
  474 + return;
  475 +}
  476 +/******************************************************************************/
  477 +
  478 +void initialize_matrix ( double M[][nodeedge+2] )
  479 +
  480 +/******************************************************************************/
  481 +/*
  482 + Purpose:
  483 +
  484 + INITIALIZE_MATRIX initializes the partial results array M.
  485 +
  486 + Modified:
  487 +
  488 + 10 January 2012
  489 +
  490 + Author:
  491 +
  492 + Sequential C version by Robb Newman.
  493 + MPI C version by Xianneng Shen.
  494 +
  495 + Parameters:
  496 +
  497 + Output, double M[nodeedge+2][nodeedge+2], the initialized partial
  498 + results array.
  499 +*/
  500 +{
  501 + double avg;
  502 + double bv[4];
  503 + int i;
  504 + int j;
  505 +
  506 + bv[0] = 100.0;
  507 + bv[1] = 0.0;
  508 + bv[2] = 0.0;
  509 + bv[3] = 0.0;
  510 +/*
  511 + Put the boundary values into M.
  512 +*/
  513 + for ( i = 1; i <= nodeedge; i++ )
  514 + {
  515 + M[0][i] = bv[0];
  516 + M[i][nodeedge+1] = bv[1];
  517 + M[nodeedge+1][i] = bv[2];
  518 + M[i][0] = bv[3];
  519 + }
  520 +/*
  521 + Set all interior values to be the average of the boundary values.
  522 +*/
  523 + avg = ( bv[0] + bv[1] + bv[2] + bv[3] ) / 4.0;
  524 +
  525 + for ( i = 1; i <= nodeedge; i++ )
  526 + {
  527 + for ( j = 1; j <= nodeedge; j++ )
  528 + {
  529 + M[i][j] = avg;
  530 + }
  531 + }
  532 +
  533 + return;
  534 +}
  535 +/******************************************************************************/
  536 +
  537 +void iterate ( double w, double M[][nodeedge+2], double result[][n], int rank,
  538 + int comm[] )
  539 +
  540 +/******************************************************************************/
  541 +/*
  542 + Purpose:
  543 +
  544 + ITERATE controls the iteration, including convergence checking.
  545 +
  546 + Modified:
  547 +
  548 + 16 February 2013
  549 +
  550 + Author:
  551 +
  552 + Sequential C version by Robb Newman.
  553 + MPI C version by Xianneng Shen.
  554 +
  555 + Parameters:
  556 +
  557 + Input, double W, the SOR factor, which must be strictly between 0 and 2.
  558 +
  559 + Input/output, double M[nodeedge+2][nodeedge+2], the part of the results
  560 + kept by this process.
  561 +
  562 + Output, double RESULT[n][n], the results for the complete problem,
  563 + kept by process 0.
  564 +
  565 + Input, int RANK, the rank of the process.
  566 +
  567 + Input, int COMM[4], contains a 0 (no) or 1 (yes) if
  568 + communication is needed for the UP(0), RIGHT(1), DOWN(2)
  569 + and LEFT(3) neighbors.
  570 +
  571 + Local parameters:
  572 +
  573 + Local, int COUNT, the length, in elements, of messages.
  574 +
  575 + Local, double DIFF, the average absolute difference in elements
  576 + of M since the last comparison.
  577 +
  578 + Local, int IT, the iteration counter.
  579 +
  580 + Local, double MM[n*n], a vector, used to gather the data from
  581 + all processes. This data is then rearranged into a 2D array.
  582 +*/
  583 +{
  584 + int count;
  585 + double diff;
  586 + int done;
  587 + double ediff;
  588 + int i;
  589 + double in;
  590 + int index;
  591 + int it;
  592 + int j;
  593 + int k;
  594 + int l;
  595 + double MM[n*n];
  596 + double mold[nodeedge+2][nodeedge+2];
  597 + double send[nodeedge][nodeedge];
  598 +
  599 + it = 0;
  600 + done = 0;
  601 + for ( i = 1; i <= nodeedge; i++ )
  602 + {
  603 + for ( j = 1; j <= nodeedge; j++ )
  604 + {
  605 + mold[i][j] = M[i][j];
  606 + }
  607 + }
  608 +
  609 + while ( done == 0 )
  610 + {
  611 + it++;
  612 +/*
  613 + Exchange values with neighbors, update red squares, exchange values
  614 + with neighbors, update black squares.
  615 +*/
  616 + exchange ( M, comm, rank );
  617 + dored ( w, M );
  618 + exchange ( M, comm, rank );
  619 + doblack ( w, M );
  620 +/*
  621 + Check for convergence every 20 iterations.
  622 + Find the average absolute change in elements of M.
  623 + Maximum iterations is 5000.
  624 +*/
  625 + if ( 5000 < it )
  626 + {
  627 + done = 1;
  628 + }
  629 +
  630 + if ( ( it % 20 == 0 ) && ( done != 1 ) )
  631 + {
  632 + diff = 0.0;
  633 + for ( i = 1; i <= nodeedge; i++ )
  634 + {
  635 + for ( j = 1; j <= nodeedge; j++ )
  636 + {
  637 + ediff = M[i][j] - mold[i][j];
  638 + if ( ediff < 0.0 )
  639 + {
  640 + ediff = - ediff;
  641 + }
  642 + diff = diff + ediff;
  643 + mold[i][j] = M[i][j];
  644 + }
  645 + }
  646 + diff = diff / ( ( double ) ( nodeedge * nodeedge ) );
  647 +/*
  648 + IN = sum of DIFF over all processes.
  649 +*/
  650 + MPI_Allreduce ( &diff, &in, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );
  651 +
  652 + if ( in < ( double ) nproc * 0.001 )
  653 + {
  654 + done = 1;
  655 + }
  656 + }
  657 + }
  658 +/*
  659 + Send results to task 0.
  660 +*/
  661 + for ( i = 0; i < nodeedge; i++ )
  662 + {
  663 + for ( j = 0; j < nodeedge; j++ )
  664 + {
  665 + send[i][j] = M[i+1][j+1];
  666 + }
  667 + }
  668 +
  669 + count = nodeedge * nodeedge;
  670 +
  671 + MPI_Gather ( &send, count, MPI_DOUBLE, &MM, count, MPI_DOUBLE, 0,
  672 + MPI_COMM_WORLD );
  673 +
  674 + printf ( " ITERATE gathered updated results to process 0.\n" );
  675 +/*
  676 + Storage on task 0 has to be consistent with a NBLOCK x NBLOCK decomposition.
  677 +
  678 + I believe the array form of RESULT is only needed at the end of the
  679 + program (and not even then, really). So we could probably skip this
  680 + work of rearranging the data here. JVB, 11 January 2012.
  681 +*/
  682 + if ( rank == 0 )
  683 + {
  684 + printf ( "did %i iterations\n", it );
  685 +
  686 + index = 0;
  687 + for ( k = 0; k < nblock; k++ )
  688 + {
  689 + for ( l = 0; l < nblock; l++ )
  690 + {
  691 + for ( i = k * nodeedge; i < ( k + 1 ) * nodeedge; i++ )
  692 + {
  693 + for ( j = l * nodeedge; j < ( l + 1 ) * nodeedge; j++ )
  694 + {
  695 + result[i][j] = MM[index];
  696 + index++;
  697 + }
  698 + }
  699 + }
  700 + }
  701 + }
  702 + return;
  703 +}
  704 +/******************************************************************************/
  705 +
  706 +void setcomm ( int rank, int comm[] )
  707 +
  708 +/******************************************************************************/
  709 +/*
  710 + Purpose:
  711 +
  712 + SETCOMM determines the active communication directions.
  713 +
  714 + Discussion:
  715 +
  716 + In this picture, we're assuming the RESULTS array is split among
  717 + four processes, numbered 0 through 3 and arranged as suggested by the
  718 + following:
  719 +
  720 + 0 | 1
  721 + ------+-------
  722 + 2 | 3
  723 +
  724 + Then process 0 must communicate with processes 1 and 2 only,
  725 + so its COMM array would be { 0, 1, 1, 0 }.
  726 +
  727 + Modified:
  728 +
  729 + 14 November 2011
  730 +
  731 + Author:
  732 +
  733 + Sequential C version by Robb Newman.
  734 + MPI C version by Xianneng Shen.
  735 +
  736 + Parameters:
  737 +
  738 + Input, int RANK, the rank of the process.
  739 +
  740 + Output, int COMM[4], contains a 0 (no) or 1 (yes) if
  741 + communication is needed for the UP(0), RIGHT(1), DOWN(2)
  742 + and LEFT(3) neighbors.
  743 +*/
  744 +{
  745 + int i;
  746 +/*
  747 + Start out by assuming all four neighbors exist.
  748 +*/
  749 + for ( i = 0; i < 4; i++ )
  750 + {
  751 + comm[i] = 1;
  752 + }
  753 +/*
  754 + Up neighbor?
  755 +*/
  756 + if ( rank < nblock )
  757 + {
  758 + comm[0] = 0;
  759 + }
  760 +/*
  761 + Right neighbor?
  762 +*/
  763 + if ( ( rank + 1 ) % nblock == 0 )
  764 + {
  765 + comm[1] = 0;
  766 + }
  767 +/*
  768 + Down neighbor?
  769 +*/
  770 + if ( rank > (nblock*(nblock-1)-1) )
  771 + {
  772 + comm[2] = 0;
  773 + }
  774 +/*
  775 + Left neighbor?
  776 +*/
  777 + if ( ( rank % nblock ) == 0 )
  778 + {
  779 + comm[3] = 0;
  780 + }
  781 +
  782 + return;
  783 +}
  784 +/******************************************************************************/
  785 +
  786 +void setex ( double ex[], double M[][nodeedge+2], int which )
  787 +
  788 +/******************************************************************************/
  789 +/*
  790 + Purpose:
  791 +
  792 + SETEX pulls off the edge values of M to send to another task.
  793 +
  794 + Modified:
  795 +
  796 + 14 November 2011
  797 +
  798 + Author:
  799 +
  800 + Sequential C version by Robb Newman.
  801 + MPI C version by Xianneng Shen.
  802 +
  803 + Parameters:
  804 +
  805 + Output, double EX[NODEEDGE], the values to be exchanged.
  806 +
  807 + Input, double M[nodeedge+2][nodeedge+2], the part of the results
  808 + kept by this process.
  809 +
  810 + Input, int WHICH, 0, 1, 2, or 3, indicates the edge from which
  811 + the data is to be copied.
  812 +*/
  813 +{
  814 + int i;
  815 +
  816 + switch ( which )
  817 + {
  818 + case 0:
  819 + {
  820 + for ( i = 1; i <= nodeedge; i++)
  821 + {
  822 + ex[i-1] = M[1][i];
  823 + }
  824 + break;
  825 + }
  826 + case 1:
  827 + {
  828 + for ( i = 1; i <= nodeedge; i++)
  829 + {
  830 + ex[i-1] = M[i][nodeedge];
  831 + }
  832 + break;
  833 + }
  834 + case 2:
  835 + {
  836 + for ( i = 1; i <= nodeedge; i++)
  837 + {
  838 + ex[i-1] = M[nodeedge][i];
  839 + }
  840 + break;
  841 + }
  842 + case 3:
  843 + {
  844 + for ( i = 1; i <= nodeedge; i++)
  845 + {
  846 + ex[i-1] = M[i][1];
  847 + }
  848 + break;
  849 + }
  850 + }
  851 + return;
  852 +}
  853 +/******************************************************************************/
  854 +
  855 +void unpack ( double M[][nodeedge+2], int where, double in[] )
  856 +
  857 +/******************************************************************************/
  858 +/*
  859 + Purpose:
  860 +
  861 + UNPACK puts the vector of new edge values into the edges of M.
  862 +
  863 + Modified:
  864 +
  865 + 14 November 2011
  866 +
  867 + Author:
  868 +
  869 + Sequential C version by Robb Newman.
  870 + MPI C version by Xianneng Shen.
  871 +
  872 + Parameters:
  873 +
  874 + Output, double M[nodeedge+2][nodeedge+2], the part of the results
  875 + kept by this process.
  876 +
  877 + Input, int WHERE, 0, 1, 2, or 3, indicates the edge to which the
  878 + data is to be applied.
  879 +
  880 + Input, int IN[nodeedge], the boundary data.
  881 +*/
  882 +{
  883 + int i;
  884 +
  885 + if ( where == 0 )
  886 + {
  887 + for ( i = 0; i < nodeedge; i++ )
  888 + {
  889 + M[0][i+1] = in[i];
  890 + }
  891 + }
  892 + else if ( where == 1 )
  893 + {
  894 + for ( i = 0; i < nodeedge; i++ )
  895 + {
  896 + M[i+1][nodeedge+1] = in[i];
  897 + }
  898 + }
  899 + else if ( where == 2 )
  900 + {
  901 + for ( i = 0; i < nodeedge; i++ )
  902 + {
  903 + M[nodeedge+1][i+1] = in[i];
  904 + }
  905 + }
  906 + else if ( where == 3 )
  907 + {
  908 + for ( i = 0; i < nodeedge; i++ )
  909 + {
  910 + M[i+1][0] = in[i];
  911 + }
  912 + }
  913 +
  914 + return;
  915 +}
... ...
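With the values hard-coded above (n = 48, nodeedge = 24), nblock = 2 and nproc = 4, so the program exits unless it is started with exactly 4 MPI ranks, which is what lanceur_laplace.sh requests. A minimal build-and-run sketch (the mpicc wrapper, optimization flag, and output name are assumptions, not part of this commit):

    # build with the MPI compiler wrapper, then run with the 4 ranks the code expects
    mpicc -O2 -o laplace laplace.c -lm
    mpirun -np 4 ./laplace   # nproc = (n/nodeedge)^2 = (48/24)^2 = 4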
mpi_heat2d.c 0 → 100644
  1 +++ a/mpi_heat2d.c
... ... @@ -0,0 +1,259 @@
  1 +/****************************************************************************
  2 + * FILE: mpi_heat2D.c
  3 + * OTHER FILES: draw_heat.c
  4 + * DESCRIPTIONS:
  5 + *   HEAT2D Example - Parallelized C Version
  6 + *   This example is based on a simplified two-dimensional heat
  7 + *   equation domain decomposition. The initial temperature is computed to be
  8 + *   high in the middle of the domain and zero at the boundaries. The
  9 + *   boundaries are held at zero throughout the simulation. During the
  10 + *   time-stepping, an array containing two domains is used; these domains
  11 + *   alternate between old data and new data.
  12 + *
  13 + *   In this parallelized version, the grid is decomposed by the master
  14 + *   process and then distributed by rows to the worker processes. At each
  15 + *   time step, worker processes must exchange border data with neighbors,
  16 + *   because a grid point's current temperature depends upon its previous
  17 + *   time step value plus the values of the neighboring grid points. Upon
  18 + *   completion of all time steps, the worker processes return their results
  19 + *   to the master process.
  20 + *
  21 + *   Two data files are produced: an initial data set and a final data set.
  22 + *   An X graphic of these two states displays after all calculations have
  23 + *   completed.
  24 + * AUTHOR: Blaise Barney - adapted from D. Turner's serial C version. Converted
  25 + *   to MPI: George L. Gusciora (1/95)
  26 + * LAST REVISED: 06/12/13 Blaise Barney
  27 + ****************************************************************************/
  28 +#include "mpi.h"
  29 +#include <stdio.h>
  30 +#include <stdlib.h>
  31 +//extern void draw_heat(int nx, int ny); /* X routine to create graph */
  32 +
  33 +#define NXPROB 200000 /* x dimension of problem grid */
  34 +#define NYPROB 20 /* y dimension of problem grid */
  35 +#define STEPS 1000 /* number of time steps */
  36 +#define MAXWORKER 8 /* maximum number of worker tasks */
  37 +#define MINWORKER 3 /* minimum number of worker tasks */
  38 +#define BEGIN 1 /* message tag */
  39 +#define LTAG 2 /* message tag */
  40 +#define RTAG 3 /* message tag */
  41 +#define NONE 0 /* indicates no neighbor */
  42 +#define DONE 4 /* message tag */
  43 +#define MASTER 0 /* taskid of first process */
  44 +
  45 +struct Parms {
  46 + float cx;
  47 + float cy;
  48 +} parms = {0.1, 0.1};