I am trying to write a 2D cellular automaton simulation in C with MPI. Everything works as expected until the halo exchange between processes, which uses nonblocking communication and ends in a deadlock. I have tried isolating the tags and adding log messages, but it still deadlocks, and I have no idea why; as far as I can tell, I am doing everything correctly.
Based on the logging, the messages are initiated but never received: every rank posts its sends and receives, no receive ever completes, and all ranks come to a standstill.
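To show the handshake pattern I think I am following, here is a stripped-down two-rank version of the same Isend/Irecv/Waitall exchange (a minimal sketch, separate from the simulation, using a single tag):

#include <mpi.h>
#include <stdio.h>

int main(int argc, char* argv[]) {
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int other = 1 - rank;              /* run with exactly 2 ranks */
    int sendbuf = rank, recvbuf = -1;
    MPI_Request requests[2];

    /* Post both nonblocking calls first, then wait on both at once. */
    MPI_Isend(&sendbuf, 1, MPI_INT, other, 0, MPI_COMM_WORLD, &requests[0]);
    MPI_Irecv(&recvbuf, 1, MPI_INT, other, 0, MPI_COMM_WORLD, &requests[1]);
    MPI_Waitall(2, requests, MPI_STATUSES_IGNORE);

    printf("Rank %d received %d\n", rank, recvbuf);
    MPI_Finalize();
    return 0;
}

As far as I understand, this pattern cannot deadlock, because every Isend has a matching Irecv posted before the Waitall; that is why the behaviour of the full program below confuses me.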
The full code is below (the printf calls produce the logging described above); any help would be much appreciated.
cellular.c
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
// Assuming "automaton.h" contains the declarations for uni() and rinit()
#include "automaton.h"
#define DIM 10
#define MASTER 0
#define HALO 1 // One halo on each side
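// Message tags, one per direction of the halo exchange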
#define UP 0
#define DOWN 1
#define LEFT 2
#define RIGHT 3
void print_buffer_contents(const char* label, int rank, const int* buffer, int count) {
    printf("Rank %d: %s buffer contents: ", rank, label);
    for (int i = 0; i < count; ++i) {
        printf("%d ", buffer[i]);
    }
    printf("\n");
}
void populate_send_buffers(int *sub_array, int *send_up, int *send_down, int *send_left, int *send_right, int rows, int cols) {
    int base_rows = rows - 2 * HALO; // Number of real (non-halo) rows
    int base_cols = cols - 2 * HALO; // Number of real (non-halo) columns
    // Populate the send_up buffer with the topmost row of the actual data
    for (int j = HALO; j < HALO + base_cols; ++j) {
        send_up[j - HALO] = sub_array[HALO * cols + j];
    }
    // Populate the send_down buffer with the bottommost row of the actual data
    for (int j = HALO; j < HALO + base_cols; ++j) {
        send_down[j - HALO] = sub_array[(HALO + base_rows - 1) * cols + j];
    }
    // Populate the send_left buffer with the leftmost column of the actual data
    for (int i = HALO; i < HALO + base_rows; ++i) {
        send_left[i - HALO] = sub_array[i * cols + HALO];
    }
    // Populate the send_right buffer with the rightmost column of the actual data
    for (int i = HALO; i < HALO + base_rows; ++i) {
        send_right[i - HALO] = sub_array[i * cols + HALO + base_cols - 1];
    }
}
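/* To make the indexing concrete -- with DIM = 10, a 2x2 process grid and
 * HALO = 1, each rank has rows = cols = 7 and base_rows = base_cols = 5:
 *
 *   row/col 0      : halo
 *   rows/cols 1..5 : real data (base_rows x base_cols)
 *   row/col 6      : halo
 *
 * so the loops above reduce to:
 *   send_up[j-1]    = sub_array[1][j]  for j = 1..5  (top real row)
 *   send_down[j-1]  = sub_array[5][j]  for j = 1..5  (bottom real row)
 *   send_left[i-1]  = sub_array[i][1]  for i = 1..5  (left real column)
 *   send_right[i-1] = sub_array[i][5]  for i = 1..5  (right real column)
 */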
int main(int argc, char* argv[]) {
    int seed;
    double rho = 0.51;
    int i, j, ncell = 0;
    int step, maxstep, printfreq;
    double r;

    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm cart_comm;       // Cartesian communicator
    int dims[2] = {2, 2};     // 2x2 Cartesian topology
    int periods[2] = {0, 0};  // non-periodic in both dimensions
    int reorder = 1;

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    maxstep = 10 * DIM;
    printfreq = 500;

    if (size != 4) {
        fprintf(stderr, "This program is designed to run with 4 MPI processes.\n");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    if (argc != 2) {
        if (rank == MASTER) {
            printf("Usage: automaton <seed>\n");
        }
        MPI_Finalize();
        return 1;
    }

    seed = atoi(argv[1]);
    rinit(seed);

    int cells_per_process = (DIM * DIM) / size;
    // Advance the RNG state to this rank's starting point
    int offset = rank * cells_per_process;
    for (int skip = 0; skip < offset; ++skip) {
        uni(); // Advance the RNG by calling uni()
    }
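    /* The fast-forward above assumes all ranks draw from a single shared RNG
     * stream: rank r skips r * cells_per_process draws so that each rank
     * consumes its own disjoint block of random numbers. */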
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, reorder, &cart_comm);

    // With periods = {0, 0}, MPI_Cart_shift returns MPI_PROC_NULL for
    // neighbours that fall off the edge of the process grid.
    int up, down, left, right;
    MPI_Cart_shift(cart_comm, 0, 1, &up, &down);
    MPI_Cart_shift(cart_comm, 1, 1, &left, &right);

    MPI_Datatype column_type;
    MPI_Type_vector(DIM / 2, 1, DIM + 2 * HALO, MPI_INT, &column_type);
    MPI_Type_commit(&column_type);
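    /* NOTE: column_type is committed but never used below -- the halo exchange
     * packs columns into contiguous buffers by hand instead. (Its stride,
     * DIM + 2*HALO, also doesn't match the local row length
     * cols = DIM/2 + 2*HALO, so it would need fixing before use.) */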
    int base_rows = DIM / 2; // Assuming a 2x2 process grid
    int base_cols = DIM / 2;
    int rows = base_rows + 2 * HALO; // Local extents including halos
    int cols = base_cols + 2 * HALO;
    int sub_array[rows][cols];
    int neighbors[rows][cols]; // not used yet

    // Initialize the interior of the sub-array with the advanced RNG state
    ncell = 0;
    for (i = HALO; i < rows - HALO; ++i) {
        for (j = HALO; j < cols - HALO; ++j) {
            r = uni();
            if (r < rho) {
                sub_array[i][j] = 1;
                ncell++;
            } else {
                sub_array[i][j] = 0;
            }
        }
    }
    int total_ncell;
    MPI_Reduce(&ncell, &total_ncell, 1, MPI_INT, MPI_SUM, MASTER, cart_comm);
    if (rank == MASTER) {
        int cell[DIM][DIM] = {0};
        printf("Total number of cells (ncell): %d\n", total_ncell);
        // Copy the master's portion (excluding halos)
        for (i = HALO; i < rows - HALO; ++i) {
            for (j = HALO; j < cols - HALO; ++j) {
                cell[i - HALO][j - HALO] = sub_array[i][j];
            }
        }
    }
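    /* NOTE: only the master's own quadrant is copied into cell[][] so far;
     * gathering the other ranks' quadrants is still to be written. */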
    for (step = 1; step <= maxstep; step++) {
        int send_up[base_cols], send_down[base_cols], send_left[base_rows], send_right[base_rows];
        int recv_up[base_cols], recv_down[base_cols], recv_left[base_rows], recv_right[base_rows];

        // Pack the edges of the real data into contiguous send buffers
        populate_send_buffers(&sub_array[0][0], send_up, send_down, send_left, send_right, rows, cols);

        MPI_Request requests[8]; // 2 requests per direction (send and receive), for 4 directions
        int request_count = 0;

        print_buffer_contents("send_up", rank, send_up, base_cols);
        print_buffer_contents("send_down", rank, send_down, base_cols);
        print_buffer_contents("send_left", rank, send_left, base_rows);
        print_buffer_contents("send_right", rank, send_right, base_rows);
        // Non-blocking exchange of rows with the up and down neighbours
        if (up != MPI_PROC_NULL) {
            printf("Rank %d: Starting Isend to UP (%d) Buffer size: %d\n", rank, up, base_cols);
            MPI_Isend(send_up, base_cols, MPI_INT, up, UP, cart_comm, &requests[request_count++]);
            printf("Rank %d: Starting Irecv from UP (%d) Buffer size: %d\n", rank, up, base_cols);
            MPI_Irecv(recv_up, base_cols, MPI_INT, up, UP, cart_comm, &requests[request_count++]);
        }
        if (down != MPI_PROC_NULL) {
            printf("Rank %d: Starting Isend to DOWN (%d) Buffer size: %d\n", rank, down, base_cols);
            MPI_Isend(send_down, base_cols, MPI_INT, down, DOWN, cart_comm, &requests[request_count++]);
            printf("Rank %d: Starting Irecv from DOWN (%d) Buffer size: %d\n", rank, down, base_cols);
            MPI_Irecv(recv_down, base_cols, MPI_INT, down, DOWN, cart_comm, &requests[request_count++]);
        }
        // Non-blocking exchange of columns with the left and right neighbours
        if (left != MPI_PROC_NULL) {
            printf("Rank %d: Starting Isend to LEFT (%d) Buffer size: %d\n", rank, left, base_rows);
            MPI_Isend(send_left, base_rows, MPI_INT, left, LEFT, cart_comm, &requests[request_count++]);
            printf("Rank %d: Starting Irecv from LEFT (%d) Buffer size: %d\n", rank, left, base_rows);
            MPI_Irecv(recv_left, base_rows, MPI_INT, left, LEFT, cart_comm, &requests[request_count++]);
        }
        if (right != MPI_PROC_NULL) {
            printf("Rank %d: Starting Isend to RIGHT (%d) Buffer size: %d\n", rank, right, base_rows);
            MPI_Isend(send_right, base_rows, MPI_INT, right, RIGHT, cart_comm, &requests[request_count++]);
            printf("Rank %d: Starting Irecv from RIGHT (%d) Buffer size: %d\n", rank, right, base_rows);
            MPI_Irecv(recv_right, base_rows, MPI_INT, right, RIGHT, cart_comm, &requests[request_count++]);
        }
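        /* According to the logging, every rank gets through all of the calls
         * above, prints "Waiting for all non-blocking operations to complete",
         * and then hangs in the Waitall below; "All non-blocking operations
         * completed" is never printed. */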
        // Wait for all non-blocking operations to complete
        printf("Rank %d: Waiting for all non-blocking operations to complete\n", rank);
        MPI_Waitall(request_count, requests, MPI_STATUSES_IGNORE);
        printf("Rank %d: All non-blocking operations completed\n", rank);

        // Continue with simulation logic for this step.
    }
    MPI_Finalize();
    return 0;
}
unirand.c
/*
* C version of Marsaglia's UNI random number generator
* More or less transliterated from the Fortran -- with 1 bug fix
* Hence horrible style
*
* Features:
* ANSI C
* not callable from Fortran (yet)
*/
#include <stdio.h>
#include <stdlib.h>
/*
* Global variables for rstart & uni
*/
float uni_u[98]; /* Was U(97) in Fortran version -- too lazy to fix */
float uni_c, uni_cd, uni_cm;
int uni_ui, uni_uj;
float uni(void)
{
    float luni; /* local variable for uni */

    luni = uni_u[uni_ui] - uni_u[uni_uj];
    if (luni < 0.0)
        luni += 1.0;
    uni_u[uni_ui] = luni;
    if (--uni_ui == 0)
        uni_ui = 97;
    if (--uni_uj == 0)
        uni_uj = 97;
    if ((uni_c -= uni_cd) < 0.0)
        uni_c += uni_cm;
    if ((luni -= uni_c) < 0.0)
        luni += 1.0;
    return (float) luni;
}
void rstart(int i, int j, int k, int l)
{
    int ii, jj, m;
    float s, t;

    for (ii = 1; ii <= 97; ii++) {
        s = 0.0;
        t = 0.5;
        for (jj = 1; jj <= 24; jj++) {
            m = ((i*j % 179) * k) % 179;
            i = j;
            j = k;
            k = m;
            l = (53*l+1) % 169;
            if (l*m % 64 >= 32)
                s += t;
            t *= 0.5;
        }
        uni_u[ii] = s;
    }
    uni_c  = 362436.0   / 16777216.0;
    uni_cd = 7654321.0  / 16777216.0;
    uni_cm = 16777213.0 / 16777216.0;
    uni_ui = 97; /* There is a bug in the original Fortran version */
    uni_uj = 33; /* of UNI -- i and j should be SAVEd in UNI()     */
}
/*
 * rinit: this takes a single integer in the range
 *        0 <= ijkl <= 900 000 000
 * and produces the four smaller integers needed for rstart. It is
 * based on the ideas contained in the RMARIN subroutine in
 * F. James, "A Review of Pseudorandom Number Generators",
 * Comp. Phys. Commun. Oct 1990, p.340
 *
 * To reduce the modifications to the existing code, rinit now
 * takes the role of a preprocessor for rstart.
 *
 * This is useful for the parallel version of the code as James
 * states that any integer ijkl will produce a statistically
 * independent sequence of random numbers.
 *
 * Very funny. If that statement was worth anything he would have provided
 * a proof to go with it. spb 12/12/90
 */
void rinit(int ijkl)
{
    int i, j, k, l, ij, kl;

    /* check ijkl is within range */
    if( (ijkl < 0) || (ijkl > 900000000) )
    {
        printf("rinit: ijkl = %d -- out of range\n\n", ijkl);
        exit(3);
    }

    /* printf("rinit: seed_ijkl = %d\n", ijkl); */

    /* decompose the long integer into the equivalent four
     * integers for rstart. This should be a 1-1 mapping
     *     ijkl <--> (i, j, k, l)
     * though not quite all of the possible sets of (i, j, k, l)
     * can be produced.
     */
    ij = ijkl / 30082;
    kl = ijkl - (30082 * ij);
    i = ((ij/177) % 177) + 2;
    j = (ij % 177) + 2;
    k = ((kl/169) % 178) + 1;
    l = kl % 169;
    if( (i <= 0) || (i > 178) )
    {
        printf("rinit: i = %d -- out of range\n\n", i);
        exit(3);
    }
    if( (j <= 0) || (j > 178) )
    {
        printf("rinit: j = %d -- out of range\n\n", j);
        exit(3);
    }
    if( (k <= 0) || (k > 178) )
    {
        printf("rinit: k = %d -- out of range\n\n", k);
        exit(3);
    }
    if( (l < 0) || (l > 168) )
    {
        printf("rinit: l = %d -- out of range\n\n", l);
        exit(3);
    }
    if (i == 1 && j == 1 && k == 1)
    {
        printf("rinit: 1 1 1 not allowed for 1st 3 seeds\n\n");
        exit(4);
    }

    /* printf("rinit: initialising RNG via rstart(%d, %d, %d, %d)\n",
     *        i, j, k, l); */

    rstart(i, j, k, l);
}
To compile, use mpicc -o cellular cellular.c unirand.c; to run, use mpiexec -np 4 ./cellular 60000.