MPI Deadlock in a 2D Cellular Automaton


I am trying to create a 2D cellular automaton simulation using C and MPI. Everything works as expected until I get to the MPI communication between processes, which uses non-blocking calls, and it ends in a deadlock. I have tried isolating the tags and logging error messages, but it still deadlocks, and I have no idea why, as I feel I am doing everything correctly.

Based on the logging, the messages appear to be initiated but never received, and that seems to be the cause of the deadlock: each rank starts its sends and receives, but none of them complete, so all the communication is at a standstill.
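
For reference, this is the minimal non-blocking pattern I believe I am following. It is just a two-rank sketch to show the Isend/Irecv/Waitall structure, not my actual code (the real exchange below uses a Cartesian communicator and four directions):

#include <mpi.h>
#include <stdio.h>

// Minimal two-rank sketch (assumes exactly 2 ranks): each rank posts a
// non-blocking send and a matching non-blocking receive, then waits on both.
// The receive's source and tag have to match the sender's destination and tag.
int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int other = 1 - rank;              // the partner rank in a 2-rank job
    int sendbuf = rank, recvbuf = -1;
    MPI_Request reqs[2];

    // Same tag (0) on both the send and the matching receive
    MPI_Isend(&sendbuf, 1, MPI_INT, other, 0, MPI_COMM_WORLD, &reqs[0]);
    MPI_Irecv(&recvbuf, 1, MPI_INT, other, 0, MPI_COMM_WORLD, &reqs[1]);
    MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);

    printf("Rank %d received %d\n", rank, recvbuf);
    MPI_Finalize();
    return 0;
}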

Below is the code along with the log messages; if anyone could help me with this, it would be greatly appreciated.

cellular.c

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

// Assuming "automaton.h" contains the declarations for uni() and rinit()
#include "automaton.h"

#define DIM 10
#define MASTER 0
#define HALO 1 // One halo on each side
#define UP    0
#define DOWN  1
#define LEFT  2
#define RIGHT 3

void print_buffer_contents(const char* label, int rank, const int* buffer, int count) {
    printf("Rank %d: %s buffer contents: ", rank, label);
    for (int i = 0; i < count; ++i) {
        printf("%d ", buffer[i]);
    }
    printf("\n");
}

void populate_send_buffers(int *sub_array, int *send_up, int *send_down, int *send_left, int *send_right, int rows, int cols) {
    int base_rows = rows - 2 * HALO; // Interior rows, excluding the halo
    int base_cols = cols - 2 * HALO; // Interior columns, excluding the halo

    // Populate the send_up buffer with the topmost row of the actual data
    for (int j = HALO; j < HALO + base_cols; ++j) {
        send_up[j - HALO] = sub_array[HALO * cols + j];
    }

    // Populate the send_down buffer with the bottommost row of the actual data
    for (int j = HALO; j < HALO + base_cols; ++j) {
        send_down[j - HALO] = sub_array[(HALO + base_rows - 1) * cols + j];
    }

    // Populate the send_left buffer with the leftmost column of the actual data
    for (int i = HALO; i < HALO + base_rows; ++i) {
        send_left[i - HALO] = sub_array[i * cols + HALO];
    }

    // Populate the send_right buffer with the rightmost column of the actual data
    for (int i = HALO; i < HALO + base_rows; ++i) {
        send_right[i - HALO] = sub_array[i * cols + HALO + base_cols - 1];
    }
}


int main(int argc, char* argv[]) {

    int seed;
    double rho = 0.51;
    int i, j, ncell = 0;
    int outbuf, step, maxstep, printfreq, source, dest, localncell;
    double r;

    int nbrs[4];
    int inbuf[4] = {MPI_PROC_NULL,MPI_PROC_NULL,MPI_PROC_NULL,MPI_PROC_NULL};
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm cart_comm; // Cartesian communicator
    int dims[2] = {2, 2}; // 2x2 Cartesian topology
    int periods[2] = {0, 0};
    int reorder = 1;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);


    maxstep = 10*DIM;
    printfreq = 500;

    if (size != 4) {
        fprintf(stderr, "This program is designed to run with 4 MPI processes.\n");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    if (argc != 2) {
        if (rank == MASTER) {
            printf("Usage: automaton <seed>\n");
        }
        MPI_Finalize();
        return 1;
    }

    seed = atoi(argv[1]);
    rinit(seed);

    int cells_per_process = (DIM * DIM) / size;

    // Advance the RNG state to this rank's starting point
    int offset = rank * cells_per_process;
    for (int skip = 0; skip < offset; ++skip) {
        uni(); // Advance the RNG by calling uni()
    }


    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, reorder, &cart_comm);

    int up, down, left, right;
    MPI_Cart_shift(cart_comm, 0, 1, &up, &down);
    MPI_Cart_shift(cart_comm, 1, 1, &left, &right);

    MPI_Datatype column_type;
    MPI_Type_vector(DIM / 2, 1, DIM + 2 * HALO, MPI_INT, &column_type);
    MPI_Type_commit(&column_type);

    int base_rows = DIM / 2; // Assuming a 2x2 grid
    int base_cols = DIM / 2;
    int rows = base_rows + 2 * HALO; // Adding halos
    int cols = base_cols + 2 * HALO;
    int sub_array[rows][cols];
    int neighbors[rows][cols];

    // Initialize the sub-array with advanced RNG state
    ncell = 0;
    for (i = HALO; i < rows - HALO; ++i) {
        for (j = HALO; j < cols - HALO; ++j) {
            r = uni();
            if (r < rho) {
                sub_array[i][j] = 1;
                ncell++;
            } else {
                sub_array[i][j] = 0;
            }
        }
    }

    int total_ncell;
    MPI_Reduce(&ncell, &total_ncell, 1, MPI_INT, MPI_SUM, MASTER, cart_comm);

    if (rank == MASTER) {
        int cell[DIM][DIM] = {0};
        printf("Total number of cells (ncell): %d\n", total_ncell);


        // Copy the master's portion (excluding halos)
        for (i = HALO; i < rows - HALO; ++i) {
            for (j = HALO; j < cols - HALO; ++j) {
                cell[i - HALO][j - HALO] = sub_array[i][j];
            }
        }
    }

    MPI_Request reqs[8];
    MPI_Status stats[8];
    int num_reqs = 0;

    for (step = 1; step <= maxstep; step++) {

        int send_up[base_cols], send_down[base_cols], send_left[base_rows], send_right[base_rows];
        int recv_up[base_cols], recv_down[base_cols], recv_left[base_rows], recv_right[base_rows];

        // Populate the send buffers with the edge rows/columns of the interior data
        populate_send_buffers(&sub_array[0][0], send_up, send_down, send_left, send_right, rows, cols);

        MPI_Request requests[8]; // 2 requests per direction (send and receive), for 4 directions
        int request_count = 0;

        print_buffer_contents("send_up", rank, send_up, base_cols);
        print_buffer_contents("send_down", rank, send_down, base_cols);
        print_buffer_contents("send_left", rank, send_left, base_rows);
        print_buffer_contents("send_right", rank, send_right, base_rows);

        // Non-blocking exchange rows with up and down neighbors
        if (up != MPI_PROC_NULL) {
            printf("Rank %d: Starting Isend to UP (%d) Buffer size: %d\n", rank, up, base_cols);
            MPI_Isend(send_up, base_cols, MPI_INT, up, UP, cart_comm, &requests[request_count++]);
            printf("Rank %d: Starting Irecv from UP (%d) Buffer size: %d\n", rank, up, base_cols);
            MPI_Irecv(recv_up, base_cols, MPI_INT, up, UP, cart_comm, &requests[request_count++]);
        }
        if (down != MPI_PROC_NULL) {
            printf("Rank %d: Starting Isend to DOWN (%d) Buffer size: %d\n", rank, down, base_cols);
            MPI_Isend(send_down, base_cols, MPI_INT, down, DOWN, cart_comm, &requests[request_count++]);
            printf("Rank %d: Starting Irecv from DOWN (%d) Buffer size: %d\n", rank, down, base_cols);
            MPI_Irecv(recv_down, base_cols, MPI_INT, down, DOWN, cart_comm, &requests[request_count++]);
        }
        if (left != MPI_PROC_NULL) {
            printf("Rank %d: Starting Isend to LEFT (%d) Buffer size: %d\n", rank, left, base_rows);
            MPI_Isend(send_left, base_rows, MPI_INT, left, LEFT, cart_comm, &requests[request_count++]);
            printf("Rank %d: Starting Irecv from LEFT (%d) Buffer size: %d\n", rank, left, base_rows);
            MPI_Irecv(recv_left, base_rows, MPI_INT, left, LEFT, cart_comm, &requests[request_count++]);
        }
        if (right != MPI_PROC_NULL) {
            printf("Rank %d: Starting Isend to RIGHT (%d)\n", rank, right);
            MPI_Isend(send_right, base_rows, MPI_INT, right, RIGHT, cart_comm, &requests[request_count++]);
            printf("Rank %d: Starting Irecv from RIGHT (%d)\n", rank, right);
            MPI_Irecv(recv_right, base_rows, MPI_INT, right, RIGHT, cart_comm, &requests[request_count++]);
        }

        // Wait for all non-blocking operations to complete
        printf("Rank %d: Waiting for all non-blocking operations to complete\n", rank);
        MPI_Waitall(request_count, requests, MPI_STATUSES_IGNORE);
        printf("Rank %d: All non-blocking operations completed\n", rank);

        // Continue with simulation logic for this step.
    }

    MPI_Finalize();
    return 0;
}

unirand.c


/*
 *  C version of Marsaglia's UNI random number generator
 *  More or less transliterated from the Fortran -- with 1 bug fix
 *  Hence horrible style
 *
 *  Features:
 *      ANSI C
 *      not callable from Fortran (yet)
 */

#include <stdio.h>
#include <stdlib.h>

/*
 *  Global variables for rstart & uni
 */

float uni_u[98];    /* Was U(97) in Fortran version -- too lazy to fix */
float uni_c, uni_cd, uni_cm;
int uni_ui, uni_uj;

float uni(void)
{
    float luni;         /* local variable for uni */

    luni = uni_u[uni_ui] - uni_u[uni_uj];
    if (luni < 0.0)
        luni += 1.0;
    uni_u[uni_ui] = luni;
    if (--uni_ui == 0)
        uni_ui = 97;
    if (--uni_uj == 0)
        uni_uj = 97;
    if ((uni_c -= uni_cd) < 0.0)
        uni_c += uni_cm;
    if ((luni -= uni_c) < 0.0)
        luni += 1.0;
    return (float) luni;
}

void rstart(int i, int j, int k, int l)
{
    int ii, jj, m;
    float s, t;

    for (ii = 1; ii <= 97; ii++) {
        s = 0.0;
        t = 0.5;
        for (jj = 1; jj <= 24; jj++) {
            m = ((i*j % 179) * k) % 179;
            i = j;
            j = k;
            k = m;
            l = (53*l+1) % 169;
            if (l*m % 64 >= 32)
                s += t;
            t *= 0.5;
        }
        uni_u[ii] = s;
    }
    uni_c  = 362436.0   / 16777216.0;
    uni_cd = 7654321.0  / 16777216.0;
    uni_cm = 16777213.0 / 16777216.0;
    uni_ui = 97;    /*  There is a bug in the original Fortran version */
    uni_uj = 33;    /*  of UNI -- i and j should be SAVEd in UNI()     */
}


/* ~rinit: this takes a single integer in the range
        0 <= ijkl <= 900 000 000
    and produces the four smaller integers needed for rstart. It is
 *  based on the ideas contained in the RMARIN subroutine in
 *      F. James, "A Review of Pseudorandom Number Generators",
 *          Comp. Phys. Commun. Oct 1990, p.340
 *  To reduce the modifications to the existing code, rinit now
 *  takes the role of a preprocessor for rstart.
 *
 *  This is useful for the parallel version of the code as James
 *  states that any integer ijkl will produce a statistically
 *  independent sequence of random numbers.
 *
 *     Very funny. If that statement was worth anything he would have provided
 *     a proof to go with it. spb 12/12/90 
 */

void rinit(int ijkl)
{
    int i, j, k, l, ij, kl;

    /* check ijkl is within range */
    if( (ijkl < 0) || (ijkl > 900000000) )
        {
        printf("rinit: ijkl = %d -- out of range\n\n", ijkl);
        exit(3);
                }

/*        printf("rinit: seed_ijkl = %d\n", ijkl); */

    /* decompose the long integer into the equivalent four
     * integers for rstart. This should be a 1-1 mapping
     *  ijkl <--> (i, j, k, l)
     * though not quite all of the possible sets of (i, j, k, l)
     * can be produced.
     */

    ij = ijkl/30082;
    kl = ijkl - (30082 * ij);

    i = ((ij/177) % 177) + 2;
    j = (ij % 177) + 2;
    k = ((kl/169) % 178) + 1;
    l = kl % 169;

    if( (i <= 0) || (i > 178) )
        {
        printf("rinit: i = %d -- out of range\n\n", i);
        exit(3);
                }

    if( (j <= 0) || (j > 178) )
        {
        printf("rinit: j = %d -- out of range\n\n", j);
        exit(3);
                }

    if( (k <= 0) || (k > 178) )
        {
        printf("rinit: k = %d -- out of range\n\n", k);
        exit(3);
                }

    if( (l < 0) || (l > 168) )
        {
        printf("rinit: l = %d -- out of range\n\n", l);
        exit(3);
                }

    if (i == 1 && j == 1 && k == 1)
        {
                printf("rinit: 1 1 1 not allowed for 1st 3 seeds\n\n");
        exit(4);
                }

/*        printf("rinit: initialising RNG via rstart(%d, %d, %d, %d)\n",
                i, j, k, l); */

        rstart(i, j, k, l);

}

To compile, use mpicc -o cellular cellular.c unirand.c; to run, use mpiexec -np 4 cellular 60000.
