libgomp: Thread creation failed: Resource temporarily unavailable OpenMP parallel Merge Sorting


I am new to OpenMP. I was trying to implement a parallel version of merge sort. The code of my serial implementation is the same as this one, except that I did not use the parallelMergeSort function. My parallel implementation is the following:

//PARALLEL MERGE SORT
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include<omp.h>

void merge(int arr[], int indexA, int indexB, int end, int arrOut[]);
void mergeSort(int arr[], int inf, int sup, int arrOut[]);
void parallelMergeSort(int arr[], int inf, int sup, int arrOut[], int threads);


int main(int argc, char *argv[]){
    int threads, availableThreads;
    int N = 10;
    int my_array[N];
    int outputArray[N];
    int length = sizeof(my_array) / sizeof(my_array[0]);
    srand(time(NULL));
    int i;
    for (i=0; i<N; i++){
        my_array[i]=rand()%100 + 1;
    }
    //print the array 
    for (i=0; i<N; i++){
        printf("%d ", my_array[i]);
    }   

    availableThreads=omp_get_max_threads();
    printf("Numero massimo di threads disponibile: %d\n", availableThreads);

    printf("Inserisci numero di threads non superiore a %d: \n", availableThreads);
    scanf("%d", &threads);

    omp_set_nested(1);
    if (omp_get_nested() != 1)
        printf("Errore nested parallelism\n");

    int processori = omp_get_num_procs(); 
    printf("Processori: %d\n", processori);

    if(threads>processori)
        omp_set_num_threads(threads);

    printf("\n--------------\n");

    double start=omp_get_wtime();
    parallelMergeSort(my_array, 0, length-1, outputArray, threads); 
    double end=omp_get_wtime();
    printf("Time: %f", end-start);

    for(i=0; i<N; i++){
        printf("%d ", my_array[i]);
    }
    printf("\n");
} 



void merge(int arr[], int indexA, int indexB, int end, int arrOut[]){
    int i=indexA, j=indexB, k=indexA;
    while(i<=indexB-1 && j<=end){
        if(arr[i]<arr[j]){
            //i=i+1;
            arrOut[k]=arr[i++];
        }
        else{
            //j=j+1;
            arrOut[k]=arr[j++];
        }
        k++;
    }
    while(i<=indexB-1){
        //i++;
        arrOut[k]=arr[i++];
        k++;
    }
    while(j<=end){
        //j++;
        arrOut[k]=arr[j++];
        k++;
    }
    for(i=indexA; i<=end; i++)
        arr[i]=arrOut[i];
}

void mergeSort(int arr[], int inf, int sup, int arrOut[]){
    int medium;
    if(inf<sup){
        medium=(inf+sup)/2;
        mergeSort(arr, inf, medium, arrOut);
        mergeSort(arr, medium+1, sup, arrOut);
        merge(arr, inf, medium+1, sup, arrOut);
    }
}

void parallelMergeSort(int arr[], int inf, int sup, int arrOut[], int threads){
    if (threads==1)
        mergeSort(arr, inf, sup, arrOut);
    else if(threads>1){
        #pragma omp parallel sections
        {
            #pragma omp section
            {
            parallelMergeSort(arr, inf, (inf+sup)/2, arrOut, threads);}
            #pragma omp section
            {
            parallelMergeSort(arr, (inf+sup)/2 + 1, sup, arrOut, threads-threads/2);}
        }
        
    }
}

The error occurs in the parallelMergeSort function, because the printf("\n--------------\n"); just before it is printed. The error is the one in the title: libgomp: Thread creation failed: Resource temporarily unavailable. I don't know where the error is. I tried to implement it following this example: Parallel Merge-Sort in OpenMP. I do not think it is a problem of resources, because simple programs work. Obviously I made some mistake in that function...

Edit: this happens whether I use 2, 3, 4, or more threads.

EDIT: If I run with OMP_DISPLAY_ENV=TRUE, I get:

OPENMP DISPLAY ENVIRONMENT BEGIN
  _OPENMP = '201511'
  OMP_DYNAMIC = 'FALSE'
  OMP_NESTED = 'FALSE'
  OMP_NUM_THREADS = '8'
  OMP_SCHEDULE = 'DYNAMIC'
  OMP_PROC_BIND = 'FALSE'
  OMP_PLACES = ''
  OMP_STACKSIZE = '140422814161920'
  OMP_WAIT_POLICY = 'PASSIVE'
  OMP_THREAD_LIMIT = '4294967295'
  OMP_MAX_ACTIVE_LEVELS = '2147483647'
  OMP_CANCELLATION = 'FALSE'
  OMP_DEFAULT_DEVICE = '0'
  OMP_MAX_TASK_PRIORITY = '0'
OPENMP DISPLAY ENVIRONMENT END
14 26 52 67 39 9 64 27 58 50 
9 14 26 27 39 50 52 58 64 67 

EDIT:

//PARALLEL MERGE SORT
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include<omp.h>

void merge(int arr[], int indexA, int indexB, int end, int arrOut[]);
void mergeSort(int arr[], int inf, int sup, int arrOut[]);
void parallelMergeSort(int arr[], int inf, int sup, int arrOut[], int threads);


int main(int argc, char *argv[]){
    int threads, availableThreads, level=0;
    int N = 1000;
    int my_array[N];
    int outputArray[N];
    int length = sizeof(my_array) / sizeof(my_array[0]);
    srand(time(NULL));
    int i;
    for (i=0; i<N; i++){
        my_array[i]=rand()%100 + 1;
    }
    //print the array 
    for (i=0; i<N; i++){
        printf("%d ", my_array[i]);
    }   

    availableThreads=omp_get_max_threads();
    printf("Numero massimo di threads disponibile: %d\n", availableThreads);

    //printf("Inserisci numero di threads non superiore a %d: \n", availableThreads);
    //scanf("%d", &threads);

    //omp_set_nested(1);
    //if (omp_get_nested() != 1)
        //printf("Errore nested parallelism\n");

    int processori = omp_get_num_procs(); 
    printf("Processori: %d\n", processori);

    //if(threads>processori)
        //omp_set_num_threads(threads);

    printf("\n--------------\n");

    double start=omp_get_wtime();
    parallelMergeSort(my_array, 0, length-1, outputArray, level); 
    double end=omp_get_wtime();
    printf("Time: %f", end-start);

    for(i=0; i<N; i++){
        printf("%d ", my_array[i]);
    }
    printf("\n");
} 



void merge(int arr[], int indexA, int indexB, int end, int arrOut[]){
    int i=indexA, j=indexB, k=indexA;
    while(i<=indexB-1 && j<=end){
        if(arr[i]<arr[j]){
            //i=i+1;
            arrOut[k]=arr[i++];
        }
        else{
            //j=j+1;
            arrOut[k]=arr[j++];
        }
        k++;
    }
    while(i<=indexB-1){
        //i++;
        arrOut[k]=arr[i++];
        k++;
    }
    while(j<=end){
        //j++;
        arrOut[k]=arr[j++];
        k++;
    }
    for(i=indexA; i<=end; i++)
        arr[i]=arrOut[i];
}

void mergeSort(int arr[], int inf, int sup, int arrOut[]){
    int medium;
    if(inf<sup){
        medium=(inf+sup)/2;
        mergeSort(arr, inf, medium, arrOut);
        mergeSort(arr, medium+1, sup, arrOut);
        merge(arr, inf, medium+1, sup, arrOut);
    }
}

void parallelMergeSort(int arr[], int inf, int sup, int arrOut[], int level){
    if (level==0){
        #pragma omp parallel
        #pragma omp single
        parallelMergeSort(arr, inf, sup, arrOut, 1);
    }
    else if(level<8){
        #pragma omp task shared(arr, arrOut)
        {
            parallelMergeSort(arr, inf, (inf+sup)/2, arrOut, level+1);
            parallelMergeSort(arr, (inf+sup)/2 + 1, sup, arrOut, level+1);
        }
    }
    else{
        mergeSort(arr, inf, sup, arrOut);
    }   
}

This is the latest code. It works if I write level=8, but if I write, for example, level=0 or level=1, the array is not sorted. EDIT: If I write something different from 8 in the code, I get a segmentation fault, especially if I increase the number of random elements in the array. Even with 1000 elements and level=8 I get this segmentation fault.

1 Answer

Answer by Jérôme Richard:

You should really not use parallel sections to implement a recursive merge sort, as it creates a lot of nested threads (which are not reused by subsequent parallel computations). The first level creates N threads, where N is the number of threads chosen by the OpenMP runtime (typically the number of hardware threads or cores on your machine), but only two of them are used to perform the merge sort. The second level creates N*N threads (of which only 4 are actually used), the third N*N*N, and so on. This quickly creates an exponential number of threads, far more than you expect. You can limit the number of threads of a parallel sections region (using num_threads(2) here), but nested parallel sections are known not to be efficient. Consider using OpenMP tasks instead. However, be aware that variables used by a task are firstprivate by default, while those of parallel sections are shared by default.
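
For illustration, here is a minimal sketch of what capping each nested sections region at two threads could look like when applied to the question's function. The name sectionsMergeSort is hypothetical, the missing merge of the two sorted halves is added, and nested parallelism still has to be enabled, so this remains the discouraged approach:

/* Hypothetical sketch: the question's sections-based recursion, capped at
 * two threads per nested level with num_threads(2), plus the merge of the
 * two halves that the question's version never performs. */
void sectionsMergeSort(int arr[], int inf, int sup, int arrOut[], int threads){
    if (threads <= 1)
        mergeSort(arr, inf, sup, arrOut);
    else {
        int medium = (inf + sup) / 2;
        #pragma omp parallel sections num_threads(2)
        {
            #pragma omp section
            sectionsMergeSort(arr, inf, medium, arrOut, threads / 2);
            #pragma omp section
            sectionsMergeSort(arr, medium + 1, sup, arrOut, threads - threads / 2);
        }
        /* Both halves are sorted once the sections region ends; merge them. */
        merge(arr, inf, medium + 1, sup, arrOut);
    }
}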

The recommended task-based code should look like this (untested):

void parallelMergeSort(int arr[], int inf, int sup, int arrOut[], int level){
    if (level == 0){ /* Initialization of the OpenMP parallel context */
        #pragma omp parallel
        #pragma omp single
        parallelMergeSort(arr, inf, sup, arrOut, 1);
    }
    else if(level <= THRESHOLD){ /* Parallel recursion with tasks */
        #pragma omp task shared(arr, arrOut)
        parallelMergeSort(arr, inf, (inf+sup)/2, arrOut, level+1);

        /* There is no need to create a task for this one as it should recursively create new one or directly compute the array */
        parallelMergeSort(arr, (inf+sup)/2 + 1, sup, arrOut, level+1);
    }
    else { /* Leaf computation */
        mergeSort(arr, inf, sup, arrOut);
    }
}
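
Note that THRESHOLD is not defined in the snippet above, and as written the two sorted halves are never combined, so the result will not be fully sorted. A hedged completion (the THRESHOLD value is an arbitrary choice) adds a taskwait and the final merge taken from the question's serial mergeSort:

#define THRESHOLD 3 /* assumed cutoff depth; tune it for your machine */

void parallelMergeSort(int arr[], int inf, int sup, int arrOut[], int level){
    if (level == 0){ /* Create the parallel region once, then recurse inside it */
        #pragma omp parallel
        #pragma omp single
        parallelMergeSort(arr, inf, sup, arrOut, 1);
    }
    else if (level <= THRESHOLD){ /* Parallel recursion with tasks */
        int medium = (inf + sup) / 2;

        #pragma omp task shared(arr, arrOut)
        parallelMergeSort(arr, inf, medium, arrOut, level + 1);

        /* The current thread sorts the right half itself. */
        parallelMergeSort(arr, medium + 1, sup, arrOut, level + 1);

        #pragma omp taskwait /* wait for the sibling task before merging */
        merge(arr, inf, medium + 1, sup, arrOut);
    }
    else { /* Leaf computation */
        mergeSort(arr, inf, sup, arrOut);
    }
}

Here only the left half is offloaded to a task; the calling thread handles the right half itself and then waits at the taskwait before merging, so both halves are guaranteed to be sorted when merge runs.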