view libgomp/testsuite/libgomp.c/sort-1.c @ 158:494b0b89df80 default tip

...
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 25 May 2020 18:13:55 +0900
parents 1830386684a0
children
line wrap: on
line source

/* Test and benchmark of a couple of parallel sorting algorithms.
   Copyright (C) 2008-2020 Free Software Foundation, Inc.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include <limits.h>
#include <omp.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int failures;

#define THRESHOLD 100

static void
verify (const char *name, double stime, int *array, int count)
{
  int i;
  double etime = omp_get_wtime ();

  printf ("%s: %g\n", name, etime - stime);
  for (i = 1; i < count; i++)
    if (array[i] < array[i - 1])
      {
	printf ("%s: incorrectly sorted\n", name);
	failures = 1;
      }
}

static void
insertsort (int *array, int s, int e)
{
  int i, j, val;
  for (i = s + 1; i <= e; i++)
    {
      val = array[i];
      j = i;
      while (j-- > s && val < array[j])
	array[j + 1] = array[j];
      array[j + 1] = val;
    }
}

struct int_pair
{
  int lo;
  int hi;
};

struct int_pair_stack
{
  struct int_pair *top;
#define STACK_SIZE 4 * CHAR_BIT * sizeof (int)
  struct int_pair arr[STACK_SIZE];
};

static inline void
init_int_pair_stack (struct int_pair_stack *stack)
{
  stack->top = &stack->arr[0];
}

static inline void
push_int_pair_stack (struct int_pair_stack *stack, int lo, int hi)
{
  stack->top->lo = lo;
  stack->top->hi = hi;
  stack->top++;
}

static inline void
pop_int_pair_stack (struct int_pair_stack *stack, int *lo, int *hi)
{
  stack->top--;
  *lo = stack->top->lo;
  *hi = stack->top->hi;
}

static inline int
size_int_pair_stack (struct int_pair_stack *stack)
{
  return stack->top - &stack->arr[0];
}

static inline void
busy_wait (void)
{
#if defined __i386__ || defined __x86_64__
  __builtin_ia32_pause ();
#elif defined __ia64__
  __asm volatile ("hint @pause" : : : "memory");
#elif defined __sparc__ && (defined __arch64__ || defined __sparc_v9__)
  __asm volatile ("membar #LoadLoad" : : : "memory");
#else
  __asm volatile ("" : : : "memory");
#endif
}

static inline void
swap (int *array, int a, int b)
{
  int val = array[a];
  array[a] = array[b];
  array[b] = val;
}

static inline int
choose_pivot (int *array, int lo, int hi)
{
  int mid = (lo + hi) / 2;

  if (array[mid] < array[lo])
    swap (array, lo, mid);
  if (array[hi] < array[mid])
    {
      swap (array, mid, hi);
      if (array[mid] < array[lo])
	swap (array, lo, mid);
    }
  return array[mid];
}

static inline int
partition (int *array, int lo, int hi)
{
  int pivot = choose_pivot (array, lo, hi);
  int left = lo;
  int right = hi;

  for (;;)
    {
      while (array[++left] < pivot);
      while (array[--right] > pivot);
      if (left >= right)
	break;
      swap (array, left, right);
    }
  return left;
}

static void
sort1 (int *array, int count)
{
  omp_lock_t lock;
  struct int_pair_stack global_stack;
  int busy = 1;
  int num_threads;

  omp_init_lock (&lock);
  init_int_pair_stack (&global_stack);
  #pragma omp parallel firstprivate (array, count)
  {
    int lo = 0, hi = 0, mid, next_lo, next_hi;
    bool idle = true;
    struct int_pair_stack local_stack;

    init_int_pair_stack (&local_stack);
    if (omp_get_thread_num () == 0)
      {
	num_threads = omp_get_num_threads ();
	hi = count - 1;
	idle = false;
      }

    for (;;)
      {
	if (hi - lo < THRESHOLD)
	  {
	    insertsort (array, lo, hi);
	    lo = hi;
	  }
	if (lo >= hi)
	  {
	    if (size_int_pair_stack (&local_stack) == 0)
	      {
	      again:
		omp_set_lock (&lock);
		if (size_int_pair_stack (&global_stack) == 0)
		  {
		    if (!idle)
		      busy--;
		    if (busy == 0)
		      {
			omp_unset_lock (&lock);
			break;
		      }
		    omp_unset_lock (&lock);
		    idle = true;
		    while (size_int_pair_stack (&global_stack) == 0
			   && busy)
		      busy_wait ();
		    goto again;
		  }
		if (idle)
		  busy++;
		pop_int_pair_stack (&global_stack, &lo, &hi);
		omp_unset_lock (&lock);
		idle = false;
	      }
	    else
	      pop_int_pair_stack (&local_stack, &lo, &hi);
	  }

	mid = partition (array, lo, hi);
	if (mid - lo < hi - mid)
	  {
	    next_lo = mid;
	    next_hi = hi;
	    hi = mid - 1;
	  }
	else
	  {
	    next_lo = lo;
	    next_hi = mid - 1;
	    lo = mid;
	  }

	if (next_hi - next_lo < THRESHOLD)
	  insertsort (array, next_lo, next_hi);
	else
	  {
	    if (size_int_pair_stack (&global_stack) < num_threads - 1)
	      {
		int size;

		omp_set_lock (&lock);
		size = size_int_pair_stack (&global_stack);
		if (size < num_threads - 1 && size < STACK_SIZE)
		  push_int_pair_stack (&global_stack, next_lo, next_hi);
		else
		  push_int_pair_stack (&local_stack, next_lo, next_hi);
		omp_unset_lock (&lock);
	      }
	    else
	      push_int_pair_stack (&local_stack, next_lo, next_hi);
	  }
      }
    }
  omp_destroy_lock (&lock);
}

static void
sort2_1 (int *array, int lo, int hi, int num_threads, int *busy)
{
  int mid;

  if (hi - lo < THRESHOLD)
    {
      insertsort (array, lo, hi);
      return;
    }

  mid = partition (array, lo, hi);

  if (*busy >= num_threads)
    {
      sort2_1 (array, lo, mid - 1, num_threads, busy);
      sort2_1 (array, mid, hi, num_threads, busy);
      return;
    }

  #pragma omp atomic
    *busy += 1;

  #pragma omp parallel num_threads (2) \
		       firstprivate (array, lo, hi, mid, num_threads, busy)
  {
    if (omp_get_thread_num () == 0)
      sort2_1 (array, lo, mid - 1, num_threads, busy);
    else
      {
	sort2_1 (array, mid, hi, num_threads, busy);
	#pragma omp atomic
	  *busy -= 1;
      }
  }
}

static void
sort2 (int *array, int count)
{
  int num_threads;
  int busy = 1;

  #pragma omp parallel
    #pragma omp single nowait
      num_threads = omp_get_num_threads ();

  sort2_1 (array, 0, count - 1, num_threads, &busy);
}

#if _OPENMP >= 200805
static void
sort3_1 (int *array, int lo, int hi)
{
  int mid;

  if (hi - lo < THRESHOLD)
    {
      insertsort (array, lo, hi);
      return;
    }

  mid = partition (array, lo, hi);
  #pragma omp task
    sort3_1 (array, lo, mid - 1);
  sort3_1 (array, mid, hi);
}

static void
sort3 (int *array, int count)
{
  #pragma omp parallel
    #pragma omp single
      sort3_1 (array, 0, count - 1);
}
#endif

int
main (int argc, char **argv)
{
  int i, count = 1000000;
  double stime;
  int *unsorted, *sorted, num_threads;
  if (argc >= 2)
    count = strtoul (argv[1], NULL, 0);

  unsorted = malloc (count * sizeof (int));
  sorted = malloc (count * sizeof (int));
  if (unsorted == NULL || sorted == NULL)
    {
      puts ("allocation failure");
      exit (1);
    }

  srand (0xdeadbeef);
  for (i = 0; i < count; i++)
    unsorted[i] = rand ();

  omp_set_nested (1);
  omp_set_dynamic (0);
  #pragma omp parallel
    #pragma omp single nowait
      num_threads = omp_get_num_threads ();
  printf ("Threads: %d\n", num_threads);

  memcpy (sorted, unsorted, count * sizeof (int));
  stime = omp_get_wtime ();
  sort1 (sorted, count);
  verify ("sort1", stime, sorted, count);

  memcpy (sorted, unsorted, count * sizeof (int));
  stime = omp_get_wtime ();
  sort2 (sorted, count);
  verify ("sort2", stime, sorted, count);

#if _OPENMP >= 200805
  memcpy (sorted, unsorted, count * sizeof (int));
  stime = omp_get_wtime ();
  sort3 (sorted, count);
  verify ("sort3", stime, sorted, count);
#endif

  return 0;
}