diff libgomp/testsuite/libgomp.c/sort-1.c @ 0:a06113de4d67

first commit
author kent <kent@cr.ie.u-ryukyu.ac.jp>
date Fri, 17 Jul 2009 14:47:48 +0900
parents
children 04ced10e8804
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgomp/testsuite/libgomp.c/sort-1.c	Fri Jul 17 14:47:48 2009 +0900
@@ -0,0 +1,379 @@
+/* Test and benchmark of a couple of parallel sorting algorithms.
+   Copyright (C) 2008 Free Software Foundation, Inc.
+
+   GCC is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 3, or (at your option) any later
+   version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <limits.h>
+#include <omp.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int failures;
+
+#define THRESHOLD 100
+
+static void
+verify (const char *name, double stime, int *array, int count)
+{
+  int i;
+  double etime = omp_get_wtime ();
+
+  printf ("%s: %g\n", name, etime - stime);
+  for (i = 1; i < count; i++)
+    if (array[i] < array[i - 1])
+      {
+	printf ("%s: incorrectly sorted\n", name);
+	failures = 1;
+      }
+}
+
+static void
+insertsort (int *array, int s, int e)
+{
+  int i, j, val;
+  for (i = s + 1; i <= e; i++)
+    {
+      val = array[i];
+      j = i;
+      while (j-- > s && val < array[j])
+	array[j + 1] = array[j];
+      array[j + 1] = val;
+    }
+}
+
+struct int_pair
+{
+  int lo;
+  int hi;
+};
+
+struct int_pair_stack
+{
+  struct int_pair *top;
+#define STACK_SIZE 4 * CHAR_BIT * sizeof (int)
+  struct int_pair arr[STACK_SIZE];
+};
+
+static inline void
+init_int_pair_stack (struct int_pair_stack *stack)
+{
+  stack->top = &stack->arr[0];
+}
+
+static inline void
+push_int_pair_stack (struct int_pair_stack *stack, int lo, int hi)
+{
+  stack->top->lo = lo;
+  stack->top->hi = hi;
+  stack->top++;
+}
+
+static inline void
+pop_int_pair_stack (struct int_pair_stack *stack, int *lo, int *hi)
+{
+  stack->top--;
+  *lo = stack->top->lo;
+  *hi = stack->top->hi;
+}
+
+static inline int
+size_int_pair_stack (struct int_pair_stack *stack)
+{
+  return stack->top - &stack->arr[0];
+}
+
+static inline void
+busy_wait (void)
+{
+#if defined __i386__ || defined __x86_64__
+  __asm volatile ("rep; nop" : : : "memory");
+#elif defined __ia64__
+  __asm volatile ("hint @pause" : : : "memory");
+#elif defined __sparc__ && (defined __arch64__ || defined __sparc_v9__)
+  __asm volatile ("membar #LoadLoad" : : : "memory");
+#else
+  __asm volatile ("" : : : "memory");
+#endif
+}
+
+static inline void
+swap (int *array, int a, int b)
+{
+  int val = array[a];
+  array[a] = array[b];
+  array[b] = val;
+}
+
+static inline int
+choose_pivot (int *array, int lo, int hi)
+{
+  int mid = (lo + hi) / 2;
+
+  if (array[mid] < array[lo])
+    swap (array, lo, mid);
+  if (array[hi] < array[mid])
+    {
+      swap (array, mid, hi);
+      if (array[mid] < array[lo])
+	swap (array, lo, mid);
+    }
+  return array[mid];
+}
+
+static inline int
+partition (int *array, int lo, int hi)
+{
+  int pivot = choose_pivot (array, lo, hi);
+  int left = lo;
+  int right = hi;
+
+  for (;;)
+    {
+      while (array[++left] < pivot);
+      while (array[--right] > pivot);
+      if (left >= right)
+	break;
+      swap (array, left, right);
+    }
+  return left;
+}
+
+static void
+sort1 (int *array, int count)
+{
+  omp_lock_t lock;
+  struct int_pair_stack global_stack;
+  int busy = 1;
+  int num_threads;
+
+  omp_init_lock (&lock);
+  init_int_pair_stack (&global_stack);
+  #pragma omp parallel firstprivate (array, count)
+  {
+    int lo = 0, hi = 0, mid, next_lo, next_hi;
+    bool idle = true;
+    struct int_pair_stack local_stack;
+
+    init_int_pair_stack (&local_stack);
+    if (omp_get_thread_num () == 0)
+      {
+	num_threads = omp_get_num_threads ();
+	hi = count - 1;
+	idle = false;
+      }
+
+    for (;;)
+      {
+	if (hi - lo < THRESHOLD)
+	  {
+	    insertsort (array, lo, hi);
+	    lo = hi;
+	  }
+	if (lo >= hi)
+	  {
+	    if (size_int_pair_stack (&local_stack) == 0)
+	      {
+	      again:
+		omp_set_lock (&lock);
+		if (size_int_pair_stack (&global_stack) == 0)
+		  {
+		    if (!idle)
+		      busy--;
+		    if (busy == 0)
+		      {
+			omp_unset_lock (&lock);
+			break;
+		      }
+		    omp_unset_lock (&lock);
+		    idle = true;
+		    while (size_int_pair_stack (&global_stack) == 0
+			   && busy)
+		      busy_wait ();
+		    goto again;
+		  }
+		if (idle)
+		  busy++;
+		pop_int_pair_stack (&global_stack, &lo, &hi);
+		omp_unset_lock (&lock);
+		idle = false;
+	      }
+	    else
+	      pop_int_pair_stack (&local_stack, &lo, &hi);
+	  }
+
+	mid = partition (array, lo, hi);
+	if (mid - lo < hi - mid)
+	  {
+	    next_lo = mid;
+	    next_hi = hi;
+	    hi = mid - 1;
+	  }
+	else
+	  {
+	    next_lo = lo;
+	    next_hi = mid - 1;
+	    lo = mid;
+	  }
+
+	if (next_hi - next_lo < THRESHOLD)
+	  insertsort (array, next_lo, next_hi);
+	else
+	  {
+	    if (size_int_pair_stack (&global_stack) < num_threads - 1)
+	      {
+		int size;
+
+		omp_set_lock (&lock);
+		size = size_int_pair_stack (&global_stack);
+		if (size < num_threads - 1 && size < STACK_SIZE)
+		  push_int_pair_stack (&global_stack, next_lo, next_hi);
+		else
+		  push_int_pair_stack (&local_stack, next_lo, next_hi);
+		omp_unset_lock (&lock);
+	      }
+	    else
+	      push_int_pair_stack (&local_stack, next_lo, next_hi);
+	  }
+      }
+    }
+  omp_destroy_lock (&lock);
+}
+
+static void
+sort2_1 (int *array, int lo, int hi, int num_threads, int *busy)
+{
+  int mid;
+
+  if (hi - lo < THRESHOLD)
+    {
+      insertsort (array, lo, hi);
+      return;
+    }
+
+  mid = partition (array, lo, hi);
+
+  if (*busy >= num_threads)
+    {
+      sort2_1 (array, lo, mid - 1, num_threads, busy);
+      sort2_1 (array, mid, hi, num_threads, busy);
+      return;
+    }
+
+  #pragma omp atomic
+    *busy += 1;
+
+  #pragma omp parallel num_threads (2) \
+		       firstprivate (array, lo, hi, mid, num_threads, busy)
+  {
+    if (omp_get_thread_num () == 0)
+      sort2_1 (array, lo, mid - 1, num_threads, busy);
+    else
+      {
+	sort2_1 (array, mid, hi, num_threads, busy);
+	#pragma omp atomic
+	  *busy -= 1;
+      }
+  }
+}
+
+static void
+sort2 (int *array, int count)
+{
+  int num_threads;
+  int busy = 1;
+
+  #pragma omp parallel
+    #pragma omp single nowait
+      num_threads = omp_get_num_threads ();
+
+  sort2_1 (array, 0, count - 1, num_threads, &busy);
+}
+
+#if _OPENMP >= 200805
+static void
+sort3_1 (int *array, int lo, int hi)
+{
+  int mid;
+
+  if (hi - lo < THRESHOLD)
+    {
+      insertsort (array, lo, hi);
+      return;
+    }
+
+  mid = partition (array, lo, hi);
+  #pragma omp task
+    sort3_1 (array, lo, mid - 1);
+  sort3_1 (array, mid, hi);
+}
+
+static void
+sort3 (int *array, int count)
+{
+  #pragma omp parallel
+    #pragma omp single
+      sort3_1 (array, 0, count - 1);
+}
+#endif
+
+int
+main (int argc, char **argv)
+{
+  int i, count = 1000000;
+  double stime;
+  int *unsorted, *sorted, num_threads;
+  if (argc >= 2)
+    count = strtoul (argv[1], NULL, 0);
+
+  unsorted = malloc (count * sizeof (int));
+  sorted = malloc (count * sizeof (int));
+  if (unsorted == NULL || sorted == NULL)
+    {
+      puts ("allocation failure");
+      exit (1);
+    }
+
+  srand (0xdeadbeef);
+  for (i = 0; i < count; i++)
+    unsorted[i] = rand ();
+
+  omp_set_nested (1);
+  omp_set_dynamic (0);
+  #pragma omp parallel
+    #pragma omp single nowait
+      num_threads = omp_get_num_threads ();
+  printf ("Threads: %d\n", num_threads);
+
+  memcpy (sorted, unsorted, count * sizeof (int));
+  stime = omp_get_wtime ();
+  sort1 (sorted, count);
+  verify ("sort1", stime, sorted, count);
+
+  memcpy (sorted, unsorted, count * sizeof (int));
+  stime = omp_get_wtime ();
+  sort2 (sorted, count);
+  verify ("sort2", stime, sorted, count);
+
+#if _OPENMP >= 200805
+  memcpy (sorted, unsorted, count * sizeof (int));
+  stime = omp_get_wtime ();
+  sort3 (sorted, count);
+  verify ("sort3", stime, sorted, count);
+#endif
+
+  return 0;
+}