view libgomp/testsuite/libgomp.c/affinity-1.c @ 111:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents
children 84e7813d76e9
line wrap: on
line source

/* Affinity tests.
   Copyright (C) 2013-2017 Free Software Foundation, Inc.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* { dg-do run } */
/* { dg-set-target-env-var OMP_PROC_BIND "false" } */
/* { dg-additional-options "-DINTERPOSE_GETAFFINITY -DDO_FORK -ldl" { target *-*-linux* } } */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include "config.h"
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#ifdef DO_FORK
#include <signal.h>
#include <sys/wait.h>
#endif
#ifdef HAVE_PTHREAD_AFFINITY_NP
#include <sched.h>
#include <pthread.h>
#ifdef INTERPOSE_GETAFFINITY
#include <dlfcn.h>
#endif
#endif

/* One expected binding: a run of LEN consecutive CPUs whose first CPU is
   START.  A START of -1 means that no particular binding is expected.  */
struct place
{
  int start;
  int len;
};

/* An OMP_PLACES string (NAME) together with the COUNT places it is expected
   to expand to, listed in order in PLACES.  Unused trailing PLACES slots are
   zero-initialized.  */
struct places
{
  char name[40];
  int count;
  struct place places[8];
} places_array[] = {
  /* Entry 0: empty place list, nothing to verify.  */
  {
    .name = "",
    .count = 1,
    .places = {
      { .start = -1, .len = -1 }
    }
  },
  {
    .name = "{0}:8",
    .count = 8,
    .places = {
      { .start = 0, .len = 1 },
      { .start = 1, .len = 1 },
      { .start = 2, .len = 1 },
      { .start = 3, .len = 1 },
      { .start = 4, .len = 1 },
      { .start = 5, .len = 1 },
      { .start = 6, .len = 1 },
      { .start = 7, .len = 1 }
    }
  },
  {
    .name = "{7,6}:2:-3",
    .count = 2,
    .places = {
      { .start = 6, .len = 2 },
      { .start = 3, .len = 2 }
    }
  },
  {
    .name = "{6,7}:4:-2,!{2,3}",
    .count = 3,
    .places = {
      { .start = 6, .len = 2 },
      { .start = 4, .len = 2 },
      { .start = 0, .len = 2 }
    }
  },
  {
    .name = "{1}:7:1",
    .count = 7,
    .places = {
      { .start = 1, .len = 1 },
      { .start = 2, .len = 1 },
      { .start = 3, .len = 1 },
      { .start = 4, .len = 1 },
      { .start = 5, .len = 1 },
      { .start = 6, .len = 1 },
      { .start = 7, .len = 1 }
    }
  },
  {
    .name = "{0,1},{3,2,4},{6,5,!6},{6},{7:2:-1,!6}",
    .count = 5,
    .places = {
      { .start = 0, .len = 2 },
      { .start = 2, .len = 3 },
      { .start = 5, .len = 1 },
      { .start = 6, .len = 1 },
      { .start = 7, .len = 1 }
    }
  }
};

/* Number of consecutive CPUs, counting up from CPU 0, that were set in an
   affinity mask seen by the interposed pthread_getaffinity_np.  It is only
   written while it is still 0, so it stays 0 when that wrapper is not
   compiled in or CPU 0 was never found set.  */
unsigned long contig_cpucount;
/* The cpusetsize argument of the wrapper call that last wrote
   contig_cpucount (0 if there was none).  print_affinity uses it, when
   nonzero, as the size of the mask it queries.  */
unsigned long min_cpusetsize;

#if defined (HAVE_PTHREAD_AFFINITY_NP) && defined (_SC_NPROCESSORS_CONF) \
    && defined (CPU_ALLOC_SIZE)

#if defined (RTLD_NEXT) && defined (INTERPOSE_GETAFFINITY)
/* The next pthread_getaffinity_np in symbol lookup order, resolved lazily
   by the wrapper below.  */
int (*orig_getaffinity_np) (pthread_t, size_t, cpu_set_t *);

/* Interposed pthread_getaffinity_np.  Forwards to the next definition found
   via dlsym (RTLD_NEXT, ...) and returns its result unchanged.  As long as
   contig_cpucount is still 0, every successful call also counts how many
   CPUs starting at CPU 0 are set without a gap in CPUSET, and stores that
   count in contig_cpucount and CPUSETSIZE in min_cpusetsize.  If the next
   definition cannot be resolved the process exits with status 0.  */
int
pthread_getaffinity_np (pthread_t thread, size_t cpusetsize, cpu_set_t *cpuset)
{
  if (!orig_getaffinity_np)
    {
      void *sym = dlsym (RTLD_NEXT, "pthread_getaffinity_np");
      orig_getaffinity_np = (int (*) (pthread_t, size_t, cpu_set_t *)) sym;
      if (!orig_getaffinity_np)
	exit (0);
    }

  int status = orig_getaffinity_np (thread, cpusetsize, cpuset);
  if (status == 0 && contig_cpucount == 0)
    {
      /* The mask holds 8 bits per byte; stop at the first clear bit.  */
      unsigned long nbits = 8 * cpusetsize;
      unsigned long cpu = 0;
      while (cpu < nbits && CPU_ISSET_S (cpu, cpusetsize, cpuset))
	cpu++;
      contig_cpucount = cpu;
      min_cpusetsize = cpusetsize;
    }
  return status;
}
#endif

/* Print the calling thread's CPU affinity as " bound to {a:n,b:m,...}",
   where each item is the first CPU of a run of consecutive CPUs and ":n" is
   appended for runs longer than one CPU.  Unless P.start is -1, follow it
   with ", verified" when the mask is exactly one run starting at P.start
   with P.len CPUs, and with ", expected {...} instead" otherwise.  Nothing
   is printed if the affinity cannot be queried.  */
void
print_affinity (struct place p)
{
  /* Mask size in bytes, computed on the first call and then reused.  */
  static unsigned long size;
  if (size == 0)
    {
      size = min_cpusetsize;
      if (size == 0)
	{
	  unsigned long nconf = sysconf (_SC_NPROCESSORS_CONF);
	  size = CPU_ALLOC_SIZE (nconf);
	  if (size < sizeof (cpu_set_t))
	    size = sizeof (cpu_set_t);
	}
    }

  cpu_set_t *mask = (cpu_set_t *) __builtin_alloca (size);
  if (pthread_getaffinity_np (pthread_self (), size, mask) != 0)
    return;

  unsigned long nbits = 8 * size;
  unsigned long run = 0;
  unsigned long cpu;
  int seen_run = 0;
  int mismatch = 1;

  printf (" bound to {");
  /* The extra iteration at cpu == nbits acts as a clear bit, so a run that
     reaches the end of the mask is closed by the same code as any other.  */
  for (cpu = 0; cpu <= nbits; cpu++)
    {
      if (cpu < nbits && CPU_ISSET_S (cpu, size, mask))
	{
	  if (run == 0)
	    {
	      if (seen_run)
		{
		  /* A second run can never match a single place.  */
		  mismatch = 1;
		  printf (",");
		}
	      else if (cpu == (unsigned long) p.start)
		mismatch = 0;
	      seen_run = 1;
	      printf ("%lu", cpu);
	    }
	  run++;
	  continue;
	}

      if (run != 0 && run != (unsigned long) p.len)
	mismatch = 1;
      if (run > 1)
	printf (":%lu", run);
      run = 0;
    }
  printf ("}");

  if (p.start == -1)
    return;
  if (!mismatch)
    {
      printf (", verified");
      return;
    }
  printf (", expected {%d", p.start);
  if (p.len != 1)
    printf (":%d", p.len);
  printf ("} instead");
}
#else
/* Fallback used when the affinity query interfaces are unavailable: prints
   nothing and ignores P.  */
void
print_affinity (struct place p)
{
  (void) p;
}
#endif


/* Test driver.  At several nesting depths and for each proc_bind clause it
   checks that omp_get_proc_bind () reports the policy implied by the
   OMP_PROC_BIND environment variable (calling abort () on a mismatch), and
   prints each thread's CPU binding via print_affinity together with the
   place expected for the OMP_PLACES list in use.

   When built with DO_FORK and started with OMP_PROC_BIND=false, without
   OMP_PLACES and GOMP_AFFINITY, and with contig_cpucount >= 8, it first
   re-executes itself once for every non-empty places_array entry under
   OMP_PROC_BIND=true and under OMP_PROC_BIND=spread,master,close.  */
int
main (void)
{
  char *env_proc_bind = getenv ("OMP_PROC_BIND");
  int test_false = env_proc_bind && strcmp (env_proc_bind, "false") == 0;
  int test_true = env_proc_bind && strcmp (env_proc_bind, "true") == 0;
  int test_spread_master_close
    = env_proc_bind && strcmp (env_proc_bind, "spread,master,close") == 0;
  char *env_places = getenv ("OMP_PLACES");
  /* Index into places_array of the entry matching OMP_PLACES; 0 selects the
     entry whose only place is { -1, -1 }, i.e. nothing is verified.  */
  int test_places = 0;

#ifdef DO_FORK
  if (env_places == NULL && contig_cpucount >= 8 && test_false
      && getenv ("GOMP_AFFINITY") == NULL)
    {
      int i, j, status;
      pid_t pid;
      for (j = 0; j < 2; j++)
	{
	  if (setenv ("OMP_PROC_BIND", j ? "spread,master,close" : "true", 1)
	      < 0)
	    break;
	  /* Walk places_array from the last entry down to entry 1; entry 0
	     (the empty list) is not run in a child.  */
	  for (i = sizeof (places_array) / sizeof (places_array[0]) - 1;
	       i; --i)
	    {
	      if (setenv ("OMP_PLACES", places_array[i].name, 1) < 0)
		break;
	      pid = fork ();
	      if (pid == -1)
		break;
	      if (pid == 0)
		{
		  /* The sentinel of a variadic exec call must be a null
		     pointer of type char *, hence the cast.  */
		  execl ("/proc/self/exe", "affinity-1.exe", (char *) NULL);
		  _exit (1);
		}
	      if (waitpid (pid, &status, 0) < 0)
		break;
	      /* A child killed by SIGABRT makes the parent abort as well;
		 any other unsuccessful outcome just ends the sweep.  */
	      if (WIFSIGNALED (status) && WTERMSIG (status) == SIGABRT)
		abort ();
	      else if (!WIFEXITED (status) || WEXITSTATUS (status) != 0)
		break;
	    }
	  /* Nonzero I means the inner loop was left early.  */
	  if (i)
	    break;
	}
    }
#endif

  int first = 1;
  if (env_proc_bind)
    {
      printf ("OMP_PROC_BIND='%s'", env_proc_bind);
      first = 0;
    }
  if (env_places)
    printf ("%sOMP_PLACES='%s'", first ? "" : " ", env_places);
  printf ("\n");

  /* Look up OMP_PLACES in places_array; test_places ends up 0 when no
     entry matches.  */
  if (env_places && contig_cpucount >= 8
      && (test_true || test_spread_master_close))
    {
      for (test_places = sizeof (places_array) / sizeof (places_array[0]) - 1;
	   test_places; --test_places)
	if (strcmp (env_places, places_array[test_places].name) == 0)
	  break;
    }

/* Abort unless omp_get_proc_bind () returns the value expected for the
   OMP_PROC_BIND setting under test: omp_proc_bind_false for "false",
   IF_TRUE for "true" and IF_S_M_C for "spread,master,close".  */
#define verify(if_true, if_s_m_c) \
  do									\
    {									\
      if (test_false && omp_get_proc_bind () != omp_proc_bind_false)	\
	abort ();							\
      if (test_true && omp_get_proc_bind () != if_true)			\
	abort ();							\
      if (test_spread_master_close && omp_get_proc_bind () != if_s_m_c)	\
	abort ();							\
    }									\
  while (0)

  verify (omp_proc_bind_true, omp_proc_bind_spread);

  printf ("Initial thread");
  print_affinity (places_array[test_places].places[0]);
  printf ("\n");
  omp_set_nested (1);
  omp_set_dynamic (0);

  /* Serialized (if (0)) nested regions: only the reported proc_bind value
     is checked here, no binding is printed.  */
  #pragma omp parallel if (0)
  {
    verify (omp_proc_bind_true, omp_proc_bind_master);
    #pragma omp parallel if (0)
    {
      verify (omp_proc_bind_true, omp_proc_bind_close);
      #pragma omp parallel if (0)
      {
	verify (omp_proc_bind_true, omp_proc_bind_close);
      }
      #pragma omp parallel if (0) proc_bind (spread)
      {
	verify (omp_proc_bind_spread, omp_proc_bind_spread);
      }
    }
    #pragma omp parallel if (0) proc_bind (master)
    {
      verify (omp_proc_bind_master, omp_proc_bind_close);
      #pragma omp parallel if (0)
      {
	verify (omp_proc_bind_master, omp_proc_bind_close);
      }
      #pragma omp parallel if (0) proc_bind (spread)
      {
	verify (omp_proc_bind_spread, omp_proc_bind_spread);
      }
    }
  }

  /* True/spread */
  #pragma omp parallel num_threads (4)
  {
    verify (omp_proc_bind_true, omp_proc_bind_master);
    #pragma omp critical
    {
      struct place p = places_array[0].places[0];
      int thr = omp_get_thread_num ();
      printf ("#1 thread %d", thr);
      if (omp_get_num_threads () == 4 && test_spread_master_close)
	switch (places_array[test_places].count)
	  {
	  case 8:
	    /* T = 4, P = 8, each subpartition has 2 places.  */
	  case 7:
	    /* T = 4, P = 7, each subpartition has 2 places, but
	       last partition, which has just one place.  */
	    p = places_array[test_places].places[2 * thr];
	    break;
	  case 5:
	    /* T = 4, P = 5, first subpartition has 2 places, the
	       rest just one.  */
	    p = places_array[test_places].places[thr ? 1 + thr : 0];
	    break;
	  case 3:
	    /* T = 4, P = 3, unit sized subpartitions, first gets
	       thr0 and thr3, second thr1, third thr2.  */
	    p = places_array[test_places].places[thr == 3 ? 0 : thr];
	    break;
	  case 2:
	    /* T = 4, P = 2, unit sized subpartitions, each with
	       2 threads.  */
	    p = places_array[test_places].places[thr / 2];
	    break;
	  }
      print_affinity (p);
      printf ("\n");
    }
    #pragma omp barrier
    if (omp_get_thread_num () == 3)
      {
	/* True/spread, true/master.  */
	#pragma omp parallel num_threads (3)
	{
	  verify (omp_proc_bind_true, omp_proc_bind_close);
	  #pragma omp critical
	  {
	    struct place p = places_array[0].places[0];
	    int thr = omp_get_thread_num ();
	    printf ("#1,#1 thread 3,%d", thr);
	    if (omp_get_num_threads () == 3 && test_spread_master_close)
	      /* Outer is spread, inner master, so just bind to the
		 place of the master thread, which is thr 3 above.  */
	      switch (places_array[test_places].count)
		{
		case 8:
		case 7:
		  p = places_array[test_places].places[6];
		  break;
		case 5:
		  p = places_array[test_places].places[4];
		  break;
		case 3:
		  p = places_array[test_places].places[0];
		  break;
		case 2:
		  p = places_array[test_places].places[1];
		  break;
		}
	    print_affinity (p);
	    printf ("\n");
	  }
	}
	/* True/spread, spread.  */
	#pragma omp parallel num_threads (5) proc_bind (spread)
	{
	  verify (omp_proc_bind_spread, omp_proc_bind_close);
	  #pragma omp critical
	  {
	    struct place p = places_array[0].places[0];
	    int thr = omp_get_thread_num ();
	    printf ("#1,#2 thread 3,%d", thr);
	    if (omp_get_num_threads () == 5 && test_spread_master_close)
	      /* Outer is spread, inner spread.  */
	      switch (places_array[test_places].count)
		{
		case 8:
		  /* T = 5, P = 2, unit sized subpartitions.  */
		  p = places_array[test_places].places[thr == 4 ? 6
						       : 6 + thr / 2];
		  break;
		/* The rest are T = 5, P = 1.  */
		case 7:
		  p = places_array[test_places].places[6];
		  break;
		case 5:
		  p = places_array[test_places].places[4];
		  break;
		case 3:
		  p = places_array[test_places].places[0];
		  break;
		case 2:
		  p = places_array[test_places].places[1];
		  break;
		}
	    print_affinity (p);
	    printf ("\n");
	  }
	  #pragma omp barrier
	  if (omp_get_thread_num () == 3)
	    {
	      /* True/spread, spread, close.  */
	      #pragma omp parallel num_threads (5) proc_bind (close)
	      {
		verify (omp_proc_bind_close, omp_proc_bind_close);
		#pragma omp critical
		{
		  struct place p = places_array[0].places[0];
		  int thr = omp_get_thread_num ();
		  printf ("#1,#2,#1 thread 3,3,%d", thr);
		  if (omp_get_num_threads () == 5 && test_spread_master_close)
		    /* Outer is spread, inner spread, innermost close.  */
		    switch (places_array[test_places].count)
		      {
		      /* All are T = 5, P = 1.  */
		      case 8:
			p = places_array[test_places].places[7];
			break;
		      case 7:
			p = places_array[test_places].places[6];
			break;
		      case 5:
			p = places_array[test_places].places[4];
			break;
		      case 3:
			p = places_array[test_places].places[0];
			break;
		      case 2:
			p = places_array[test_places].places[1];
			break;
		      }
		  print_affinity (p);
		  printf ("\n");
		}
	      }
	    }
	}
	/* True/spread, master.  */
	#pragma omp parallel num_threads (4) proc_bind(master)
	{
	  verify (omp_proc_bind_master, omp_proc_bind_close);
	  #pragma omp critical
	  {
	    struct place p = places_array[0].places[0];
	    int thr = omp_get_thread_num ();
	    printf ("#1,#3 thread 3,%d", thr);
	    if (omp_get_num_threads () == 4 && test_spread_master_close)
	      /* Outer is spread, inner master, so just bind to the
		 place of the master thread, which is thr 3 above.  */
	      switch (places_array[test_places].count)
		{
		case 8:
		case 7:
		  p = places_array[test_places].places[6];
		  break;
		case 5:
		  p = places_array[test_places].places[4];
		  break;
		case 3:
		  p = places_array[test_places].places[0];
		  break;
		case 2:
		  p = places_array[test_places].places[1];
		  break;
		}
	    print_affinity (p);
	    printf ("\n");
	  }
	}
	/* True/spread, close.  */
	#pragma omp parallel num_threads (6) proc_bind (close)
	{
	  verify (omp_proc_bind_close, omp_proc_bind_close);
	  #pragma omp critical
	  {
	    struct place p = places_array[0].places[0];
	    int thr = omp_get_thread_num ();
	    printf ("#1,#4 thread 3,%d", thr);
	    if (omp_get_num_threads () == 6 && test_spread_master_close)
	      /* Outer is spread, inner close.  */
	      switch (places_array[test_places].count)
		{
		case 8:
		  /* T = 6, P = 2, unit sized subpartitions.  */
		  p = places_array[test_places].places[6 + thr / 3];
		  break;
		/* The rest are T = 6, P = 1.  */
		case 7:
		  p = places_array[test_places].places[6];
		  break;
		case 5:
		  p = places_array[test_places].places[4];
		  break;
		case 3:
		  p = places_array[test_places].places[0];
		  break;
		case 2:
		  p = places_array[test_places].places[1];
		  break;
		}
	    print_affinity (p);
	    printf ("\n");
	  }
	}
      }
  }

  /* Spread.  */
  #pragma omp parallel num_threads (5) proc_bind(spread)
  {
    verify (omp_proc_bind_spread, omp_proc_bind_master);
    #pragma omp critical
    {
      struct place p = places_array[0].places[0];
      int thr = omp_get_thread_num ();
      printf ("#2 thread %d", thr);
      if (omp_get_num_threads () == 5
	  && (test_spread_master_close || test_true))
	switch (places_array[test_places].count)
	  {
	  case 8:
	    /* T = 5, P = 8, first 3 subpartitions have 2 places, last
	       2 one place.  */
	    p = places_array[test_places].places[thr < 3 ? 2 * thr : 3 + thr];
	    break;
	  case 7:
	    /* T = 5, P = 7, first 2 subpartitions have 2 places, last
	       3 one place.  */
	    p = places_array[test_places].places[thr < 2 ? 2 * thr : 2 + thr];
	    break;
	  case 5:
	    /* T = 5, P = 5, unit sized subpartitions, each one with one
	       thread.  */
	    p = places_array[test_places].places[thr];
	    break;
	  case 3:
	    /* T = 5, P = 3, unit sized subpartitions, first gets
	       thr0 and thr3, second thr1 and thr4, third thr2.  */
	    p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr];
	    break;
	  case 2:
	    /* T = 5, P = 2, unit sized subpartitions, first with
	       thr{0,1,4} and second with thr{2,3}.  */
	    p = places_array[test_places].places[thr == 4 ? 0 : thr / 2];
	    break;
	  }
      print_affinity (p);
      printf ("\n");
    }
    #pragma omp barrier
    if (omp_get_thread_num () == 3)
      {
	/* Index of the place expected for thread 3 of the region above;
	   it stays 0 for the 3-place list and when nothing is verified.  */
	int pp = 0;
	switch (places_array[test_places].count)
	  {
	  case 8: pp = 6; break;
	  case 7: pp = 5; break;
	  case 5: pp = 3; break;
	  case 2: pp = 1; break;
	  }
	/* Spread, spread/master.  */
	#pragma omp parallel num_threads (3) firstprivate (pp)
	{
	  verify (omp_proc_bind_spread, omp_proc_bind_close);
	  #pragma omp critical
	  {
	    struct place p = places_array[0].places[0];
	    int thr = omp_get_thread_num ();
	    printf ("#2,#1 thread 3,%d", thr);
	    if (test_spread_master_close || test_true)
	      /* Outer is spread, inner spread resp. master, but we have
		 just unit sized partitions.  */
	      p = places_array[test_places].places[pp];
	    print_affinity (p);
	    printf ("\n");
	  }
	}
	/* Spread, spread.  */
	#pragma omp parallel num_threads (5) proc_bind (spread) \
			     firstprivate (pp)
	{
	  verify (omp_proc_bind_spread, omp_proc_bind_close);
	  #pragma omp critical
	  {
	    struct place p = places_array[0].places[0];
	    int thr = omp_get_thread_num ();
	    printf ("#2,#2 thread 3,%d", thr);
	    if (test_spread_master_close || test_true)
	      /* Outer is spread, inner spread, but we have
		 just unit sized partitions.  */
	      p = places_array[test_places].places[pp];
	    print_affinity (p);
	    printf ("\n");
	  }
	}
	/* Spread, master.  */
	#pragma omp parallel num_threads (4) proc_bind(master) \
			     firstprivate(pp)
	{
	  verify (omp_proc_bind_master, omp_proc_bind_close);
	  #pragma omp critical
	  {
	    struct place p = places_array[0].places[0];
	    int thr = omp_get_thread_num ();
	    printf ("#2,#3 thread 3,%d", thr);
	    if (test_spread_master_close || test_true)
	      /* Outer is spread, inner master, but we have
		 just unit sized partitions.  */
	      p = places_array[test_places].places[pp];
	    print_affinity (p);
	    printf ("\n");
	  }
	}
	/* Spread, close.  */
	#pragma omp parallel num_threads (6) proc_bind (close) \
			     firstprivate (pp)
	{
	  verify (omp_proc_bind_close, omp_proc_bind_close);
	  #pragma omp critical
	  {
	    struct place p = places_array[0].places[0];
	    int thr = omp_get_thread_num ();
	    printf ("#2,#4 thread 3,%d", thr);
	    if (test_spread_master_close || test_true)
	      /* Outer is spread, inner close, but we have
		 just unit sized partitions.  */
	      p = places_array[test_places].places[pp];
	    print_affinity (p);
	    printf ("\n");
	  }
	}
      }
  }

  /* Master.  */
  #pragma omp parallel num_threads (3) proc_bind(master)
  {
    verify (omp_proc_bind_master, omp_proc_bind_master);
    #pragma omp critical
    {
      struct place p = places_array[0].places[0];
      int thr = omp_get_thread_num ();
      printf ("#3 thread %d", thr);
      if (test_spread_master_close || test_true)
	p = places_array[test_places].places[0];
      print_affinity (p);
      printf ("\n");
    }
    #pragma omp barrier
    if (omp_get_thread_num () == 2)
      {
	/* Master, master.  */
	#pragma omp parallel num_threads (4)
	{
	  verify (omp_proc_bind_master, omp_proc_bind_close);
	  #pragma omp critical
	  {
	    struct place p = places_array[0].places[0];
	    int thr = omp_get_thread_num ();
	    printf ("#3,#1 thread 2,%d", thr);
	    if (test_spread_master_close || test_true)
	      /* Outer is master, inner is master.  */
	      p = places_array[test_places].places[0];
	    print_affinity (p);
	    printf ("\n");
	  }
	}
	/* Master, spread.  */
	#pragma omp parallel num_threads (4) proc_bind (spread)
	{
	  verify (omp_proc_bind_spread, omp_proc_bind_close);
	  #pragma omp critical
	  {
	    struct place p = places_array[0].places[0];
	    int thr = omp_get_thread_num ();
	    printf ("#3,#2 thread 2,%d", thr);
	    if (omp_get_num_threads () == 4
		&& (test_spread_master_close || test_true))
	      /* Outer is master, inner is spread.  */
	      switch (places_array[test_places].count)
		{
		case 8:
		  /* T = 4, P = 8, each subpartition has 2 places.  */
		case 7:
		  /* T = 4, P = 7, each subpartition has 2 places, but
		     last partition, which has just one place.  */
		  p = places_array[test_places].places[2 * thr];
		  break;
		case 5:
		  /* T = 4, P = 5, first subpartition has 2 places, the
		     rest just one.  */
		  p = places_array[test_places].places[thr ? 1 + thr : 0];
		  break;
		case 3:
		  /* T = 4, P = 3, unit sized subpartitions, first gets
		     thr0 and thr3, second thr1, third thr2.  */
		  p = places_array[test_places].places[thr == 3 ? 0 : thr];
		  break;
		case 2:
		  /* T = 4, P = 2, unit sized subpartitions, each with
		     2 threads.  */
		  p = places_array[test_places].places[thr / 2];
		  break;
		}
	    print_affinity (p);
	    printf ("\n");
	  }
	  #pragma omp barrier
	  if (omp_get_thread_num () == 0)
	    {
	      /* Master, spread, close.  */
	      #pragma omp parallel num_threads (5) proc_bind (close)
	      {
		verify (omp_proc_bind_close, omp_proc_bind_close);
		#pragma omp critical
		{
		  struct place p = places_array[0].places[0];
		  int thr = omp_get_thread_num ();
		  printf ("#3,#2,#1 thread 2,0,%d", thr);
		  if (omp_get_num_threads () == 5
		      && (test_spread_master_close || test_true))
		    /* Outer is master, inner spread, innermost close.  */
		    switch (places_array[test_places].count)
		      {
		      /* First 3 are T = 5, P = 2.  */
		      case 8:
		      case 7:
		      case 5:
			p = places_array[test_places].places[(thr & 2) / 2];
			break;
		      /* All the rest are T = 5, P = 1.  */
		      case 3:
		      case 2:
			p = places_array[test_places].places[0];
			break;
		      }
		  print_affinity (p);
		  printf ("\n");
		}
	      }
	    }
	  #pragma omp barrier
	  if (omp_get_thread_num () == 3)
	    {
	      /* Master, spread, close.  */
	      #pragma omp parallel num_threads (5) proc_bind (close)
	      {
		verify (omp_proc_bind_close, omp_proc_bind_close);
		#pragma omp critical
		{
		  struct place p = places_array[0].places[0];
		  int thr = omp_get_thread_num ();
		  printf ("#3,#2,#2 thread 2,3,%d", thr);
		  if (omp_get_num_threads () == 5
		      && (test_spread_master_close || test_true))
		    /* Outer is master, inner spread, innermost close.  */
		    switch (places_array[test_places].count)
		      {
		      case 8:
			/* T = 5, P = 2.  */
			p = places_array[test_places].places[6
							     + (thr & 2) / 2];
			break;
		      /* All the rest are T = 5, P = 1.  */
		      case 7:
			p = places_array[test_places].places[6];
			break;
		      case 5:
			p = places_array[test_places].places[4];
			break;
		      case 3:
			p = places_array[test_places].places[0];
			break;
		      case 2:
			p = places_array[test_places].places[1];
			break;
		      }
		  print_affinity (p);
		  printf ("\n");
		}
	      }
	    }
	}
	/* Master, master.  */
	#pragma omp parallel num_threads (4) proc_bind(master)
	{
	  verify (omp_proc_bind_master, omp_proc_bind_close);
	  #pragma omp critical
	  {
	    struct place p = places_array[0].places[0];
	    int thr = omp_get_thread_num ();
	    printf ("#3,#3 thread 2,%d", thr);
	    if (test_spread_master_close || test_true)
	      /* Outer is master, inner master.  */
	      p = places_array[test_places].places[0];
	    print_affinity (p);
	    printf ("\n");
	  }
	}
	/* Master, close.  */
	#pragma omp parallel num_threads (6) proc_bind (close)
	{
	  verify (omp_proc_bind_close, omp_proc_bind_close);
	  #pragma omp critical
	  {
	    struct place p = places_array[0].places[0];
	    int thr = omp_get_thread_num ();
	    printf ("#3,#4 thread 2,%d", thr);
	    if (omp_get_num_threads () == 6
		&& (test_spread_master_close || test_true))
	      switch (places_array[test_places].count)
		{
		case 8:
		  /* T = 6, P = 8.  */
		case 7:
		  /* T = 6, P = 7.  */
		  p = places_array[test_places].places[thr];
		  break;
		case 5:
		  /* T = 6, P = 5.  thr{0,5} go into the first place.  */
		  p = places_array[test_places].places[thr == 5 ? 0 : thr];
		  break;
		case 3:
		  /* T = 6, P = 3, two threads into each place.  */
		  p = places_array[test_places].places[thr / 2];
		  break;
		case 2:
		  /* T = 6, P = 2, 3 threads into each place.  */
		  p = places_array[test_places].places[thr / 3];
		  break;
		}
	    print_affinity (p);
	    printf ("\n");
	  }
	}
      }
  }

  /* Close.  */
  #pragma omp parallel num_threads (5) proc_bind(close)
  {
    verify (omp_proc_bind_close, omp_proc_bind_master);
    #pragma omp critical
    {
      struct place p = places_array[0].places[0];
      int thr = omp_get_thread_num ();
      printf ("#4 thread %d", thr);
      if (omp_get_num_threads () == 5
	  && (test_spread_master_close || test_true))
	switch (places_array[test_places].count)
	  {
	  case 8:
	    /* T = 5, P = 8.  */
	  case 7:
	    /* T = 5, P = 7.  */
	  case 5:
	    /* T = 5, P = 5.  */
	    p = places_array[test_places].places[thr];
	    break;
	  case 3:
	    /* T = 5, P = 3, thr{0,3} in first place, thr{1,4} in second,
	       thr2 in third.  */
	    p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr];
	    break;
	  case 2:
	    /* T = 5, P = 2, thr{0,1,4} in first place, thr{2,3} in second.  */
	    p = places_array[test_places].places[thr == 4 ? 0 : thr / 2];
	    break;
	  }
      print_affinity (p);
      printf ("\n");
    }
    #pragma omp barrier
    if (omp_get_thread_num () == 2)
      {
	/* Index of the place expected for thread 2 of the region above.  */
	int pp = 0;
	switch (places_array[test_places].count)
	  {
	  case 8:
	  case 7:
	  case 5:
	  case 3:
	    pp = 2;
	    break;
	  case 2:
	    pp = 1;
	    break;
	  }
	/* Close, close/master.  */
	#pragma omp parallel num_threads (4) firstprivate (pp)
	{
	  verify (omp_proc_bind_close, omp_proc_bind_close);
	  #pragma omp critical
	  {
	    struct place p = places_array[0].places[0];
	    int thr = omp_get_thread_num ();
	    printf ("#4,#1 thread 2,%d", thr);
	    if (test_spread_master_close)
	      /* Outer is close, inner is master.  */
	      p = places_array[test_places].places[pp];
	    else if (omp_get_num_threads () == 4 && test_true)
	      /* Outer is close, inner is close.  */
	      switch (places_array[test_places].count)
		{
		case 8:
		  /* T = 4, P = 8.  */
		case 7:
		  /* T = 4, P = 7.  */
		  p = places_array[test_places].places[2 + thr];
		  break;
		case 5:
		  /* T = 4, P = 5.  There is wrap-around for thr3.  */
		  p = places_array[test_places].places[thr == 3 ? 0 : 2 + thr];
		  break;
		case 3:
		  /* T = 4, P = 3, thr{0,3} go into p2, thr1 into p0, thr2
		     into p1.  */
		  p = places_array[test_places].places[(2 + thr) % 3];
		  break;
		case 2:
		  /* T = 4, P = 2, 2 threads into each place.  */
		  p = places_array[test_places].places[1 - thr / 2];
		  break;
		}

	    print_affinity (p);
	    printf ("\n");
	  }
	}
	/* Close, spread.  */
	#pragma omp parallel num_threads (4) proc_bind (spread)
	{
	  verify (omp_proc_bind_spread, omp_proc_bind_close);
	  #pragma omp critical
	  {
	    struct place p = places_array[0].places[0];
	    int thr = omp_get_thread_num ();
	    printf ("#4,#2 thread 2,%d", thr);
	    if (omp_get_num_threads () == 4
		&& (test_spread_master_close || test_true))
	      /* Outer is close, inner is spread.  */
	      switch (places_array[test_places].count)
		{
		case 8:
		  /* T = 4, P = 8, each subpartition has 2 places.  */
		case 7:
		  /* T = 4, P = 7, each subpartition has 2 places, but
		     last partition, which has just one place.  */
		  p = places_array[test_places].places[thr == 3 ? 0
						       : 2 + 2 * thr];
		  break;
		case 5:
		  /* T = 4, P = 5, first subpartition has 2 places, the
		     rest just one.  */
		  p = places_array[test_places].places[thr == 3 ? 0
						       : 2 + thr];
		  break;
		case 3:
		  /* T = 4, P = 3, unit sized subpartitions, third gets
		     thr0 and thr3, first thr1, second thr2.  */
		  p = places_array[test_places].places[thr == 0 ? 2 : thr - 1];
		  break;
		case 2:
		  /* T = 4, P = 2, unit sized subpartitions, each with
		     2 threads.  */
		  p = places_array[test_places].places[1 - thr / 2];
		  break;
		}
	    print_affinity (p);
	    printf ("\n");
	  }
	  #pragma omp barrier
	  if (omp_get_thread_num () == 0)
	    {
	      /* Close, spread, close.  */
	      #pragma omp parallel num_threads (5) proc_bind (close)
	      {
		verify (omp_proc_bind_close, omp_proc_bind_close);
		#pragma omp critical
		{
		  struct place p = places_array[0].places[0];
		  int thr = omp_get_thread_num ();
		  printf ("#4,#2,#1 thread 2,0,%d", thr);
		  if (omp_get_num_threads () == 5
		      && (test_spread_master_close || test_true))
		    /* Outer is close, inner spread, innermost close.  */
		    switch (places_array[test_places].count)
		      {
		      case 8:
		      case 7:
			/* T = 5, P = 2.  */
			p = places_array[test_places].places[2
							     + (thr & 2) / 2];
			break;
		      /* All the rest are T = 5, P = 1.  */
		      case 5:
		      case 3:
			p = places_array[test_places].places[2];
			break;
		      case 2:
			p = places_array[test_places].places[1];
			break;
		      }
		  print_affinity (p);
		  printf ("\n");
		}
	      }
	    }
	  #pragma omp barrier
	  if (omp_get_thread_num () == 2)
	    {
	      /* Close, spread, close.  */
	      #pragma omp parallel num_threads (5) proc_bind (close)
	      {
		verify (omp_proc_bind_close, omp_proc_bind_close);
		#pragma omp critical
		{
		  struct place p = places_array[0].places[0];
		  int thr = omp_get_thread_num ();
		  printf ("#4,#2,#2 thread 2,2,%d", thr);
		  if (omp_get_num_threads () == 5
		      && (test_spread_master_close || test_true))
		    /* Outer is close, inner spread, innermost close.  */
		    switch (places_array[test_places].count)
		      {
		      case 8:
			/* T = 5, P = 2.  */
			p = places_array[test_places].places[6
							     + (thr & 2) / 2];
			break;
		      /* All the rest are T = 5, P = 1.  */
		      case 7:
			p = places_array[test_places].places[6];
			break;
		      case 5:
			p = places_array[test_places].places[4];
			break;
		      case 3:
			p = places_array[test_places].places[1];
			break;
		      case 2:
			p = places_array[test_places].places[0];
			break;
		      }
		  print_affinity (p);
		  printf ("\n");
		}
	      }
	    }
	  #pragma omp barrier
	  if (omp_get_thread_num () == 3)
	    {
	      /* Close, spread, close.  */
	      #pragma omp parallel num_threads (5) proc_bind (close)
	      {
		verify (omp_proc_bind_close, omp_proc_bind_close);
		#pragma omp critical
		{
		  struct place p = places_array[0].places[0];
		  int thr = omp_get_thread_num ();
		  printf ("#4,#2,#3 thread 2,3,%d", thr);
		  if (omp_get_num_threads () == 5
		      && (test_spread_master_close || test_true))
		    /* Outer is close, inner spread, innermost close.  */
		    switch (places_array[test_places].count)
		      {
		      case 8:
		      case 7:
		      case 5:
			/* T = 5, P = 2.  */
			p = places_array[test_places].places[(thr & 2) / 2];
			break;
		      /* All the rest are T = 5, P = 1.  */
		      case 3:
			p = places_array[test_places].places[2];
			break;
		      case 2:
			p = places_array[test_places].places[0];
			break;
		      }
		  print_affinity (p);
		  printf ("\n");
		}
	      }
	    }
	}
	/* Close, master.  */
	#pragma omp parallel num_threads (4) proc_bind(master) \
			     firstprivate (pp)
	{
	  verify (omp_proc_bind_master, omp_proc_bind_close);
	  #pragma omp critical
	  {
	    struct place p = places_array[0].places[0];
	    int thr = omp_get_thread_num ();
	    printf ("#4,#3 thread 2,%d", thr);
	    if (test_spread_master_close || test_true)
	      /* Outer is close, inner master.  */
	      p = places_array[test_places].places[pp];
	    print_affinity (p);
	    printf ("\n");
	  }
	}
	/* Close, close.  */
	#pragma omp parallel num_threads (6) proc_bind (close)
	{
	  verify (omp_proc_bind_close, omp_proc_bind_close);
	  #pragma omp critical
	  {
	    struct place p = places_array[0].places[0];
	    int thr = omp_get_thread_num ();
	    printf ("#4,#4 thread 2,%d", thr);
	    if (omp_get_num_threads () == 6
		&& (test_spread_master_close || test_true))
	      switch (places_array[test_places].count)
		{
		case 8:
		  /* T = 6, P = 8.  */
		  p = places_array[test_places].places[2 + thr];
		  break;
		case 7:
		  /* T = 6, P = 7.  */
		  p = places_array[test_places].places[thr == 5 ? 0 : 2 + thr];
		  break;
		case 5:
		  /* T = 6, P = 5.  thr{0,5} go into the third place.  */
		  p = places_array[test_places].places[thr >= 3 ? thr - 3
						       : 2 + thr];
		  break;
		case 3:
		  /* T = 6, P = 3, two threads into each place.  */
		  p = places_array[test_places].places[thr < 2 ? 2
						       : thr / 2 - 1];
		  break;
		case 2:
		  /* T = 6, P = 2, 3 threads into each place.  */
		  p = places_array[test_places].places[1 - thr / 3];
		  break;
		}
	    print_affinity (p);
	    printf ("\n");
	  }
	}
      }
  }

  return 0;
}