view gcc/testsuite/gcc.target/arm/acle/cde.c @ 19:2b5abeee2509 default tip

update gcc11
author anatofuz
date Mon, 25 May 2020 07:50:57 +0900
parents
children
line wrap: on
line source

/* { dg-do compile } */
/* { dg-skip-if "Require optimisation to compile DCE tests" { *-*-* } { "-O0" } { "" } } */
/* { dg-require-effective-target arm_v8m_main_cde_ok } */
/* { dg-add-options arm_v8m_main_cde } */
/* { dg-final { check-function-bodies "**" "" } } */

/* These are the scalar intrinsics.
uint32_t __arm_cx1(int coproc, uint32_t imm);
uint32_t __arm_cx1a(int coproc, uint32_t acc, uint32_t imm);
uint32_t __arm_cx2(int coproc, uint32_t n, uint32_t imm);
uint32_t __arm_cx2a(int coproc, uint32_t acc, uint32_t n, uint32_t imm);
uint32_t __arm_cx3(int coproc, uint32_t n, uint32_t m, uint32_t imm);
uint32_t __arm_cx3a(int coproc, uint32_t acc, uint32_t n, uint32_t m, uint32_t imm);

uint64_t __arm_cx1d(int coproc, uint32_t imm);
uint64_t __arm_cx1da(int coproc, uint64_t acc, uint32_t imm);
uint64_t __arm_cx2d(int coproc, uint32_t n, uint32_t imm);
uint64_t __arm_cx2da(int coproc, uint64_t acc, uint32_t n, uint32_t imm);
uint64_t __arm_cx3d(int coproc, uint32_t n, uint32_t m, uint32_t imm);
uint64_t __arm_cx3da(int coproc, uint64_t acc, uint32_t n, uint32_t m, uint32_t imm); */

#include "arm_cde.h"

/* Define test_cde_NAME, a function taking two uint32_t parameters n and m
   (each marked unused, since not every expansion refers to both) and
   returning ACCUM_TYPE.

   NAME selects the intrinsic __arm_NAME.  ARGUMENTS is the parenthesised
   argument list pasted directly after the intrinsic name; it may refer to
   n, m and accum.  accum starts at zero, has the intrinsic's result added to
   it, and is then returned, so for the accumulating forms the value passed
   as the accumulator operand is zero.  */
#define TEST_CDE_SCALAR_INTRINSIC(name, accum_type, arguments) \
  accum_type test_cde_##name (__attribute__ ((unused)) uint32_t n, \
			      __attribute__ ((unused)) uint32_t m) \
  {  \
     accum_type accum = 0; \
     accum += __arm_##name  arguments;  \
     return accum;  \
  }

/* Basic test that we produce the assembly as expected.  */
/*
** test_cde_cx1:
**	cx1	p0, r0, #33
**	bx	lr
*/
/* cx1 takes only a coprocessor number (0) and an immediate (33); neither
   function parameter is used.  */
TEST_CDE_SCALAR_INTRINSIC (cx1, uint32_t, (0, 33))

/*
** test_cde_cx1a:
**	movs	r0, #0
**	cx1a	p0, r0, #33
**	bx	lr
*/
/* cx1a additionally takes accum as its accumulator operand.  accum is zero at
   that point, which is why the expected body zeroes r0 before the cx1a.  */
TEST_CDE_SCALAR_INTRINSIC (cx1a, uint32_t, (0, accum, 33))

/*
** test_cde_cx2:
**	cx2	p0, r0, r0, #33
**	bx	lr
*/
/* cx2 adds one 32-bit input, n, to the coprocessor number and immediate.  */
TEST_CDE_SCALAR_INTRINSIC (cx2, uint32_t, (0, n, 33))

/*
** test_cde_cx2a:
**	movs	(r[0-9]+), #0
**	cx2a	p0, \1, r0, #33
**	mov	r0, \1
**	bx	lr
*/
/* cx2a combines the zero-valued accumulator with the input n.  The expected
   body therefore zeroes some register first and uses it as the accumulator.  */
TEST_CDE_SCALAR_INTRINSIC (cx2a, uint32_t, (0, accum, n, 33))

/*
** test_cde_cx3:
**	cx3	p0, r0, r0, r1, #33
**	bx	lr
*/
/* cx3 takes two 32-bit inputs, n and m.  */
TEST_CDE_SCALAR_INTRINSIC (cx3, uint32_t, (0, n, m, 33))

/*
** test_cde_cx3a:
**	movs	(r[0-9]+), #0
**	cx3a	p0, \1, r0, r1, #33
**	mov	r0, \1
**	bx	lr
*/
/* cx3a takes the zero-valued accumulator plus both inputs n and m.  */
TEST_CDE_SCALAR_INTRINSIC (cx3a, uint32_t, (0, accum, n, m, 33))

/*
** test_cde_cx1d:
**	cx1d	p0, r0, r1, #33
**	bx	lr
*/
/* cx1d is the uint64_t-returning counterpart of cx1; the expected body names
   two destination registers instead of one.  */
TEST_CDE_SCALAR_INTRINSIC (cx1d, uint64_t, (0, 33))

/*
** test_cde_cx1da:
**	movs	r0, #0
**	movs	r1, #0
**	cx1da	p0, r0, r1, #33
**	bx	lr
*/
/* cx1da takes a 64-bit accumulator.  accum is zero at that point, so the
   expected body zeroes both r0 and r1 before the cx1da.  */
TEST_CDE_SCALAR_INTRINSIC (cx1da, uint64_t, (0, accum, 33))

/*
** test_cde_cx2d:
**	cx2d	p0, r0, r1, r0, #33
**	bx	lr
*/
/* cx2d is the uint64_t-returning counterpart of cx2, with the single input
   n.  */
TEST_CDE_SCALAR_INTRINSIC (cx2d, uint64_t, (0, n, 33))

/* This particular function gets optimised by the compiler in two different
   ways depending on the optimisation level.  So does test_cde_cx3da.  That's
   why we have two different regexes in each of these function body checks.  */
/*
** test_cde_cx2da:
** (
**	mov	(r[0-9]+), r0
**	movs	r0, #0
**	movs	r1, #0
**	cx2da	p0, r0, r1, \1, #33
** |
**	movs	(r[0-9]+), #0
**	movs	(r[0-9]+), #0
**	cx2da	p0, \2, \3, r0, #33
**	mov	r0, \2
**	mov	r1, \3
** )
**	bx	lr
*/
/* cx2da takes the zero-valued 64-bit accumulator and the input n.  The two
   alternatives in the expected body differ in whether n is moved out of r0 or
   the accumulator pair is built elsewhere and copied back to r0, r1.  */
TEST_CDE_SCALAR_INTRINSIC (cx2da, uint64_t, (0, accum, n, 33))

/*
** test_cde_cx3d:
**	cx3d	p0, r0, r1, r0, r1, #33
**	bx	lr
*/
/* cx3d is the uint64_t-returning counterpart of cx3, with inputs n and m.  */
TEST_CDE_SCALAR_INTRINSIC (cx3d, uint64_t, (0, n, m, 33))

/*
** test_cde_cx3da:
**	...
** (
**	movs	(r[0-9]+), #0
**	movs	(r[0-9]+), #0
**	cx3da	p0, \1, \2, r0, r1, #33
**	mov	r0, \1
**	mov	r1, \2
** |
**      movs	r0, #0
**      movs	r1, #0
**      cx3da	p0, r0, r1, r[0-9]+, r[0-9]+, #33
** )
**	...
**	bx	lr
*/
/* cx3da takes the zero-valued 64-bit accumulator and both inputs n and m.
   As with cx2da, the expected body accepts two alternative sequences.  */
TEST_CDE_SCALAR_INTRINSIC (cx3da, uint64_t, (0, accum, n, m, 33))



/* Ensure this function gets DCE'd out after optimisation.
   This is expected because the ACLE specification states that these functions
   are stateless and pure.  */
/*
** test_cde_dce:
**	bx	lr
*/
/* Call every scalar CDE intrinsic with coprocessor 0 and immediate 33 and
   discard each result.  Nothing is returned or stored, so none of the calls
   has an effect that is observable from this function.  */
void test_cde_dce (uint32_t n, uint32_t m)
{
  /* A single 64-bit zero serves as the accumulator argument for every
     accumulating form; the 32-bit forms declare their accumulator parameter
     as uint32_t.  */
  uint64_t accum = 0;
  /* Forms returning uint32_t.  */
  __arm_cx1   (0, 33);
  __arm_cx1a  (0, accum, 33);
  __arm_cx2   (0, n, 33);
  __arm_cx2a  (0, accum, n, 33);
  __arm_cx3   (0, n, m, 33);
  __arm_cx3a  (0, accum, n, m, 33);
  /* Forms returning uint64_t.  */
  __arm_cx1d   (0, 33);
  __arm_cx1da  (0, accum, 33);
  __arm_cx2d   (0, n, 33);
  __arm_cx2da  (0, accum, n, 33);
  __arm_cx3d   (0, n, m, 33);
  __arm_cx3da  (0, accum, n, m, 33);
}

/* Check that these intrinsics accept constants given through symbolic names.
   This test must be run under some level of optimisation.
   The actual check we perform is that the function is provided something that,
   at the point of expansion, is an immediate.  That check is not as strict as
   having something that is an immediate directly.

   Since we've already checked these intrinsics generate code in the manner we
   expect (above), here we just check that all the instructions we expect are
   there.  To ensure the instructions are from these functions we use different
   constants and search for those specifically with `scan-assembler-times`.  */

/* Check the intrinsics returning uint32_t with the coprocessor number and the
   immediate supplied through local variables rather than literal constants.
   Coprocessor 6 is used so that the resulting instructions can be told apart
   from those produced by the functions that use coprocessor 0.  */
uint32_t test_cde2 (uint32_t n, uint32_t m)
{
  int coproc = 6;
  uint32_t imm = 30;
  uint32_t accum = 0;
  /* Every result is added to accum, and accum is in turn the accumulator
     input of the accumulating forms, so each call contributes to the value
     returned.  */
  accum += __arm_cx1   (coproc, imm);
  accum += __arm_cx1a  (coproc, accum, imm);
  accum += __arm_cx2   (coproc, n, imm);
  accum += __arm_cx2a  (coproc, accum, n, imm);
  accum += __arm_cx3   (coproc, n, m, imm);
  accum += __arm_cx3a  (coproc, accum, n, m, imm);
  return accum;
}

/* Check the intrinsics returning uint64_t with the coprocessor number and the
   immediate supplied through local variables rather than literal constants.
   Coprocessor 6 is used so that the resulting instructions can be told apart
   from those produced by the functions that use coprocessor 0.  */
uint64_t test_cdedi2 (uint32_t n, uint32_t m)
{
  int coproc = 6;
  uint32_t imm = 30;
  uint64_t accum = 0;
  /* Every result is added to accum, and accum is in turn the 64-bit
     accumulator input of the accumulating forms, so each call contributes to
     the value returned.  */
  accum += __arm_cx1d   (coproc, imm);
  accum += __arm_cx1da  (coproc, accum, imm);
  accum += __arm_cx2d   (coproc, n, imm);
  accum += __arm_cx2da  (coproc, accum, n, imm);
  accum += __arm_cx3d   (coproc, n, m, imm);
  accum += __arm_cx3da  (coproc, accum, n, m, imm);
  return accum;
}

/* { dg-final { scan-assembler-times "cx1\\tp6" 1 } } */
/* { dg-final { scan-assembler-times "cx2\\tp6" 1 } } */
/* { dg-final { scan-assembler-times "cx3\\tp6" 1 } } */
/* { dg-final { scan-assembler-times "cx1a\\tp6" 1 } } */
/* { dg-final { scan-assembler-times "cx2a\\tp6" 1 } } */
/* { dg-final { scan-assembler-times "cx3a\\tp6" 1 } } */
/* { dg-final { scan-assembler-times "cx1d\\tp6" 1 } } */
/* { dg-final { scan-assembler-times "cx2d\\tp6" 1 } } */
/* { dg-final { scan-assembler-times "cx3d\\tp6" 1 } } */
/* { dg-final { scan-assembler-times "cx1da\\tp6" 1 } } */
/* { dg-final { scan-assembler-times "cx2da\\tp6" 1 } } */
/* { dg-final { scan-assembler-times "cx3da\\tp6" 1 } } */