rs6000: Add execution tests for mma builtins [v4]
This patch adds execution tests that use the MMA builtins and check for the right answer, and new tests that checks whether __builtin_cpu_supports and __builtin_cpu_is return sane answers for power10. 2020-06-30 Rajalakshmi Srinivasaraghavan <rajis@linux.vnet.ibm.com> Aaron Sawdey <acsawdey@linux.ibm.com> gcc/testsuite/ * gcc.target/powerpc/p10-identify.c: New file. * gcc.target/powerpc/p10-arch31.c: New file. * gcc.target/powerpc/mma-single-test.c: New file. * gcc.target/powerpc/mma-double-test.c: New file.
This commit is contained in:
parent
5ada27f8e0
commit
ed1d3639e4
4 changed files with 429 additions and 0 deletions
185
gcc/testsuite/gcc.target/powerpc/mma-double-test.c
Executable file
185
gcc/testsuite/gcc.target/powerpc/mma-double-test.c
Executable file
|
@ -0,0 +1,185 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target power10_hw } */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <altivec.h>
|
||||
|
||||
typedef unsigned char vec_t __attribute__ ((vector_size (16)));
|
||||
typedef double v4sf_t __attribute__ ((vector_size (16)));
|
||||
#define SAVE_ACC(ACC, ldc, J) \
|
||||
__builtin_mma_disassemble_acc (result, ACC); \
|
||||
rowC = (v4sf_t *) &CO[0*ldc+J]; \
|
||||
rowC[0] += result[3] ; \
|
||||
rowC = (v4sf_t *) &CO[1*ldc+J]; \
|
||||
rowC[0] += result[2] ; \
|
||||
rowC = (v4sf_t *) &CO[2*ldc+J]; \
|
||||
rowC[0] += result[1] ; \
|
||||
rowC = (v4sf_t *) &CO[3*ldc+J]; \
|
||||
rowC[0] += result[0] ;
|
||||
|
||||
void
|
||||
MMA (int m, int n, int k, double *A, double *B, double *C)
|
||||
{
|
||||
__vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
|
||||
v4sf_t result[4];
|
||||
v4sf_t *rowC;
|
||||
for (int l = 0; l < n; l += 4)
|
||||
{
|
||||
double *CO;
|
||||
double *AO;
|
||||
AO = A;
|
||||
CO = C;
|
||||
C += m * 4;
|
||||
for (int j = 0; j < m; j += 16)
|
||||
{
|
||||
double *BO = B;
|
||||
__builtin_mma_xxsetaccz (&acc0);
|
||||
__builtin_mma_xxsetaccz (&acc1);
|
||||
__builtin_mma_xxsetaccz (&acc2);
|
||||
__builtin_mma_xxsetaccz (&acc3);
|
||||
__builtin_mma_xxsetaccz (&acc4);
|
||||
__builtin_mma_xxsetaccz (&acc5);
|
||||
__builtin_mma_xxsetaccz (&acc6);
|
||||
__builtin_mma_xxsetaccz (&acc7);
|
||||
unsigned long i;
|
||||
|
||||
for (i = 0; i < k; i++)
|
||||
{
|
||||
vec_t *rowA = (vec_t *) & AO[i * 16];
|
||||
__vector_pair rowB;
|
||||
vec_t *rb = (vec_t *) & BO[i * 4];
|
||||
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
||||
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
|
||||
__builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
|
||||
__builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
|
||||
__builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
|
||||
__builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
|
||||
__builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
|
||||
}
|
||||
SAVE_ACC (&acc0, m, 0);
|
||||
SAVE_ACC (&acc2, m, 4);
|
||||
SAVE_ACC (&acc1, m, 2);
|
||||
SAVE_ACC (&acc3, m, 6);
|
||||
SAVE_ACC (&acc4, m, 8);
|
||||
SAVE_ACC (&acc6, m, 12);
|
||||
SAVE_ACC (&acc5, m, 10);
|
||||
SAVE_ACC (&acc7, m, 14);
|
||||
AO += k * 16;
|
||||
BO += k * 4;
|
||||
CO += 16;
|
||||
}
|
||||
B += k * 4;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
init (double *matrix, int row, int column)
|
||||
{
|
||||
for (int j = 0; j < column; j++)
|
||||
{
|
||||
for (int i = 0; i < row; i++)
|
||||
{
|
||||
matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
init0 (double *matrix, double *matrix1, int row, int column)
|
||||
{
|
||||
for (int j = 0; j < column; j++)
|
||||
for (int i = 0; i < row; i++)
|
||||
matrix[j * row + i] = matrix1[j * row + i] = 0;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
print (const char *name, const double *matrix, int row, int column)
|
||||
{
|
||||
printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
|
||||
for (int i = 0; i < row; i++)
|
||||
{
|
||||
for (int j = 0; j < column; j++)
|
||||
{
|
||||
printf ("%f ", matrix[j * row + i]);
|
||||
}
|
||||
printf ("\n");
|
||||
}
|
||||
printf ("\n");
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int rowsA, colsB, common;
|
||||
int i, j, k;
|
||||
int ret = 0;
|
||||
|
||||
for (int t = 16; t <= 128; t += 16)
|
||||
{
|
||||
for (int t1 = 4; t1 <= 16; t1 += 4)
|
||||
{
|
||||
rowsA = t;
|
||||
colsB = t1;
|
||||
common = 1;
|
||||
/* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
|
||||
double A[rowsA * common];
|
||||
double B[common * colsB];
|
||||
double C[rowsA * colsB];
|
||||
double D[rowsA * colsB];
|
||||
|
||||
|
||||
init (A, rowsA, common);
|
||||
init (B, common, colsB);
|
||||
init0 (C, D, rowsA, colsB);
|
||||
MMA (rowsA, colsB, common, A, B, C);
|
||||
|
||||
for (i = 0; i < colsB; i++)
|
||||
{
|
||||
for (j = 0; j < rowsA; j++)
|
||||
{
|
||||
D[i * rowsA + j] = 0;
|
||||
for (k = 0; k < common; k++)
|
||||
{
|
||||
D[i * rowsA + j] +=
|
||||
A[k * rowsA + j] * B[k + common * i];
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i = 0; i < colsB; i++)
|
||||
{
|
||||
for (j = 0; j < rowsA; j++)
|
||||
{
|
||||
for (k = 0; k < common; k++)
|
||||
{
|
||||
if (D[i * rowsA + j] != C[i * rowsA + j])
|
||||
{
|
||||
printf ("Error %d,%d,%d\n",i,j,k);
|
||||
ret++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ret)
|
||||
{
|
||||
print ("A", A, rowsA, common);
|
||||
print ("B", B, common, colsB);
|
||||
print ("C", C, rowsA, colsB);
|
||||
print ("D", D, rowsA, colsB);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef VERBOSE
|
||||
if (ret)
|
||||
printf ("MMA double test fail: %d errors\n",ret);
|
||||
else
|
||||
printf ("MMA single test success: 0 MMA errors\n");
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
193
gcc/testsuite/gcc.target/powerpc/mma-single-test.c
Executable file
193
gcc/testsuite/gcc.target/powerpc/mma-single-test.c
Executable file
|
@ -0,0 +1,193 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target power10_hw } */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <altivec.h>
|
||||
|
||||
typedef unsigned char vec_t __attribute__ ((vector_size (16)));
|
||||
typedef float v4sf_t __attribute__ ((vector_size (16)));
|
||||
#define SAVE_ACC(ACC, ldc,J) \
|
||||
__builtin_mma_disassemble_acc (result, ACC); \
|
||||
rowC = (v4sf_t *) &CO[0*ldc+J]; \
|
||||
rowC[0] += result[3] ; \
|
||||
rowC = (v4sf_t *) &CO[1*ldc+J]; \
|
||||
rowC[0] += result[2] ; \
|
||||
rowC = (v4sf_t *) &CO[2*ldc+J]; \
|
||||
rowC[0] += result[1] ; \
|
||||
rowC = (v4sf_t *) &CO[3*ldc+J]; \
|
||||
rowC[0] += result[0] ;
|
||||
|
||||
#define SAVE_ACC1(ACC,ldc, J) \
|
||||
__builtin_mma_disassemble_acc (result, ACC); \
|
||||
rowC = (v4sf_t *) &CO[4* ldc+J]; \
|
||||
rowC[0] += result[3] ; \
|
||||
rowC = (v4sf_t *) &CO[5*ldc+J]; \
|
||||
rowC[0] += result[2] ; \
|
||||
rowC = (v4sf_t *) &CO[6*ldc+J]; \
|
||||
rowC[0] += result[1] ; \
|
||||
rowC = (v4sf_t *) &CO[7*ldc+J]; \
|
||||
rowC[0] += result[0] ;
|
||||
void
|
||||
MMA (int m, int n, int k, float *A, float *B, float *C)
|
||||
{
|
||||
__vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
|
||||
v4sf_t result[4];
|
||||
v4sf_t *rowC;
|
||||
for (int l = 0; l < n; l += 8)
|
||||
{
|
||||
float *CO;
|
||||
float *AO;
|
||||
AO = A;
|
||||
CO = C;
|
||||
C += m * 8;
|
||||
for (int j = 0; j < m; j += 16)
|
||||
{
|
||||
float *BO = B;
|
||||
__builtin_mma_xxsetaccz (&acc0);
|
||||
__builtin_mma_xxsetaccz (&acc1);
|
||||
__builtin_mma_xxsetaccz (&acc2);
|
||||
__builtin_mma_xxsetaccz (&acc3);
|
||||
__builtin_mma_xxsetaccz (&acc4);
|
||||
__builtin_mma_xxsetaccz (&acc5);
|
||||
__builtin_mma_xxsetaccz (&acc6);
|
||||
__builtin_mma_xxsetaccz (&acc7);
|
||||
unsigned long i;
|
||||
|
||||
for (i = 0; i < k; i++)
|
||||
{
|
||||
vec_t *rowA = (vec_t *) & AO[i * 16];
|
||||
vec_t *rowB = (vec_t *) & BO[i * 8];
|
||||
__builtin_mma_xvf32gerpp (&acc0, rowB[0], rowA[0]);
|
||||
__builtin_mma_xvf32gerpp (&acc1, rowB[1], rowA[0]);
|
||||
__builtin_mma_xvf32gerpp (&acc2, rowB[0], rowA[1]);
|
||||
__builtin_mma_xvf32gerpp (&acc3, rowB[1], rowA[1]);
|
||||
__builtin_mma_xvf32gerpp (&acc4, rowB[0], rowA[2]);
|
||||
__builtin_mma_xvf32gerpp (&acc5, rowB[1], rowA[2]);
|
||||
__builtin_mma_xvf32gerpp (&acc6, rowB[0], rowA[3]);
|
||||
__builtin_mma_xvf32gerpp (&acc7, rowB[1], rowA[3]);
|
||||
}
|
||||
SAVE_ACC (&acc0, m, 0);
|
||||
SAVE_ACC (&acc2, m, 4);
|
||||
SAVE_ACC1 (&acc1, m, 0);
|
||||
SAVE_ACC1 (&acc3, m, 4);
|
||||
SAVE_ACC (&acc4, m, 8);
|
||||
SAVE_ACC (&acc6, m, 12);
|
||||
SAVE_ACC1 (&acc5, m, 8);
|
||||
SAVE_ACC1 (&acc7, m, 12);
|
||||
AO += k * 16;
|
||||
BO += k * 8;
|
||||
CO += 16;
|
||||
}
|
||||
B += k * 8;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
init (float *matrix, int row, int column)
|
||||
{
|
||||
for (int j = 0; j < column; j++)
|
||||
{
|
||||
for (int i = 0; i < row; i++)
|
||||
{
|
||||
matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
init0 (float *matrix, float *matrix1, int row, int column)
|
||||
{
|
||||
for (int j = 0; j < column; j++)
|
||||
for (int i = 0; i < row; i++)
|
||||
matrix[j * row + i] = matrix1[j * row + i] = 0;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
print (const char *name, const float *matrix, int row, int column)
|
||||
{
|
||||
printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
|
||||
for (int i = 0; i < row; i++)
|
||||
{
|
||||
for (int j = 0; j < column; j++)
|
||||
{
|
||||
printf ("%f ", matrix[j * row + i]);
|
||||
}
|
||||
printf ("\n");
|
||||
}
|
||||
printf ("\n");
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int rowsA, colsB, common;
|
||||
int i, j, k;
|
||||
int ret = 0;
|
||||
|
||||
for (int t = 16; t <= 128; t += 16)
|
||||
{
|
||||
for (int t1 = 8; t1 <= 16; t1 += 8)
|
||||
{
|
||||
rowsA = t;
|
||||
colsB = t1;
|
||||
common = 1;
|
||||
/* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
|
||||
float A[rowsA * common];
|
||||
float B[common * colsB];
|
||||
float C[rowsA * colsB];
|
||||
float D[rowsA * colsB];
|
||||
|
||||
|
||||
init (A, rowsA, common);
|
||||
init (B, common, colsB);
|
||||
init0 (C, D, rowsA, colsB);
|
||||
MMA (rowsA, colsB, common, A, B, C);
|
||||
|
||||
for (i = 0; i < colsB; i++)
|
||||
{
|
||||
for (j = 0; j < rowsA; j++)
|
||||
{
|
||||
D[i * rowsA + j] = 0;
|
||||
for (k = 0; k < common; k++)
|
||||
{
|
||||
D[i * rowsA + j] +=
|
||||
A[k * rowsA + j] * B[k + common * i];
|
||||
}
|
||||
}
|
||||
}
|
||||
for (i = 0; i < colsB; i++)
|
||||
{
|
||||
for (j = 0; j < rowsA; j++)
|
||||
{
|
||||
for (k = 0; k < common; k++)
|
||||
{
|
||||
if (D[i * rowsA + j] != C[i * rowsA + j])
|
||||
{
|
||||
printf ("Error %d,%d,%d\n",i,j,k);
|
||||
ret++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ret)
|
||||
{
|
||||
print ("A", A, rowsA, common);
|
||||
print ("B", B, common, colsB);
|
||||
print ("C", C, rowsA, colsB);
|
||||
print ("D", D, rowsA, colsB);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef VERBOSE
|
||||
if (ret)
|
||||
printf ("MMA single test fail: %d errors\n",ret);
|
||||
else
|
||||
printf ("MMA single test success: 0 MMA errors\n");
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
25
gcc/testsuite/gcc.target/powerpc/p10-arch31.c
Normal file
25
gcc/testsuite/gcc.target/powerpc/p10-arch31.c
Normal file
|
@ -0,0 +1,25 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target power10_hw } */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
|
||||
|
||||
/* This test will only run when the power10_hw_available test passes.
|
||||
If that test passes, then we expect to see that ISA 3.1 is
|
||||
supported. If this is not the case, then the test environment has
|
||||
problems. */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int ret = 0;
|
||||
#ifdef __BUILTIN_CPU_SUPPORTS__
|
||||
if ( !__builtin_cpu_supports ("arch_3_1"))
|
||||
{
|
||||
printf ("Error: __builtin_cpu_supports says arch_3_1 not supported, but power10_hw test passed.\n");
|
||||
ret++;
|
||||
}
|
||||
#endif
|
||||
return ret;
|
||||
}
|
26
gcc/testsuite/gcc.target/powerpc/p10-identify.c
Normal file
26
gcc/testsuite/gcc.target/powerpc/p10-identify.c
Normal file
|
@ -0,0 +1,26 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target power10_hw } */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
|
||||
|
||||
/* This test will only run when the power10_hw_available test passes.
|
||||
If that test passes, then we expect to see that the cpu is Power10.
|
||||
If this is not the case, then the test environment has problems.
|
||||
If in the future there are cpus that pass the power10_hw test but
|
||||
are not power10, they will need to be added to this check. */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
int ret = 0;
|
||||
#ifdef __BUILTIN_CPU_SUPPORTS__
|
||||
if ( !__builtin_cpu_is ("power10"))
|
||||
{
|
||||
printf ("Error: __builtin_cpu_is says this is not power10, but power10_hw test passed.\n");
|
||||
ret++;
|
||||
}
|
||||
#endif
|
||||
return ret;
|
||||
}
|
Loading…
Add table
Reference in a new issue