re PR fortran/80975 (matmul for zero-length arrays)
2017-06-06 Thomas Koenig <tkoenig@gcc.gnu.org>

    PR fortran/80975
    * m4/matmul_internal.m4: Move zeroing before early return.
    * generated/matmul_c10.c: Regenerated.
    * generated/matmul_c16.c: Regenerated.
    * generated/matmul_c4.c: Regenerated.
    * generated/matmul_c8.c: Regenerated.
    * generated/matmul_i1.c: Regenerated.
    * generated/matmul_i16.c: Regenerated.
    * generated/matmul_i2.c: Regenerated.
    * generated/matmul_i4.c: Regenerated.
    * generated/matmul_i8.c: Regenerated.
    * generated/matmul_r10.c: Regenerated.
    * generated/matmul_r16.c: Regenerated.
    * generated/matmul_r4.c: Regenerated.
    * generated/matmul_r8.c: Regenerated.
    * generated/matmulavx128_c10.c: Regenerated.
    * generated/matmulavx128_c16.c: Regenerated.
    * generated/matmulavx128_c4.c: Regenerated.
    * generated/matmulavx128_c8.c: Regenerated.
    * generated/matmulavx128_i1.c: Regenerated.
    * generated/matmulavx128_i16.c: Regenerated.
    * generated/matmulavx128_i2.c: Regenerated.
    * generated/matmulavx128_i4.c: Regenerated.
    * generated/matmulavx128_i8.c: Regenerated.
    * generated/matmulavx128_r10.c: Regenerated.
    * generated/matmulavx128_r16.c: Regenerated.
    * generated/matmulavx128_r4.c: Regenerated.
    * generated/matmulavx128_r8.c: Regenerated.

2017-06-06 Thomas Koenig <tkoenig@gcc.gnu.org>

    PR fortran/80975
    * gfortran.dg/matmul_16.f90: New test.
    * gfortran.dg/inline_matmul_18.f90: New test.

From-SVN: r248932
This commit is contained in:
parent
dc94972832
commit
bbf9741600
31 changed files with 526 additions and 460 deletions
|
@ -1,3 +1,9 @@
|
|||
2017-06-06 Thomas Koenig <tkoenig@gcc.gnu.org>
|
||||
|
||||
PR fortran/80975
|
||||
* gfortran.dg/matmul_16.f90: New test.
|
||||
* gfortran.dg/inline_matmul_18.f90: New test.
|
||||
|
||||
2017-06-06 Marek Polacek <polacek@redhat.com>
|
||||
|
||||
PR c/79983
|
||||
|
|
16
gcc/testsuite/gfortran.dg/inline_matmul_18.f90
Normal file
16
gcc/testsuite/gfortran.dg/inline_matmul_18.f90
Normal file
|
@ -0,0 +1,16 @@
|
|||
! { dg-do run }
|
||||
! { dg-options "-O -finline-matmul-limit=100 -fdump-tree-optimized" }
|
||||
! PR 80975 - this did not zero the result array in the library version;
|
||||
! make sure this also doesn't happen in the inline version.
|
||||
program bogus_matmul
  ! Regression check for PR 80975: the product of a 3x0 matrix and a
  ! zero-length vector must set all three result elements to zero
  ! instead of leaving the destination untouched.
  implicit none
  ! Default real is used on purpose: the dg-final directive that follows
  ! this program scans the optimized dump for "matmul_r4".
  real :: M(3,0), v(0), w(3)

  ! Pre-fill the result with a non-zero sentinel so that a matmul which
  ! never writes to w is detected by the check below.
  w = 7
  w = matmul(M, v)

  ! Exact comparison against zero is intended here: no arithmetic is
  ! involved, each element is expected to be assigned a plain zero.
  ! Signal failure with a non-zero exit status via standard STOP rather
  ! than the non-standard ABORT extension.
  if (any(w /= 0)) stop 1
end program bogus_matmul
|
||||
! { dg-final { scan-tree-dump-times "matmul_r4" 0 "optimized" } }
|
||||
|
13
gcc/testsuite/gfortran.dg/matmul_16.f90
Normal file
13
gcc/testsuite/gfortran.dg/matmul_16.f90
Normal file
|
@ -0,0 +1,13 @@
|
|||
! { dg-do run }
|
||||
! { dg-options "-finline-matmul-limit=0" }
|
||||
! PR 80975 - this did not zero the result array
|
||||
program bogus_matmul
  ! Regression check for PR 80975: the product of a 3x0 matrix and a
  ! zero-length vector must set all three result elements to zero
  ! instead of leaving the destination untouched.
  ! NOTE(review): the dg-options line above passes -finline-matmul-limit=0,
  ! presumably so that the library matmul routine is exercised rather than
  ! inlined code -- confirm against the GCC option documentation.
  implicit none
  real :: M(3,0), v(0), w(3)

  ! Pre-fill the result with a non-zero sentinel so that a matmul which
  ! never writes to w is detected by the check below.
  w = 7
  w = matmul(M, v)

  ! Exact comparison against zero is intended here: no arithmetic is
  ! involved, each element is expected to be assigned a plain zero.
  ! Signal failure with a non-zero exit status via standard STOP rather
  ! than the non-standard ABORT extension.
  if (any(w /= 0)) stop 1
end program bogus_matmul
|
|
@ -1,3 +1,34 @@
|
|||
2017-06-06 Thomas Koenig <tkoenig@gcc.gnu.org>
|
||||
|
||||
PR fortran/80975
|
||||
* m4/matmul_internal.m4: Move zeroing before early return.
|
||||
* generated/matmul_c10.c: Regenerated.
|
||||
* generated/matmul_c16.c: Regenerated.
|
||||
* generated/matmul_c4.c: Regenerated.
|
||||
* generated/matmul_c8.c: Regenerated.
|
||||
* generated/matmul_i1.c: Regenerated.
|
||||
* generated/matmul_i16.c: Regenerated.
|
||||
* generated/matmul_i2.c: Regenerated.
|
||||
* generated/matmul_i4.c: Regenerated.
|
||||
* generated/matmul_i8.c: Regenerated.
|
||||
* generated/matmul_r10.c: Regenerated.
|
||||
* generated/matmul_r16.c: Regenerated.
|
||||
* generated/matmul_r4.c: Regenerated.
|
||||
* generated/matmul_r8.c: Regenerated.
|
||||
* generated/matmulavx128_c10.c: Regenerated.
|
||||
* generated/matmulavx128_c16.c: Regenerated.
|
||||
* generated/matmulavx128_c4.c: Regenerated.
|
||||
* generated/matmulavx128_c8.c: Regenerated.
|
||||
* generated/matmulavx128_i1.c: Regenerated.
|
||||
* generated/matmulavx128_i16.c: Regenerated.
|
||||
* generated/matmulavx128_i2.c: Regenerated.
|
||||
* generated/matmulavx128_i4.c: Regenerated.
|
||||
* generated/matmulavx128_i8.c: Regenerated.
|
||||
* generated/matmulavx128_r10.c: Regenerated.
|
||||
* generated/matmulavx128_r16.c: Regenerated.
|
||||
* generated/matmulavx128_r4.c: Regenerated.
|
||||
* generated/matmulavx128_r8.c: Regenerated.
|
||||
|
||||
2017-05-29 Jerry DeLisle <jvdelisle@gcc.gnu.org>
|
||||
|
||||
PR libgfortran/53029
|
||||
|
|
|
@ -307,6 +307,11 @@ matmul_c10_avx (gfc_array_c10 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -319,11 +324,6 @@ matmul_c10_avx (gfc_array_c10 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -859,6 +859,11 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -871,11 +876,6 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1411,6 +1411,11 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1423,11 +1428,6 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1977,6 +1977,11 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1989,11 +1994,6 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -2603,6 +2603,11 @@ matmul_c10 (gfc_array_c10 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -2615,11 +2620,6 @@ matmul_c10 (gfc_array_c10 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -307,6 +307,11 @@ matmul_c16_avx (gfc_array_c16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -319,11 +324,6 @@ matmul_c16_avx (gfc_array_c16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -859,6 +859,11 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -871,11 +876,6 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1411,6 +1411,11 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1423,11 +1428,6 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1977,6 +1977,11 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1989,11 +1994,6 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -2603,6 +2603,11 @@ matmul_c16 (gfc_array_c16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -2615,11 +2620,6 @@ matmul_c16 (gfc_array_c16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -307,6 +307,11 @@ matmul_c4_avx (gfc_array_c4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -319,11 +324,6 @@ matmul_c4_avx (gfc_array_c4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -859,6 +859,11 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -871,11 +876,6 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1411,6 +1411,11 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1423,11 +1428,6 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1977,6 +1977,11 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1989,11 +1994,6 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -2603,6 +2603,11 @@ matmul_c4 (gfc_array_c4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -2615,11 +2620,6 @@ matmul_c4 (gfc_array_c4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -307,6 +307,11 @@ matmul_c8_avx (gfc_array_c8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -319,11 +324,6 @@ matmul_c8_avx (gfc_array_c8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -859,6 +859,11 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -871,11 +876,6 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1411,6 +1411,11 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1423,11 +1428,6 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1977,6 +1977,11 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1989,11 +1994,6 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -2603,6 +2603,11 @@ matmul_c8 (gfc_array_c8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -2615,11 +2620,6 @@ matmul_c8 (gfc_array_c8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -307,6 +307,11 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_1)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -319,11 +324,6 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_1)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -859,6 +859,11 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_1)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -871,11 +876,6 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_1)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1411,6 +1411,11 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_1)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1423,11 +1428,6 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_1)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1977,6 +1977,11 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_1)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1989,11 +1994,6 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_1)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -2603,6 +2603,11 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_1)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -2615,11 +2620,6 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_1)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -307,6 +307,11 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -319,11 +324,6 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -859,6 +859,11 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -871,11 +876,6 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1411,6 +1411,11 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1423,11 +1428,6 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1977,6 +1977,11 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1989,11 +1994,6 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -2603,6 +2603,11 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -2615,11 +2620,6 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -307,6 +307,11 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_2)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -319,11 +324,6 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_2)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -859,6 +859,11 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_2)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -871,11 +876,6 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_2)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1411,6 +1411,11 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_2)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1423,11 +1428,6 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_2)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1977,6 +1977,11 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_2)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1989,11 +1994,6 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_2)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -2603,6 +2603,11 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_2)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -2615,11 +2620,6 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_2)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -307,6 +307,11 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -319,11 +324,6 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -859,6 +859,11 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -871,11 +876,6 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1411,6 +1411,11 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1423,11 +1428,6 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1977,6 +1977,11 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1989,11 +1994,6 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -2603,6 +2603,11 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -2615,11 +2620,6 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -307,6 +307,11 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -319,11 +324,6 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -859,6 +859,11 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -871,11 +876,6 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1411,6 +1411,11 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1423,11 +1428,6 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1977,6 +1977,11 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1989,11 +1994,6 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -2603,6 +2603,11 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -2615,11 +2620,6 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -307,6 +307,11 @@ matmul_r10_avx (gfc_array_r10 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_10)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -319,11 +324,6 @@ matmul_r10_avx (gfc_array_r10 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_10));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_10)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -859,6 +859,11 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_10)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -871,11 +876,6 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_10));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_10)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1411,6 +1411,11 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_10)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1423,11 +1428,6 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_10));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_10)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1977,6 +1977,11 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_10)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1989,11 +1994,6 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_10));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_10)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -2603,6 +2603,11 @@ matmul_r10 (gfc_array_r10 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_10)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -2615,11 +2620,6 @@ matmul_r10 (gfc_array_r10 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_10));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_10)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -307,6 +307,11 @@ matmul_r16_avx (gfc_array_r16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -319,11 +324,6 @@ matmul_r16_avx (gfc_array_r16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -859,6 +859,11 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -871,11 +876,6 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1411,6 +1411,11 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1423,11 +1428,6 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1977,6 +1977,11 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1989,11 +1994,6 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -2603,6 +2603,11 @@ matmul_r16 (gfc_array_r16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -2615,11 +2620,6 @@ matmul_r16 (gfc_array_r16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -307,6 +307,11 @@ matmul_r4_avx (gfc_array_r4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -319,11 +324,6 @@ matmul_r4_avx (gfc_array_r4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -859,6 +859,11 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -871,11 +876,6 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1411,6 +1411,11 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1423,11 +1428,6 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1977,6 +1977,11 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1989,11 +1994,6 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -2603,6 +2603,11 @@ matmul_r4 (gfc_array_r4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -2615,11 +2620,6 @@ matmul_r4 (gfc_array_r4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -307,6 +307,11 @@ matmul_r8_avx (gfc_array_r8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -319,11 +324,6 @@ matmul_r8_avx (gfc_array_r8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -859,6 +859,11 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -871,11 +876,6 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1411,6 +1411,11 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1423,11 +1428,6 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -1977,6 +1977,11 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -1989,11 +1994,6 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -2603,6 +2603,11 @@ matmul_r8 (gfc_array_r8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -2615,11 +2620,6 @@ matmul_r8 (gfc_array_r8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -272,6 +272,11 @@ matmul_c10_avx128_fma3 (gfc_array_c10 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -284,11 +289,6 @@ matmul_c10_avx128_fma3 (gfc_array_c10 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -825,6 +825,11 @@ matmul_c10_avx128_fma4 (gfc_array_c10 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -837,11 +842,6 @@ matmul_c10_avx128_fma4 (gfc_array_c10 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -272,6 +272,11 @@ matmul_c16_avx128_fma3 (gfc_array_c16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -284,11 +289,6 @@ matmul_c16_avx128_fma3 (gfc_array_c16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -825,6 +825,11 @@ matmul_c16_avx128_fma4 (gfc_array_c16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -837,11 +842,6 @@ matmul_c16_avx128_fma4 (gfc_array_c16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -272,6 +272,11 @@ matmul_c4_avx128_fma3 (gfc_array_c4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -284,11 +289,6 @@ matmul_c4_avx128_fma3 (gfc_array_c4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -825,6 +825,11 @@ matmul_c4_avx128_fma4 (gfc_array_c4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -837,11 +842,6 @@ matmul_c4_avx128_fma4 (gfc_array_c4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -272,6 +272,11 @@ matmul_c8_avx128_fma3 (gfc_array_c8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -284,11 +289,6 @@ matmul_c8_avx128_fma3 (gfc_array_c8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -825,6 +825,11 @@ matmul_c8_avx128_fma4 (gfc_array_c8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -837,11 +842,6 @@ matmul_c8_avx128_fma4 (gfc_array_c8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -272,6 +272,11 @@ matmul_i1_avx128_fma3 (gfc_array_i1 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_1)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -284,11 +289,6 @@ matmul_i1_avx128_fma3 (gfc_array_i1 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_1)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -825,6 +825,11 @@ matmul_i1_avx128_fma4 (gfc_array_i1 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_1)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -837,11 +842,6 @@ matmul_i1_avx128_fma4 (gfc_array_i1 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_1)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -272,6 +272,11 @@ matmul_i16_avx128_fma3 (gfc_array_i16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -284,11 +289,6 @@ matmul_i16_avx128_fma3 (gfc_array_i16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -825,6 +825,11 @@ matmul_i16_avx128_fma4 (gfc_array_i16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -837,11 +842,6 @@ matmul_i16_avx128_fma4 (gfc_array_i16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -272,6 +272,11 @@ matmul_i2_avx128_fma3 (gfc_array_i2 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_2)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -284,11 +289,6 @@ matmul_i2_avx128_fma3 (gfc_array_i2 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_2)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -825,6 +825,11 @@ matmul_i2_avx128_fma4 (gfc_array_i2 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_2)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -837,11 +842,6 @@ matmul_i2_avx128_fma4 (gfc_array_i2 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_2)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -272,6 +272,11 @@ matmul_i4_avx128_fma3 (gfc_array_i4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -284,11 +289,6 @@ matmul_i4_avx128_fma3 (gfc_array_i4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -825,6 +825,11 @@ matmul_i4_avx128_fma4 (gfc_array_i4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -837,11 +842,6 @@ matmul_i4_avx128_fma4 (gfc_array_i4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -272,6 +272,11 @@ matmul_i8_avx128_fma3 (gfc_array_i8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -284,11 +289,6 @@ matmul_i8_avx128_fma3 (gfc_array_i8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -825,6 +825,11 @@ matmul_i8_avx128_fma4 (gfc_array_i8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -837,11 +842,6 @@ matmul_i8_avx128_fma4 (gfc_array_i8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_INTEGER_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -272,6 +272,11 @@ matmul_r10_avx128_fma3 (gfc_array_r10 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_10)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -284,11 +289,6 @@ matmul_r10_avx128_fma3 (gfc_array_r10 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_10));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_10)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -825,6 +825,11 @@ matmul_r10_avx128_fma4 (gfc_array_r10 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_10)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -837,11 +842,6 @@ matmul_r10_avx128_fma4 (gfc_array_r10 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_10));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_10)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -272,6 +272,11 @@ matmul_r16_avx128_fma3 (gfc_array_r16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -284,11 +289,6 @@ matmul_r16_avx128_fma3 (gfc_array_r16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -825,6 +825,11 @@ matmul_r16_avx128_fma4 (gfc_array_r16 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_16)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -837,11 +842,6 @@ matmul_r16_avx128_fma4 (gfc_array_r16 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_16));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_16)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -272,6 +272,11 @@ matmul_r4_avx128_fma3 (gfc_array_r4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -284,11 +289,6 @@ matmul_r4_avx128_fma3 (gfc_array_r4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -825,6 +825,11 @@ matmul_r4_avx128_fma4 (gfc_array_r4 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_4)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -837,11 +842,6 @@ matmul_r4_avx128_fma4 (gfc_array_r4 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_4));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_4)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -272,6 +272,11 @@ matmul_r8_avx128_fma3 (gfc_array_r8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -284,11 +289,6 @@ matmul_r8_avx128_fma3 (gfc_array_r8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
@ -825,6 +825,11 @@ matmul_r8_avx128_fma4 (gfc_array_r8 * const restrict retarray,
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_8)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -837,11 +842,6 @@ matmul_r8_avx128_fma4 (gfc_array_r8 * const restrict retarray,
|
|||
|
||||
t1 = malloc (t1_dim * sizeof(GFC_REAL_8));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = (GFC_REAL_8)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
|
@ -223,6 +223,11 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
|
|||
b_offset = 1 + b_dim1;
|
||||
b -= b_offset;
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = ('rtype_name`)0;
|
||||
|
||||
/* Early exit if possible */
|
||||
if (m == 0 || n == 0 || k == 0)
|
||||
return;
|
||||
|
@ -235,11 +240,6 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
|
|||
|
||||
t1 = malloc (t1_dim * sizeof('rtype_name`));
|
||||
|
||||
/* Empty c first. */
|
||||
for (j=1; j<=n; j++)
|
||||
for (i=1; i<=m; i++)
|
||||
c[i + j * c_dim1] = ('rtype_name`)0;
|
||||
|
||||
/* Start turning the crank. */
|
||||
i1 = n;
|
||||
for (jj = 1; jj <= i1; jj += 512)
|
||||
|
|
Loading…
Add table
Reference in a new issue