re PR fortran/52473 (CSHIFT slow - inline it?)

2017-06-24  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/52473
	* Makefile.am: Add i_cshift1a_c.  Add rules to generate files
	from cshift1a.m4.
	* Makefile.in: Regenerated.
	* m4/cshift1a.m4: New file.
	* m4/cshift.m4 (cshift1): Split up inner loop by removing
	condition.  Use memcpy where possible.  Call helper functions
	based on dtype.
	* libgfortran.h: Add prototypes for cshift1_16_c10,
	cshift1_16_c16, cshift1_16_c4, cshift1_16_c8, cshift1_16_i1,
	cshift1_16_i16, cshift1_16_i2, cshift1_16_i4, cshift1_16_i8,
	cshift1_16_r10, cshift1_16_r16, cshift1_16_r4, cshift1_16_r8,
	cshift1_4_c10, cshift1_4_c16, cshift1_4_c4, cshift1_4_c8,
	cshift1_4_i1, cshift1_4_i16, cshift1_4_i2, cshift1_4_i4,
	cshift1_4_i8, cshift1_4_r10, cshift1_4_r16, cshift1_4_r4,
	cshift1_4_r8, cshift1_8_c10, cshift1_8_c16, cshift1_8_c4,
	cshift1_8_c8, cshift1_8_i1, cshift1_8_i16, cshift1_8_i2,
	cshift1_8_i4, cshift1_8_i8, cshift1_8_r10, cshift1_8_r16,
	cshift1_8_r4 and cshift1_8_r8.
	* generated/cshift1_16_c10.c: New file, generated from cshift1a.m4.
	* generated/cshift1_16_c16.c: New file, generated from cshift1a.m4.
	* generated/cshift1_16_c4.c: New file, generated from cshift1a.m4.
	* generated/cshift1_16_c8.c: New file, generated from cshift1a.m4.
	* generated/cshift1_16_i1.c: New file, generated from cshift1a.m4.
	* generated/cshift1_16_i16.c: New file, generated from cshift1a.m4.
	* generated/cshift1_16_i2.c: New file, generated from cshift1a.m4.
	* generated/cshift1_16_i4.c: New file, generated from cshift1a.m4.
	* generated/cshift1_16_i8.c: New file, generated from cshift1a.m4.
	* generated/cshift1_16_r10.c: New file, generated from cshift1a.m4.
	* generated/cshift1_16_r16.c: New file, generated from cshift1a.m4.
	* generated/cshift1_16_r4.c: New file, generated from cshift1a.m4.
	* generated/cshift1_16_r8.c: New file, generated from cshift1a.m4.
	* generated/cshift1_4_c10.c: New file, generated from cshift1a.m4.
	* generated/cshift1_4_c16.c: New file, generated from cshift1a.m4.
	* generated/cshift1_4_c4.c: New file, generated from cshift1a.m4.
	* generated/cshift1_4_c8.c: New file, generated from cshift1a.m4.
	* generated/cshift1_4_i1.c: New file, generated from cshift1a.m4.
	* generated/cshift1_4_i16.c: New file, generated from cshift1a.m4.
	* generated/cshift1_4_i2.c: New file, generated from cshift1a.m4.
	* generated/cshift1_4_i4.c: New file, generated from cshift1a.m4.
	* generated/cshift1_4_i8.c: New file, generated from cshift1a.m4.
	* generated/cshift1_4_r10.c: New file, generated from cshift1a.m4.
	* generated/cshift1_4_r16.c: New file, generated from cshift1a.m4.
	* generated/cshift1_4_r4.c: New file, generated from cshift1a.m4.
	* generated/cshift1_4_r8.c: New file, generated from cshift1a.m4.
	* generated/cshift1_8_c10.c: New file, generated from cshift1a.m4.
	* generated/cshift1_8_c16.c: New file, generated from cshift1a.m4.
	* generated/cshift1_8_c4.c: New file, generated from cshift1a.m4.
	* generated/cshift1_8_c8.c: New file, generated from cshift1a.m4.
	* generated/cshift1_8_i1.c: New file, generated from cshift1a.m4.
	* generated/cshift1_8_i16.c: New file, generated from cshift1a.m4.
	* generated/cshift1_8_i2.c: New file, generated from cshift1a.m4.
	* generated/cshift1_8_i4.c: New file, generated from cshift1a.m4.
	* generated/cshift1_8_i8.c: New file, generated from cshift1a.m4.
	* generated/cshift1_8_r10.c: New file, generated from cshift1a.m4.
	* generated/cshift1_8_r16.c: New file, generated from cshift1a.m4.
	* generated/cshift1_8_r4.c: New file, generated from cshift1a.m4.
	* generated/cshift1_8_r8.c: New file, generated from cshift1a.m4.

2017-06-24  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/52473
	* gfortran.dg/cshift_2.f90: New test.

From-SVN: r249620
This commit is contained in:
Thomas Koenig 2017-06-24 07:07:56 +00:00
parent ef5b7d19b6
commit e56e3fda60
50 changed files with 9183 additions and 69 deletions

View file

@ -61,12 +61,13 @@ cshift1 (gfc_array_char * const restrict ret,
GFC_INTEGER_16 sh;
index_type arraysize;
index_type size;
index_type type_size;
if (pwhich)
which = *pwhich - 1;
else
which = 0;
if (which < 0 || (which + 1) > GFC_DESCRIPTOR_RANK (array))
runtime_error ("Argument 'DIM' is out of range in call to 'CSHIFT'");
@ -111,6 +112,98 @@ cshift1 (gfc_array_char * const restrict ret,
if (arraysize == 0)
return;
/* See if we should dispatch to a helper function. */
type_size = GFC_DTYPE_TYPE_SIZE (array);
switch (type_size)
{
case GFC_DTYPE_LOGICAL_1:
case GFC_DTYPE_INTEGER_1:
case GFC_DTYPE_DERIVED_1:
cshift1_16_i1 ((gfc_array_i1 *)ret, (gfc_array_i1 *) array,
h, pwhich);
return;
case GFC_DTYPE_LOGICAL_2:
case GFC_DTYPE_INTEGER_2:
cshift1_16_i2 ((gfc_array_i2 *)ret, (gfc_array_i2 *) array,
h, pwhich);
return;
case GFC_DTYPE_LOGICAL_4:
case GFC_DTYPE_INTEGER_4:
cshift1_16_i4 ((gfc_array_i4 *)ret, (gfc_array_i4 *) array,
h, pwhich);
return;
case GFC_DTYPE_LOGICAL_8:
case GFC_DTYPE_INTEGER_8:
cshift1_16_i8 ((gfc_array_i8 *)ret, (gfc_array_i8 *) array,
h, pwhich);
return;
#if defined (HAVE_INTEGER_16)
case GFC_DTYPE_LOGICAL_16:
case GFC_DTYPE_INTEGER_16:
cshift1_16_i16 ((gfc_array_i16 *)ret, (gfc_array_i16 *) array,
h, pwhich);
return;
#endif
case GFC_DTYPE_REAL_4:
cshift1_16_r4 ((gfc_array_r4 *)ret, (gfc_array_r4 *) array,
h, pwhich);
return;
case GFC_DTYPE_REAL_8:
cshift1_16_r8 ((gfc_array_r8 *)ret, (gfc_array_r8 *) array,
h, pwhich);
return;
#if defined (HAVE_REAL_10)
case GFC_DTYPE_REAL_10:
cshift1_16_r10 ((gfc_array_r10 *)ret, (gfc_array_r10 *) array,
h, pwhich);
return;
#endif
#if defined (HAVE_REAL_16)
case GFC_DTYPE_REAL_16:
cshift1_16_r16 ((gfc_array_r16 *)ret, (gfc_array_r16 *) array,
h, pwhich);
return;
#endif
case GFC_DTYPE_COMPLEX_4:
cshift1_16_c4 ((gfc_array_c4 *)ret, (gfc_array_c4 *) array,
h, pwhich);
return;
case GFC_DTYPE_COMPLEX_8:
cshift1_16_c8 ((gfc_array_c8 *)ret, (gfc_array_c8 *) array,
h, pwhich);
return;
#if defined (HAVE_COMPLEX_10)
case GFC_DTYPE_COMPLEX_10:
cshift1_16_c10 ((gfc_array_c10 *)ret, (gfc_array_c10 *) array,
h, pwhich);
return;
#endif
#if defined (HAVE_COMPLEX_16)
case GFC_DTYPE_COMPLEX_16:
cshift1_16_c16 ((gfc_array_c16 *)ret, (gfc_array_c16 *) array,
h, pwhich);
return;
#endif
default:
break;
}
extent[0] = 1;
count[0] = 0;
n = 0;
@ -162,22 +255,41 @@ cshift1 (gfc_array_char * const restrict ret,
{
/* Do the shift for this dimension. */
sh = *hptr;
sh = (div (sh, len)).rem;
/* Normal case should be -len < sh < len; try to
avoid the expensive remainder operation if possible. */
if (sh < 0)
sh += len;
if (unlikely (sh >= len || sh < 0))
{
sh = sh % len;
if (sh < 0)
sh += len;
}
src = &sptr[sh * soffset];
dest = rptr;
for (n = 0; n < len; n++)
if (soffset == size && roffset == size)
{
size_t len1 = sh * size;
size_t len2 = (len - sh) * size;
memcpy (rptr, sptr + len1, len2);
memcpy (rptr + len2, sptr, len1);
}
else
{
memcpy (dest, src, size);
dest += roffset;
if (n == len - sh - 1)
src = sptr;
else
src += soffset;
}
for (n = 0; n < len - sh; n++)
{
memcpy (dest, src, size);
dest += roffset;
src += soffset;
}
for (src = sptr, n = 0; n < sh; n++)
{
memcpy (dest, src, size);
dest += roffset;
src += soffset;
}
}
/* Advance to the next section. */
rptr += rstride0;