re PR middle-end/18785 (isdigit builtin function fails with EBCDIC character sets)

PR 18785
libcpp:
	* charset.c (LAST_POSSIBLY_BASIC_SOURCE_CHAR): New helper macro.
	(cpp_host_to_exec_charset): New function.
	* include/cpplib.h: Declare cpp_host_to_exec_charset.

gcc:
	* langhooks.h (struct lang_hooks): Add to_target_charset.
	* langhooks.c (lhd_to_target_charset): New function.
	* langhooks-def.h: Declare lhd_to_target_charset.
	(LANG_HOOKS_TO_TARGET_CHARSET): New macro.
	(LANG_HOOKS_INITIALIZER): Update.
	* c-common.c (c_common_to_target_charset): New function.
	* c-common.h: Declare it.
	* c-objc-common.h (LANG_HOOKS_TO_TARGET_CHARSET): Set to
	c_common_to_target_charset.

	* defaults.c (TARGET_BELL, TARGET_BS, TARGET_CR, TARGET_DIGIT0)
	(TARGET_ESC, TARGET_FF, TARGET_NEWLINE, TARGET_TAB, TARGET_VT):
	Delete definitions.
	* system.h: Poison them.
	* doc/tm.texi: Don't discuss them.
	* builtins.c (fold_builtin_isdigit): Use lang_hooks.to_target_charset.
	* c-pretty-print.c (pp_c_integer_constant): Don't use pp_c_char.
	(pp_c_char): Do not attempt to generate letter escapes for
	newline, tab, etc.
	* config/arm/arm.c (output_ascii_pseudo_op): Likewise.
	* config/mips/mips.c (mips_output_ascii): Likewise.
gcc/cp:
	* cp-objcp-common.h (LANG_HOOKS_TO_TARGET_CHARSET): Set to
	c_common_to_target_charset.  Delete bogus comment.
gcc/testsuite:
	* gcc.dg/charset/builtin1.c: New test.

From-SVN: r95304
This commit is contained in:
Zack Weinberg 2005-02-20 17:01:32 +00:00
parent 5920b5d2e8
commit c5ff069dc4
21 changed files with 370 additions and 325 deletions

View file

@ -81,8 +81,10 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/* Per-host-character-set configuration, selected by HOST_CHARSET.

   SOURCE_CHARSET is the encoding name string paired with the host
   character set: "UTF-8" for ASCII hosts, "UTF-EBCDIC" for EBCDIC
   hosts.

   LAST_POSSIBLY_BASIC_SOURCE_CHAR is the largest character value that
   cpp_host_to_exec_charset accepts as possibly being a member of the
   basic source character set; anything above it is rejected with an
   internal error.  It is only an upper bound, not an exact membership
   test.  */
#if HOST_CHARSET == HOST_CHARSET_ASCII
#define SOURCE_CHARSET "UTF-8"
/* 0x7e is '~', the highest printable ASCII code.  */
#define LAST_POSSIBLY_BASIC_SOURCE_CHAR 0x7e
#elif HOST_CHARSET == HOST_CHARSET_EBCDIC
#define SOURCE_CHARSET "UTF-EBCDIC"
/* On EBCDIC hosts every one-byte value (up to 0xFF) is let through.  */
#define LAST_POSSIBLY_BASIC_SOURCE_CHAR 0xFF
#else
/* Any other host character set is a configuration error.  */
#error "Unrecognized basic host character set"
#endif
@ -714,6 +716,63 @@ _cpp_destroy_iconv (cpp_reader *pfile)
}
}
/* Utility routine for use by a full compiler.  C is a character taken
   from the *basic* source character set, encoded in the host's
   execution encoding.  Convert it to (the target's) execution
   encoding, and return that value.

   Issues an internal error if C's representation in the narrow
   execution character set fails to be a single-byte value (C99
   5.2.1p3: "The representation of each member of the source and
   execution character sets shall fit in a byte.")  May also issue an
   internal error if C fails to be a member of the basic source
   character set (testing this exactly is too hard, especially when
   the host character set is EBCDIC).

   Returns 0 after reporting any of these errors.  The temporary
   conversion buffer is released on every path, including the error
   paths.  */
cppchar_t
cpp_host_to_exec_charset (cpp_reader *pfile, cppchar_t c)
{
  uchar sbuf[1];
  struct _cpp_strbuf tbuf;
  cppchar_t result = 0;

  /* This test is merely an approximation, but it suffices to catch
     the most important thing, which is that we don't get handed a
     character outside the unibyte range of the host character set.
     Nothing has been allocated yet, so a plain return is fine.  */
  if (c > LAST_POSSIBLY_BASIC_SOURCE_CHAR)
    {
      cpp_error (pfile, CPP_DL_ICE,
		 "character 0x%lx is not in the basic source character set\n",
		 (unsigned long)c);
      return 0;
    }

  /* Being a character in the unibyte range of the host character set,
     we can safely splat it into a one-byte buffer and trust that that
     is a well-formed string.  */
  sbuf[0] = c;

  /* This should never need to reallocate, but just in case...  */
  tbuf.asize = 1;
  tbuf.text = xmalloc (tbuf.asize);
  tbuf.len = 0;

  /* From here on every outcome falls through to the single free below,
     so the buffer is not leaked when a diagnostic is issued.  The
     diagnostic is emitted before free so that errno is still the one
     set by the failed conversion when cpp_errno reads it.  */
  if (!APPLY_CONVERSION (pfile->narrow_cset_desc, sbuf, 1, &tbuf))
    cpp_errno (pfile, CPP_DL_ICE, "converting to execution character set");
  else if (tbuf.len != 1)
    cpp_error (pfile, CPP_DL_ICE,
	       "character 0x%lx is not unibyte in execution character set",
	       (unsigned long)c);
  else
    result = tbuf.text[0];

  /* Free via tbuf.text rather than a saved pointer: the conversion may
     have reallocated the buffer.  */
  free (tbuf.text);
  return result;
}
/* Utility routine that computes a mask of the form 0000...111... with
WIDTH 1-bits. */
@ -727,8 +786,6 @@ width_to_mask (size_t width)
return ((size_t) 1 << width) - 1;
}
/* Returns 1 if C is valid in an identifier, 2 if C is valid except at
the start of an identifier, and 0 if C is not valid in an
identifier. We assume C has already gone through the checks of