re PR middle-end/18785 (isdigit builtin function fails with EBCDIC character sets)
PR 18785 libcpp: * charset.c (LAST_POSSIBLY_BASIC_SOURCE_CHAR): New helper macro. (cpp_host_to_exec_charset): New function. * include/cpplib.h: Declare cpp_host_to_exec_charset. gcc: * langhooks.h (struct lang_hooks): Add to_target_charset. * langhooks.c (lhd_to_target_charset): New function. * langhooks-def.h: Declare lhd_to_target_charset. (LANG_HOOKS_TO_TARGET_CHARSET): New macro. (LANG_HOOKS_INITIALIZER): Update. * c-common.c (c_common_to_target_charset): New function. * c-common.h: Declare it. * c-objc-common.h (LANG_HOOKS_TO_TARGET_CHARSET): Set to c_common_to_target_charset. * defaults.c (TARGET_BELL, TARGET_BS, TARGET_CR, TARGET_DIGIT0) (TARGET_ESC, TARGET_FF, TARGET_NEWLINE, TARGET_TAB, TARGET_VT): Delete definitions. * system.h: Poison them. * doc/tm.texi: Don't discuss them. * builtins.c (fold_builtin_isdigit): Use lang_hooks.to_target_charset. * c-pretty-print.c (pp_c_integer_constant): Don't use pp_c_char. (pp_c_char): Do not attempt to generate letter escapes for newline, tab, etc. * config/arm/arm.c (output_ascii_pseudo_op): Likewise. * config/mips/mips.c (mips_output_ascii): Likewise. gcc/cp: * cp-objcp-common.h (LANG_HOOKS_TO_TARGET_CHARSET): Set to c_common_to_target_charset. Delete bogus comment. gcc/testsuite: * gcc.dg/charset/builtin1.c: New test. From-SVN: r95304
This commit is contained in:
parent
5920b5d2e8
commit
c5ff069dc4
21 changed files with 370 additions and 325 deletions
|
@ -81,8 +81,10 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
|
|||
|
||||
/* Per-host-charset configuration, selected by HOST_CHARSET.
   SOURCE_CHARSET is the encoding name paired with each host character
   set (its consumers are outside this excerpt).
   LAST_POSSIBLY_BASIC_SOURCE_CHAR is the upper bound that
   cpp_host_to_exec_charset uses to reject characters that cannot be
   members of the basic source character set: 0x7e ('~') on ASCII
   hosts, and the whole single-byte range (0xFF) on EBCDIC hosts, where
   an exact membership test is considered too hard.  Any other host
   character set is a build-time error.  */
#if HOST_CHARSET == HOST_CHARSET_ASCII
|
||||
#define SOURCE_CHARSET "UTF-8"
|
||||
#define LAST_POSSIBLY_BASIC_SOURCE_CHAR 0x7e
|
||||
#elif HOST_CHARSET == HOST_CHARSET_EBCDIC
|
||||
#define SOURCE_CHARSET "UTF-EBCDIC"
|
||||
#define LAST_POSSIBLY_BASIC_SOURCE_CHAR 0xFF
|
||||
#else
|
||||
#error "Unrecognized basic host character set"
|
||||
#endif
|
||||
|
@ -714,6 +716,63 @@ _cpp_destroy_iconv (cpp_reader *pfile)
|
|||
}
|
||||
}
|
||||
|
||||
/* Utility routine for use by a full compiler. C is a character taken
|
||||
from the *basic* source character set, encoded in the host's
|
||||
execution encoding. Convert it to (the target's) execution
|
||||
encoding, and return that value.
|
||||
|
||||
Issues an internal error if C's representation in the narrow
|
||||
execution character set fails to be a single-byte value (C99
|
||||
5.2.1p3: "The representation of each member of the source and
|
||||
execution character sets shall fit in a byte.") May also issue an
|
||||
internal error if C fails to be a member of the basic source
|
||||
character set (testing this exactly is too hard, especially when
|
||||
the host character set is EBCDIC). */
|
||||
cppchar_t
|
||||
cpp_host_to_exec_charset (cpp_reader *pfile, cppchar_t c)
|
||||
{
|
||||
uchar sbuf[1];
|
||||
struct _cpp_strbuf tbuf;
|
||||
|
||||
/* This test is merely an approximation, but it suffices to catch
|
||||
the most important thing, which is that we don't get handed a
|
||||
character outside the unibyte range of the host character set. */
|
||||
if (c > LAST_POSSIBLY_BASIC_SOURCE_CHAR)
|
||||
{
|
||||
cpp_error (pfile, CPP_DL_ICE,
|
||||
"character 0x%lx is not in the basic source character set\n",
|
||||
(unsigned long)c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Being a character in the unibyte range of the host character set,
|
||||
we can safely splat it into a one-byte buffer and trust that that
|
||||
is a well-formed string. */
|
||||
sbuf[0] = c;
|
||||
|
||||
/* This should never need to reallocate, but just in case... */
|
||||
tbuf.asize = 1;
|
||||
tbuf.text = xmalloc (tbuf.asize);
|
||||
tbuf.len = 0;
|
||||
|
||||
if (!APPLY_CONVERSION (pfile->narrow_cset_desc, sbuf, 1, &tbuf))
|
||||
{
|
||||
cpp_errno (pfile, CPP_DL_ICE, "converting to execution character set");
|
||||
return 0;
|
||||
}
|
||||
if (tbuf.len != 1)
|
||||
{
|
||||
cpp_error (pfile, CPP_DL_ICE,
|
||||
"character 0x%lx is not unibyte in execution character set",
|
||||
(unsigned long)c);
|
||||
return 0;
|
||||
}
|
||||
c = tbuf.text[0];
|
||||
free(tbuf.text);
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Utility routine that computes a mask of the form 0000...111... with
|
||||
WIDTH 1-bits. */
|
||||
|
@ -727,8 +786,6 @@ width_to_mask (size_t width)
|
|||
return ((size_t) 1 << width) - 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Returns 1 if C is valid in an identifier, 2 if C is valid except at
|
||||
the start of an identifier, and 0 if C is not valid in an
|
||||
identifier. We assume C has already gone through the checks of
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue