* charset.c (iconv_open): Use UTF-16 and UTF-32, not UCS-2 and
	UCS-4.
	(iconv): Rename 'ucs_flag'.
	(GDB_DEFAULT_TARGET_WIDE_CHARSET): Use UTF-32.
	* c-lang.c (charset_for_string_type): Use UTF-16 and UTF-32, not
	UCS-2 and UCS-4.
	(convert_ucn): Use UTF-32.
gdb/testsuite
	* gdb.base/charset.exp: Use UTF-16 and UTF-32, not UCS-2 and
	UCS-4.
	* gdb.base/charset.c (utf_32_string): Rename.
	(init_utf32): Rename.
	(main): Update.
This commit is contained in:
Tom Tromey 2009-09-25 20:53:14 +00:00
parent fe2d172ccb
commit b8899f2b68
6 changed files with 51 additions and 33 deletions

View file

@ -1,3 +1,13 @@
2009-09-25 Tom Tromey <tromey@redhat.com>
* charset.c (iconv_open): Use UTF-16 and UTF-32, not UCS-2 and
UCS-4.
(iconv): Rename 'ucs_flag'.
(GDB_DEFAULT_TARGET_WIDE_CHARSET): Use UTF-32.
* c-lang.c (charset_for_string_type): Use UTF-16 and UTF-32, not
UCS-2 and UCS-4.
(convert_ucn): Use UTF-32.
2009-09-25 Nick Roberts <nickrob@snap.net.nz> 2009-09-25 Nick Roberts <nickrob@snap.net.nz>
* mi/gdb-mi.el: Remove file. * mi/gdb-mi.el: Remove file.

View file

@ -52,17 +52,17 @@ charset_for_string_type (enum c_string_type str_type,
case C_WIDE_STRING: case C_WIDE_STRING:
return target_wide_charset (byte_order); return target_wide_charset (byte_order);
case C_STRING_16: case C_STRING_16:
/* FIXME: UCS-2 is not always correct. */ /* FIXME: UTF-16 is not always correct. */
if (byte_order == BFD_ENDIAN_BIG) if (byte_order == BFD_ENDIAN_BIG)
return "UCS-2BE"; return "UTF-16BE";
else else
return "UCS-2LE"; return "UTF-16LE";
case C_STRING_32: case C_STRING_32:
/* FIXME: UCS-4 is not always correct. */ /* FIXME: UTF-32 is not always correct. */
if (byte_order == BFD_ENDIAN_BIG) if (byte_order == BFD_ENDIAN_BIG)
return "UCS-4BE"; return "UTF-32BE";
else else
return "UCS-4LE"; return "UTF-32LE";
} }
internal_error (__FILE__, __LINE__, "unhandled c_string_type"); internal_error (__FILE__, __LINE__, "unhandled c_string_type");
} }
@ -763,7 +763,7 @@ convert_ucn (char *p, char *limit, const char *dest_charset,
result >>= 8; result >>= 8;
} }
convert_between_encodings ("UCS-4BE", dest_charset, data, 4, 4, output, convert_between_encodings ("UTF-32BE", dest_charset, data, 4, 4, output,
translit_none); translit_none);
return p; return p;

View file

@ -102,17 +102,17 @@
iconv_t iconv_t
iconv_open (const char *to, const char *from) iconv_open (const char *to, const char *from)
{ {
/* We allow conversions from UCS-4BE, wchar_t, and the host charset. /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
We allow conversions to wchar_t and the host charset. */ We allow conversions to wchar_t and the host charset. */
if (strcmp (from, "UCS-4BE") && strcmp (from, "wchar_t") if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
&& strcmp (from, GDB_DEFAULT_HOST_CHARSET)) && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
return -1; return -1;
if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET)) if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
return -1; return -1;
/* Return 1 if we are converting from UCS-4BE, 0 otherwise. This is /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is
used as a flag in calls to iconv. */ used as a flag in calls to iconv. */
return !strcmp (from, "UCS-4BE"); return !strcmp (from, "UTF-32BE");
} }
int int
@ -122,10 +122,10 @@ iconv_close (iconv_t arg)
} }
size_t size_t
iconv (iconv_t ucs_flag, const char **inbuf, size_t *inbytesleft, iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft) char **outbuf, size_t *outbytesleft)
{ {
if (ucs_flag) if (utf_flag)
{ {
while (*inbytesleft >= 4) while (*inbytesleft >= 4)
{ {
@ -193,7 +193,7 @@ iconv (iconv_t ucs_flag, const char **inbuf, size_t *inbytesleft,
#endif #endif
#ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UCS-4" #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
#endif #endif
static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET; static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;

View file

@ -1,3 +1,11 @@
2009-09-25 Tom Tromey <tromey@redhat.com>
* gdb.base/charset.exp: Use UTF-16 and UTF-32, not UCS-2 and
UCS-4.
* gdb.base/charset.c (utf_32_string): Rename.
(init_utf32): Rename.
(main): Update.
2009-09-22 Tom Tromey <tromey@redhat.com> 2009-09-22 Tom Tromey <tromey@redhat.com>
* gdb.python/py-function.exp: Add regression tests. * gdb.python/py-function.exp: Add regression tests.

View file

@ -50,10 +50,10 @@ char ebcdic_us_string[NUM_CHARS];
char ibm1047_string[NUM_CHARS]; char ibm1047_string[NUM_CHARS];
/* We make a phony wchar_t and then pretend that this platform uses /* We make a phony wchar_t and then pretend that this platform uses
UCS-4 (or UCS-2, depending on the size -- same difference for the UTF-32 (or UTF-16, depending on the size -- same difference for the
purposes of this test). */ purposes of this test). */
typedef unsigned int wchar_t; typedef unsigned int wchar_t;
wchar_t ucs_4_string[NUM_CHARS]; wchar_t utf_32_string[NUM_CHARS];
/* We also define a couple phony types for testing the u'' and U'' /* We also define a couple phony types for testing the u'' and U''
support. It is ok if these have the wrong size on some platforms support. It is ok if these have the wrong size on some platforms
@ -103,12 +103,12 @@ fill_run (char string[], int start, int len, int first)
void void
init_ucs4 () init_utf32 ()
{ {
int i; int i;
for (i = 0; i < NUM_CHARS; ++i) for (i = 0; i < NUM_CHARS; ++i)
ucs_4_string[i] = iso_8859_1_string[i] & 0xff; utf_32_string[i] = iso_8859_1_string[i] & 0xff;
} }
int main () int main ()
@ -171,9 +171,9 @@ int main ()
/* The digits, at least, are contiguous. */ /* The digits, at least, are contiguous. */
fill_run (ibm1047_string, 59, 10, 240); fill_run (ibm1047_string, 59, 10, 240);
init_ucs4 (); init_utf32 ();
myvar = ucs_4_string[7]; myvar = utf_32_string[7];
return 0; /* all strings initialized */ return 0; /* all strings initialized */
} }

View file

@ -375,10 +375,10 @@ gdb_expect {
set wchar_size [get_sizeof wchar_t 99] set wchar_size [get_sizeof wchar_t 99]
set wchar_ok 0 set wchar_ok 0
if {$wchar_size == 2} { if {$wchar_size == 2} {
lappend charset_subset UCS-2 lappend charset_subset UTF-16
set wchar_ok 1 set wchar_ok 1
} elseif {$wchar_size == 4} { } elseif {$wchar_size == 4} {
lappend charset_subset UCS-4 lappend charset_subset UTF-32
set wchar_ok 1 set wchar_ok 1
} }
@ -388,7 +388,7 @@ foreach target_charset $charset_subset {
continue continue
} }
if {$target_charset == "UCS-4" || $target_charset == "UCS-2"} { if {$target_charset == "UTF-32" || $target_charset == "UTF-16"} {
set param target-wide-charset set param target-wide-charset
set L L set L L
} else { } else {
@ -424,10 +424,10 @@ foreach target_charset $charset_subset {
# a string in $target_charset. The variable's name is the # a string in $target_charset. The variable's name is the
# character set's name, in lower-case, with all non-identifier # character set's name, in lower-case, with all non-identifier
# characters replaced with '_', with "_string" stuck on the end. # characters replaced with '_', with "_string" stuck on the end.
if {$target_charset == "UCS-2"} { if {$target_charset == "UTF-16"} {
# We still use the ucs_4_string variable -- but the size is # We still use the utf_32_string variable -- but the size is
# correct for UCS-2. # correct for UTF-16.
set var_name ucs_4_string set var_name utf_32_string
} else { } else {
set var_name [string tolower "${target_charset}_string"] set var_name [string tolower "${target_charset}_string"]
regsub -all -- "\[^a-z0-9_\]" $var_name "_" var_name regsub -all -- "\[^a-z0-9_\]" $var_name "_" var_name
@ -556,7 +556,7 @@ gdb_test "print '\\9'" " = \[0-9\]+ '9'"
gdb_test "print \"\\1011\"" " = \"A1\"" gdb_test "print \"\\1011\"" " = \"A1\""
# Tests for wide- or unicode- strings. L is the prefix letter to use, # Tests for wide- or unicode- strings. L is the prefix letter to use,
# either "L" (for wide strings), "u" (for UCS-2), or "U" (for UCS-4). # either "L" (for wide strings), "u" (for UTF-16), or "U" (for UTF-32).
# NAME is used in the test names and should be related to the prefix # NAME is used in the test names and should be related to the prefix
# letter in some easy-to-understand way. # letter in some easy-to-understand way.
proc test_wide_or_unicode {L name} { proc test_wide_or_unicode {L name} {
@ -582,12 +582,12 @@ if {$wchar_ok} {
set ucs2_ok [expr {[get_sizeof char16_t 99] == 2}] set ucs2_ok [expr {[get_sizeof char16_t 99] == 2}]
if {$ucs2_ok} { if {$ucs2_ok} {
test_wide_or_unicode u UCS-2 test_wide_or_unicode u UTF-16
} }
set ucs4_ok [expr {[get_sizeof char32_t 99] == 4}] set ucs4_ok [expr {[get_sizeof char32_t 99] == 4}]
if {$ucs4_ok} { if {$ucs4_ok} {
test_wide_or_unicode U UCS-4 test_wide_or_unicode U UTF-32
} }
# Test an invalid string combination. # Test an invalid string combination.
@ -598,16 +598,16 @@ proc test_combination {L1 name1 L2 name2} {
} }
if {$wchar_ok && $ucs2_ok} { if {$wchar_ok && $ucs2_ok} {
test_combination L wide u UCS-2 test_combination L wide u UTF-16
} }
if {$wchar_ok && $ucs4_ok} { if {$wchar_ok && $ucs4_ok} {
test_combination L wide U UCS-4 test_combination L wide U UTF-32
# Regression test for a typedef to a typedef. # Regression test for a typedef to a typedef.
gdb_test "print myvar" "= \[0-9\]+ L'A'" \ gdb_test "print myvar" "= \[0-9\]+ L'A'" \
"typedef to wchar_t" "typedef to wchar_t"
} }
if {$ucs2_ok && $ucs4_ok} { if {$ucs2_ok && $ucs4_ok} {
test_combination u UCS-2 U UCS-4 test_combination u UTF-16 U UTF-32
} }
# Regression test for a cleanup bug in the charset code. # Regression test for a cleanup bug in the charset code.