Support UTF-8 identifiers in C/C++ expressions (PR gdb/22973)
Factor cp_ident_is_alpha/cp_ident_is_alnum out of gdb/cp-name-parser.y and use them in the C/C++ expression parser too. New test included. gdb/ChangeLog: 2018-05-22 Pedro Alves <palves@redhat.com> 張俊芝 <zjz@zjz.name> PR gdb/22973 * c-exp.y: Include "c-support.h". (parse_number, c_parse_escape, lex_one_token): Use TOLOWER instead of tolower. Use c_ident_is_alpha to scan names. * c-lang.c: Include "c-support.h". (convert_ucn, convert_octal, convert_hex, convert_escape): Use ISXDIGIT instead of isxdigit and ISDIGIT instead of isdigit. * c-support.h: New file, with bits factored out from ... * cp-name-parser.y: ... this file. Include "c-support.h". (cp_ident_is_alpha, cp_ident_is_alnum): Deleted, moved to c-support.h and renamed. (symbol_end, yylex): Adjust. gdb/testsuite/ChangeLog: 2018-05-22 Pedro Alves <palves@redhat.com> PR gdb/22973 * gdb.base/utf8-identifiers.c: New file. * gdb.base/utf8-identifiers.exp: New file.
This commit is contained in:
parent
0ec848ad25
commit
b1b60145ae
8 changed files with 240 additions and 44 deletions
|
@ -1,3 +1,20 @@
|
|||
2018-05-22 Pedro Alves <palves@redhat.com>
|
||||
張俊芝 <zjz@zjz.name>
|
||||
|
||||
PR gdb/22973
|
||||
* c-exp.y: Include "c-support.h".
|
||||
(parse_number, c_parse_escape, lex_one_token): Use TOLOWER instead
|
||||
of tolower. Use c_ident_is_alpha to scan names.
|
||||
* c-lang.c: Include "c-support.h".
|
||||
(convert_ucn, convert_octal, convert_hex, convert_escape): Use
|
||||
ISXDIGIT instead of isxdigit and ISDIGIT instead of isdigit.
|
||||
* c-support.h: New file, with bits factored out from ...
|
||||
* cp-name-parser.y: ... this file.
|
||||
Include "c-support.h".
|
||||
(cp_ident_is_alpha, cp_ident_is_alnum): Deleted, moved to
|
||||
c-support.h and renamed.
|
||||
(symbol_end, yylex): Adjust.
|
||||
|
||||
2018-05-22 Pedro Franco de Carvalho <pedromfc@linux.vnet.ibm.com>
|
||||
|
||||
* arch/ppc-linux-common.c (ppc_linux_has_isa205): Change the
|
||||
|
|
27
gdb/c-exp.y
27
gdb/c-exp.y
|
@ -42,6 +42,7 @@
|
|||
#include "parser-defs.h"
|
||||
#include "language.h"
|
||||
#include "c-lang.h"
|
||||
#include "c-support.h"
|
||||
#include "bfd.h" /* Required by objfiles.h. */
|
||||
#include "symfile.h" /* Required by objfiles.h. */
|
||||
#include "objfiles.h" /* For have_full_symbols and have_partial_symbols */
|
||||
|
@ -1806,13 +1807,13 @@ parse_number (struct parser_state *par_state,
|
|||
len -= 2;
|
||||
}
|
||||
/* Handle suffixes: 'f' for float, 'l' for long double. */
|
||||
else if (len >= 1 && tolower (p[len - 1]) == 'f')
|
||||
else if (len >= 1 && TOLOWER (p[len - 1]) == 'f')
|
||||
{
|
||||
putithere->typed_val_float.type
|
||||
= parse_type (par_state)->builtin_float;
|
||||
len -= 1;
|
||||
}
|
||||
else if (len >= 1 && tolower (p[len - 1]) == 'l')
|
||||
else if (len >= 1 && TOLOWER (p[len - 1]) == 'l')
|
||||
{
|
||||
putithere->typed_val_float.type
|
||||
= parse_type (par_state)->builtin_long_double;
|
||||
|
@ -2023,9 +2024,9 @@ c_parse_escape (const char **ptr, struct obstack *output)
|
|||
if (output)
|
||||
obstack_grow_str (output, "\\x");
|
||||
++tokptr;
|
||||
if (!isxdigit (*tokptr))
|
||||
if (!ISXDIGIT (*tokptr))
|
||||
error (_("\\x escape without a following hex digit"));
|
||||
while (isxdigit (*tokptr))
|
||||
while (ISXDIGIT (*tokptr))
|
||||
{
|
||||
if (output)
|
||||
obstack_1grow (output, *tokptr);
|
||||
|
@ -2048,7 +2049,7 @@ c_parse_escape (const char **ptr, struct obstack *output)
|
|||
if (output)
|
||||
obstack_grow_str (output, "\\");
|
||||
for (i = 0;
|
||||
i < 3 && isdigit (*tokptr) && *tokptr != '8' && *tokptr != '9';
|
||||
i < 3 && ISDIGIT (*tokptr) && *tokptr != '8' && *tokptr != '9';
|
||||
++i)
|
||||
{
|
||||
if (output)
|
||||
|
@ -2073,9 +2074,9 @@ c_parse_escape (const char **ptr, struct obstack *output)
|
|||
obstack_1grow (output, *tokptr);
|
||||
}
|
||||
++tokptr;
|
||||
if (!isxdigit (*tokptr))
|
||||
if (!ISXDIGIT (*tokptr))
|
||||
error (_("\\%c escape without a following hex digit"), c);
|
||||
for (i = 0; i < len && isxdigit (*tokptr); ++i)
|
||||
for (i = 0; i < len && ISXDIGIT (*tokptr); ++i)
|
||||
{
|
||||
if (output)
|
||||
obstack_1grow (output, *tokptr);
|
||||
|
@ -2668,7 +2669,7 @@ lex_one_token (struct parser_state *par_state, bool *is_quoted_name)
|
|||
size_t len = strlen ("selector");
|
||||
|
||||
if (strncmp (p, "selector", len) == 0
|
||||
&& (p[len] == '\0' || isspace (p[len])))
|
||||
&& (p[len] == '\0' || ISSPACE (p[len])))
|
||||
{
|
||||
lexptr = p + len;
|
||||
return SELECTOR;
|
||||
|
@ -2677,9 +2678,9 @@ lex_one_token (struct parser_state *par_state, bool *is_quoted_name)
|
|||
goto parse_string;
|
||||
}
|
||||
|
||||
while (isspace (*p))
|
||||
while (ISSPACE (*p))
|
||||
p++;
|
||||
if (strncmp (p, "entry", len) == 0 && !isalnum (p[len])
|
||||
if (strncmp (p, "entry", len) == 0 && !c_ident_is_alnum (p[len])
|
||||
&& p[len] != '_')
|
||||
{
|
||||
lexptr = &p[len];
|
||||
|
@ -2741,16 +2742,14 @@ lex_one_token (struct parser_state *par_state, bool *is_quoted_name)
|
|||
}
|
||||
}
|
||||
|
||||
if (!(c == '_' || c == '$'
|
||||
|| (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
|
||||
if (!(c == '_' || c == '$' || c_ident_is_alpha (c)))
|
||||
/* We must have come across a bad character (e.g. ';'). */
|
||||
error (_("Invalid character '%c' in expression."), c);
|
||||
|
||||
/* It's a name. See how long it is. */
|
||||
namelen = 0;
|
||||
for (c = tokstart[namelen];
|
||||
(c == '_' || c == '$' || (c >= '0' && c <= '9')
|
||||
|| (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '<');)
|
||||
(c == '_' || c == '$' || c_ident_is_alnum (c) || c == '<');)
|
||||
{
|
||||
/* Template parameter lists are part of the name.
|
||||
FIXME: This mishandles `print $a<4&&$a>3'. */
|
||||
|
|
11
gdb/c-lang.c
11
gdb/c-lang.c
|
@ -25,6 +25,7 @@
|
|||
#include "language.h"
|
||||
#include "varobj.h"
|
||||
#include "c-lang.h"
|
||||
#include "c-support.h"
|
||||
#include "valprint.h"
|
||||
#include "macroscope.h"
|
||||
#include "charset.h"
|
||||
|
@ -382,7 +383,7 @@ convert_ucn (char *p, char *limit, const char *dest_charset,
|
|||
gdb_byte data[4];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < length && p < limit && isxdigit (*p); ++i, ++p)
|
||||
for (i = 0; i < length && p < limit && ISXDIGIT (*p); ++i, ++p)
|
||||
result = (result << 4) + host_hex_value (*p);
|
||||
|
||||
for (i = 3; i >= 0; --i)
|
||||
|
@ -424,7 +425,7 @@ convert_octal (struct type *type, char *p,
|
|||
unsigned long value = 0;
|
||||
|
||||
for (i = 0;
|
||||
i < 3 && p < limit && isdigit (*p) && *p != '8' && *p != '9';
|
||||
i < 3 && p < limit && ISDIGIT (*p) && *p != '8' && *p != '9';
|
||||
++i)
|
||||
{
|
||||
value = 8 * value + host_hex_value (*p);
|
||||
|
@ -447,7 +448,7 @@ convert_hex (struct type *type, char *p,
|
|||
{
|
||||
unsigned long value = 0;
|
||||
|
||||
while (p < limit && isxdigit (*p))
|
||||
while (p < limit && ISXDIGIT (*p))
|
||||
{
|
||||
value = 16 * value + host_hex_value (*p);
|
||||
++p;
|
||||
|
@ -488,7 +489,7 @@ convert_escape (struct type *type, const char *dest_charset,
|
|||
|
||||
case 'x':
|
||||
ADVANCE;
|
||||
if (!isxdigit (*p))
|
||||
if (!ISXDIGIT (*p))
|
||||
error (_("\\x used with no following hex digits."));
|
||||
p = convert_hex (type, p, limit, output);
|
||||
break;
|
||||
|
@ -510,7 +511,7 @@ convert_escape (struct type *type, const char *dest_charset,
|
|||
int length = *p == 'u' ? 4 : 8;
|
||||
|
||||
ADVANCE;
|
||||
if (!isxdigit (*p))
|
||||
if (!ISXDIGIT (*p))
|
||||
error (_("\\u used with no following hex digits"));
|
||||
p = convert_ucn (p, limit, dest_charset, output, length);
|
||||
}
|
||||
|
|
46
gdb/c-support.h
Normal file
46
gdb/c-support.h
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* Helper routines for C support in GDB.
|
||||
Copyright (C) 2017-2018 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GDB.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef C_SUPPORT_H
|
||||
#define C_SUPPORT_H
|
||||
|
||||
#include "safe-ctype.h"
|
||||
|
||||
/* Like ISALPHA, but also returns true for the union of all UTF-8
|
||||
multi-byte sequence bytes and non-ASCII characters in
|
||||
extended-ASCII charsets (e.g., Latin1). I.e., returns true if the
|
||||
high bit is set. Note that not all UTF-8 ranges are allowed in C++
|
||||
identifiers, but we don't need to be pedantic so for simplicity we
|
||||
ignore that here. Plus this avoids the complication of actually
|
||||
knowing what was the right encoding. */
|
||||
|
||||
static inline bool
|
||||
c_ident_is_alpha (unsigned char ch)
|
||||
{
|
||||
return ISALPHA (ch) || ch >= 0x80;
|
||||
}
|
||||
|
||||
/* Similarly, but like ISALNUM. */
|
||||
|
||||
static inline bool
|
||||
c_ident_is_alnum (unsigned char ch)
|
||||
{
|
||||
return ISALNUM (ch) || ch >= 0x80;
|
||||
}
|
||||
|
||||
#endif /* C_SUPPORT_H */
|
|
@ -35,6 +35,7 @@
|
|||
#include "safe-ctype.h"
|
||||
#include "demangle.h"
|
||||
#include "cp-support.h"
|
||||
#include "c-support.h"
|
||||
|
||||
/* Bison does not make it easy to create a parser without global
|
||||
state, unfortunately. Here are all the global variables used
|
||||
|
@ -1304,28 +1305,6 @@ d_binary (const char *name, struct demangle_component *lhs, struct demangle_comp
|
|||
fill_comp (DEMANGLE_COMPONENT_BINARY_ARGS, lhs, rhs));
|
||||
}
|
||||
|
||||
/* Like ISALPHA, but also returns true for the union of all UTF-8
|
||||
multi-byte sequence bytes and non-ASCII characters in
|
||||
extended-ASCII charsets (e.g., Latin1). I.e., returns true if the
|
||||
high bit is set. Note that not all UTF-8 ranges are allowed in C++
|
||||
identifiers, but we don't need to be pedantic so for simplicity we
|
||||
ignore that here. Plus this avoids the complication of actually
|
||||
knowing what was the right encoding. */
|
||||
|
||||
static inline bool
|
||||
cp_ident_is_alpha (unsigned char ch)
|
||||
{
|
||||
return ISALPHA (ch) || ch >= 0x80;
|
||||
}
|
||||
|
||||
/* Similarly, but like ISALNUM. */
|
||||
|
||||
static inline bool
|
||||
cp_ident_is_alnum (unsigned char ch)
|
||||
{
|
||||
return ISALNUM (ch) || ch >= 0x80;
|
||||
}
|
||||
|
||||
/* Find the end of a symbol name starting at LEXPTR. */
|
||||
|
||||
static const char *
|
||||
|
@ -1333,7 +1312,7 @@ symbol_end (const char *lexptr)
|
|||
{
|
||||
const char *p = lexptr;
|
||||
|
||||
while (*p && (cp_ident_is_alnum (*p) || *p == '_' || *p == '$' || *p == '.'))
|
||||
while (*p && (c_ident_is_alnum (*p) || *p == '_' || *p == '$' || *p == '.'))
|
||||
p++;
|
||||
|
||||
return p;
|
||||
|
@ -1813,7 +1792,7 @@ yylex (void)
|
|||
return ERROR;
|
||||
}
|
||||
|
||||
if (!(c == '_' || c == '$' || cp_ident_is_alpha (c)))
|
||||
if (!(c == '_' || c == '$' || c_ident_is_alpha (c)))
|
||||
{
|
||||
/* We must have come across a bad character (e.g. ';'). */
|
||||
yyerror (_("invalid character"));
|
||||
|
@ -1824,7 +1803,7 @@ yylex (void)
|
|||
namelen = 0;
|
||||
do
|
||||
c = tokstart[++namelen];
|
||||
while (cp_ident_is_alnum (c) || c == '_' || c == '$');
|
||||
while (c_ident_is_alnum (c) || c == '_' || c == '$');
|
||||
|
||||
lexptr += namelen;
|
||||
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
2018-05-22 Pedro Alves <palves@redhat.com>
|
||||
|
||||
PR gdb/22973
|
||||
* gdb.base/utf8-identifiers.c: New file.
|
||||
* gdb.base/utf8-identifiers.exp: New file.
|
||||
|
||||
2018-05-22 Pedro Franco de Carvalho <pedromfc@linux.vnet.ibm.com>
|
||||
|
||||
* gdb.arch/powerpc-fpscr-gcore.exp: New file.
|
||||
|
|
71
gdb/testsuite/gdb.base/utf8-identifiers.c
Normal file
71
gdb/testsuite/gdb.base/utf8-identifiers.c
Normal file
|
@ -0,0 +1,71 @@
|
|||
/* -*- coding: utf-8 -*- */
|
||||
|
||||
/* This testcase is part of GDB, the GNU debugger.
|
||||
|
||||
Copyright 2017-2018 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/* UTF-8 "função1". */
|
||||
#define FUNCAO1 fun\u00e7\u00e3o1
|
||||
|
||||
/* UTF-8 "função2". */
|
||||
#define FUNCAO2 fun\u00e7\u00e3o2
|
||||
|
||||
/* UTF-8 "my_função". */
|
||||
#define MY_FUNCAO my_fun\u00e7\u00e3o
|
||||
|
||||
/* UTF-8 "num_€". */
|
||||
#define NUM_EUROS num_\u20ac
|
||||
|
||||
struct S
|
||||
{
|
||||
int NUM_EUROS;
|
||||
} g_s;
|
||||
|
||||
void
|
||||
FUNCAO1 (void)
|
||||
{
|
||||
g_s.NUM_EUROS = 1000;
|
||||
}
|
||||
|
||||
void
|
||||
FUNCAO2 (void)
|
||||
{
|
||||
g_s.NUM_EUROS = 1000;
|
||||
}
|
||||
|
||||
void
|
||||
MY_FUNCAO (void)
|
||||
{
|
||||
}
|
||||
|
||||
int NUM_EUROS = 2000;
|
||||
|
||||
static void
|
||||
done ()
|
||||
{
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
FUNCAO1 ();
|
||||
done ();
|
||||
FUNCAO2 ();
|
||||
MY_FUNCAO ();
|
||||
|
||||
return 0;
|
||||
}
|
77
gdb/testsuite/gdb.base/utf8-identifiers.exp
Normal file
77
gdb/testsuite/gdb.base/utf8-identifiers.exp
Normal file
|
@ -0,0 +1,77 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This testcase is part of GDB, the GNU debugger.
|
||||
|
||||
# Copyright 2017-2018 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# Test GDB's support for UTF-8 C/C++ identifiers.
|
||||
|
||||
load_lib completion-support.exp
|
||||
|
||||
standard_testfile
|
||||
|
||||
# Enable basic use of UTF-8. LC_ALL gets reset for each testfile.
|
||||
setenv LC_ALL C.UTF-8
|
||||
|
||||
if { [prepare_for_testing "failed to prepare" ${testfile} [list $srcfile]] } {
|
||||
return -1
|
||||
}
|
||||
|
||||
if ![runto done] {
|
||||
fail "couldn't run to done"
|
||||
return
|
||||
}
|
||||
|
||||
# Test expressions.
|
||||
gdb_test "print g_s.num_€" " = 1000"
|
||||
gdb_test "print num_€" " = 2000"
|
||||
|
||||
# Test linespecs/breakpoints.
|
||||
gdb_test "break função2" "Breakpoint $decimal at .*$srcfile.*"
|
||||
|
||||
set test "info breakpoints"
|
||||
gdb_test_multiple $test $test {
|
||||
-re "in função2 at .*$srcfile.*$gdb_prompt $" {
|
||||
pass $test
|
||||
}
|
||||
}
|
||||
|
||||
gdb_test "continue" \
|
||||
"Breakpoint $decimal, função2 \\(\\) at .*$srcfile.*"
|
||||
|
||||
# Unload symbols from shared libraries to avoid random symbol and file
|
||||
# names getting in the way of completion.
|
||||
gdb_test_no_output "nosharedlibrary"
|
||||
|
||||
# Test linespec completion.
|
||||
|
||||
# A unique completion.
|
||||
test_gdb_complete_unique "break my_fun" "break my_função"
|
||||
|
||||
# A multiple-matches completion:
|
||||
|
||||
# kfailed because gdb/readline display the completion match list like
|
||||
# this, with no separating space:
|
||||
#
|
||||
# (gdb) break função[TAB]
|
||||
# função1função2
|
||||
#
|
||||
# ... which is bogus.
|
||||
setup_kfail "gdb/23211" "*-*-*"
|
||||
test_gdb_complete_multiple "break " "fun" "ção" {"função1" "função2"}
|
||||
|
||||
# Test expression completion.
|
||||
test_gdb_complete_unique "print g_s.num" "print g_s.num_€"
|
Loading…
Add table
Reference in a new issue