Always use expand_symtabs_matching in ada-lang.c

The previous patch fixed the immediate performance problem with Ada
name matching, by having a subset of matches call
expand_symtabs_matching rather than expand_matching_symbols.  However,
it seemed to me that expand_matching_symbols should not be needed at
all.

To achieve this, this patch changes ada_lookup_name_info::split_name
to use the decoded name, rather than the encoded name.  In order to
make this work correctly, a new decoded form is used: one that does
not decode operators (this is already done) and also does not decode
wide characters.  The latter change is done so that changes to the Ada
source charset don't affect the DWARF index.

With this in place, we can change ada-lang.c to always use
expand_symtabs_matching rather than expand_matching_symbols.
This commit is contained in:
Tom Tromey 2023-11-10 11:58:47 -07:00
parent 47cd8fcf54
commit 957ce53750
4 changed files with 40 additions and 112 deletions

View file

@ -1308,7 +1308,7 @@ convert_from_hex_encoded (std::string &out, const char *str, int n)
/* See ada-lang.h. */ /* See ada-lang.h. */
std::string std::string
ada_decode (const char *encoded, bool wrap, bool operators) ada_decode (const char *encoded, bool wrap, bool operators, bool wide)
{ {
int i; int i;
int len0; int len0;
@ -1502,7 +1502,7 @@ ada_decode (const char *encoded, bool wrap, bool operators)
i++; i++;
} }
if (i < len0 + 3 && encoded[i] == 'U' && isxdigit (encoded[i + 1])) if (wide && i < len0 + 3 && encoded[i] == 'U' && isxdigit (encoded[i + 1]))
{ {
if (convert_from_hex_encoded (decoded, &encoded[i + 1], 2)) if (convert_from_hex_encoded (decoded, &encoded[i + 1], 2))
{ {
@ -1510,7 +1510,7 @@ ada_decode (const char *encoded, bool wrap, bool operators)
continue; continue;
} }
} }
else if (i < len0 + 5 && encoded[i] == 'W' && isxdigit (encoded[i + 1])) else if (wide && i < len0 + 5 && encoded[i] == 'W' && isxdigit (encoded[i + 1]))
{ {
if (convert_from_hex_encoded (decoded, &encoded[i + 1], 4)) if (convert_from_hex_encoded (decoded, &encoded[i + 1], 4))
{ {
@ -1518,7 +1518,7 @@ ada_decode (const char *encoded, bool wrap, bool operators)
continue; continue;
} }
} }
else if (i < len0 + 10 && encoded[i] == 'W' && encoded[i + 1] == 'W' else if (wide && i < len0 + 10 && encoded[i] == 'W' && encoded[i + 1] == 'W'
&& isxdigit (encoded[i + 2])) && isxdigit (encoded[i + 2]))
{ {
if (convert_from_hex_encoded (decoded, &encoded[i + 2], 8)) if (convert_from_hex_encoded (decoded, &encoded[i + 2], 8))
@ -5465,91 +5465,6 @@ ada_add_block_renamings (std::vector<struct block_symbol> &result,
return result.size () != defns_mark; return result.size () != defns_mark;
} }
/* Implements compare_names, but only applying the comparison using
   the given CASING.

   STRING1 and STRING2 are scanned in parallel for as long as their
   characters agree (after lowercasing both when CASING is
   case_sensitive_off).  If whitespace is met in either string during
   the scan, or if either string has an opening parenthesis at the
   point where the scan stops, the decision is handed over to
   strcmp_iw_ordered.  If STRING2 is exhausted while the rest of
   STRING1 starts with '_', the result is 0 when is_name_suffix
   accepts that remainder and 1 otherwise.  In every other case the
   result is the difference between the two characters at which the
   scan stopped.  */
static int
compare_names_with_case (const char *string1, const char *string2,
                         enum case_sensitivity casing)
{
  const bool fold_case = (casing == case_sensitive_off);

  /* Advance over the common prefix of the two strings.  */
  for (; *string1 != '\0' && *string2 != '\0'; ++string1, ++string2)
    {
      /* Whitespace needs the whitespace-insensitive ordering.  */
      if (isspace (*string1) || isspace (*string2))
        return strcmp_iw_ordered (string1, string2);

      const char lhs = fold_case ? tolower (*string1) : *string1;
      const char rhs = fold_case ? tolower (*string2) : *string2;
      if (lhs != rhs)
        break;
    }

  /* A parenthesis on the STRING1 side is checked before anything
     else.  */
  if (*string1 == '(')
    return strcmp_iw_ordered (string1, string2);

  /* STRING2 is a prefix of STRING1 and what follows begins with an
     underscore: let is_name_suffix decide whether the two names are
     to be treated as equal.  */
  if (*string1 == '_' && *string2 == '\0')
    return is_name_suffix (string1) ? 0 : 1;

  if (*string2 == '(')
    return strcmp_iw_ordered (string1, string2);

  /* Plain character difference, honoring CASING.  */
  return (fold_case
          ? tolower (*string1) - tolower (*string2)
          : *string1 - *string2);
}
/* Compare STRING1 to STRING2, returning a value with the same meaning
   as the result of strcmp.

   The ordering is compatible with strcmp_iw_ordered in the sense
   that...

       strcmp_iw_ordered (STRING1, STRING2) <= 0

   ... implies...

       compare_names (STRING1, STRING2) <= 0

   (the two functions may, however, disagree about which symbols
   compare equal).  */
static int
compare_names (const char *string1, const char *string2)
{
  /* As strcmp_iw_ordered does, start with a case-insensitive
     comparison.  Only when that pass cannot tell the two strings
     apart is a second, case-sensitive, pass used as a
     tie-breaker.  */
  const int insensitive
    = compare_names_with_case (string1, string2, case_sensitive_off);
  if (insensitive != 0)
    return insensitive;

  return compare_names_with_case (string1, string2, case_sensitive_on);
}
/* Convenience function to get at the Ada encoded lookup name for /* Convenience function to get at the Ada encoded lookup name for
LOOKUP_NAME, as a C string. */ LOOKUP_NAME, as a C string. */
@ -5559,29 +5474,24 @@ ada_lookup_name (const lookup_name_info &lookup_name)
return lookup_name.ada ().lookup_name ().c_str (); return lookup_name.ada ().lookup_name ().c_str ();
} }
/* A helper for add_nonlocal_symbols. Call expand_matching_symbols /* A helper for add_nonlocal_symbols. Expand all necessary symtabs
for OBJFILE, then walk the objfile's symtabs and update the for OBJFILE, then walk the objfile's symtabs and update the
results. */ results. */
static void static void
map_matching_symbols (struct objfile *objfile, map_matching_symbols (struct objfile *objfile,
const lookup_name_info &lookup_name, const lookup_name_info &lookup_name,
bool is_wild_match,
domain_enum domain, domain_enum domain,
int global, int global,
match_data &data) match_data &data)
{ {
data.objfile = objfile; data.objfile = objfile;
if (is_wild_match || lookup_name.ada ().standard_p ()) objfile->expand_symtabs_matching (nullptr, &lookup_name,
objfile->expand_matching_symbols (lookup_name, domain, global, nullptr, nullptr,
is_wild_match ? nullptr : compare_names); global
else ? SEARCH_GLOBAL_BLOCK
objfile->expand_symtabs_matching (nullptr, &lookup_name, : SEARCH_STATIC_BLOCK,
nullptr, nullptr, domain, ALL_DOMAIN);
global
? SEARCH_GLOBAL_BLOCK
: SEARCH_STATIC_BLOCK,
domain, ALL_DOMAIN);
const int block_kind = global ? GLOBAL_BLOCK : STATIC_BLOCK; const int block_kind = global ? GLOBAL_BLOCK : STATIC_BLOCK;
for (compunit_symtab *symtab : objfile->compunits ()) for (compunit_symtab *symtab : objfile->compunits ())
@ -5610,8 +5520,7 @@ add_nonlocal_symbols (std::vector<struct block_symbol> &result,
for (objfile *objfile : current_program_space->objfiles ()) for (objfile *objfile : current_program_space->objfiles ())
{ {
map_matching_symbols (objfile, lookup_name, is_wild_match, domain, map_matching_symbols (objfile, lookup_name, domain, global, data);
global, data);
for (compunit_symtab *cu : objfile->compunits ()) for (compunit_symtab *cu : objfile->compunits ())
{ {
@ -5631,7 +5540,7 @@ add_nonlocal_symbols (std::vector<struct block_symbol> &result,
lookup_name_info name1 (bracket_name, symbol_name_match_type::FULL); lookup_name_info name1 (bracket_name, symbol_name_match_type::FULL);
for (objfile *objfile : current_program_space->objfiles ()) for (objfile *objfile : current_program_space->objfiles ())
map_matching_symbols (objfile, name1, false, domain, global, data); map_matching_symbols (objfile, name1, domain, global, data);
} }
} }
@ -13297,6 +13206,8 @@ ada_lookup_name_info::ada_lookup_name_info (const lookup_name_info &lookup_name)
else else
m_standard_p = false; m_standard_p = false;
m_decoded_name = ada_decode (m_encoded_name.c_str (), true, false, false);
/* If the name contains a ".", then the user is entering a fully /* If the name contains a ".", then the user is entering a fully
qualified entity name, and the match must not be done in wild qualified entity name, and the match must not be done in wild
mode. Similarly, if the user wants to complete what looks mode. Similarly, if the user wants to complete what looks

View file

@ -216,10 +216,18 @@ extern const char *ada_decode_symbol (const struct general_symbol_info *);
the name does not appear to be GNAT-encoded, then the result the name does not appear to be GNAT-encoded, then the result
depends on WRAP. If WRAP is true (the default), then the result is depends on WRAP. If WRAP is true (the default), then the result is
simply wrapped in <...>. If WRAP is false, then the empty string simply wrapped in <...>. If WRAP is false, then the empty string
will be returned. Also, when OPERATORS is false, operator names will be returned.
will not be decoded. */
When OPERATORS is false, operator names will not be decoded. By
default, they are decoded, e.g., 'Oadd' will be transformed to
'"+"'.
When WIDE is false, wide characters will be left as-is. By
default, they are converted from their hex encoding to the host
charset. */
extern std::string ada_decode (const char *name, bool wrap = true, extern std::string ada_decode (const char *name, bool wrap = true,
bool operators = true); bool operators = true,
bool wide = true);
extern std::vector<struct block_symbol> ada_lookup_symbol_list extern std::vector<struct block_symbol> ada_lookup_symbol_list
(const char *, const struct block *, domain_enum); (const char *, const struct block *, domain_enum);

View file

@ -263,7 +263,11 @@ gdb::unique_xmalloc_ptr<char>
cooked_index_shard::handle_gnat_encoded_entry (cooked_index_entry *entry, cooked_index_shard::handle_gnat_encoded_entry (cooked_index_entry *entry,
htab_t gnat_entries) htab_t gnat_entries)
{ {
std::string canonical = ada_decode (entry->name, false, false); /* We decode Ada names in a particular way: operators and wide
characters are left as-is. This is done to make name matching a
bit simpler; and for wide characters, it means the choice of Ada
source charset does not affect the indexer directly. */
std::string canonical = ada_decode (entry->name, false, false, false);
if (canonical.empty ()) if (canonical.empty ())
return {}; return {};
std::vector<std::string_view> names = split_name (canonical.c_str (), std::vector<std::string_view> names = split_name (canonical.c_str (),

View file

@ -128,21 +128,26 @@ class ada_lookup_name_info final
peculiarities. */ peculiarities. */
std::vector<std::string_view> split_name () const std::vector<std::string_view> split_name () const
{ {
if (m_verbatim_p || m_standard_p) if (m_verbatim_p)
{ {
/* For verbatim matches, just return the encoded name
as-is. */
std::vector<std::string_view> result; std::vector<std::string_view> result;
if (m_standard_p)
result.emplace_back ("standard");
result.emplace_back (m_encoded_name); result.emplace_back (m_encoded_name);
return result; return result;
} }
return ::split_name (m_encoded_name.c_str (), split_style::UNDERSCORE); /* Otherwise, split the decoded name for matching. */
return ::split_name (m_decoded_name.c_str (), split_style::DOT_STYLE);
} }
private: private:
/* The Ada-encoded lookup name. */ /* The Ada-encoded lookup name. */
std::string m_encoded_name; std::string m_encoded_name;
/* The decoded lookup name. This is formed by calling ada_decode
with both 'operators' and 'wide' set to false. */
std::string m_decoded_name;
/* Whether the user-provided lookup name was Ada encoded. If so, /* Whether the user-provided lookup name was Ada encoded. If so,
then return encoded names in the 'matches' method's 'completion then return encoded names in the 'matches' method's 'completion
match result' output. */ match result' output. */