libctf: add a deduplicator-specific type mapping table

When CTF linking is done, the linker has to track the association
between types in the inputs and types in the outputs.  The deduplicator
does this via the cd_output_emission_hashes, which maps from hashes of
types (valid in both the input and output) to the IDs of types in the
specific dict in which the cd_emission_hashes is held.  However, the
nondeduplicating linker and ctf_add_type used a different mechanism, a
dedicated hashtab stored in the ctf_link_type_mapping, populated via
ctf_add_type_mapping and queried via the ctf_type_mapping function.  To
allow the same functions to be used for variable and symbol population
in both the deduplicating and nondeduplicating linker, the deduplicator
carefully transferred all its input->output mappings into this hashtab
before returning.

This is *expensive*. The number of entries in this hashtab scales as the
number of input types, and unlike the hashing machinery the type mapping
machinery (the only other thing which scales that way) has not been much
optimized.

Now the nondeduplicating linker is gone, we can throw this out, move
the existing type mapping machinery to ctf-create.c and dedicate it to
ctf_add_type alone, and add a new function ctf_dedup_type_mapping which
uses the deduplicator's built-in knowledge of type mappings directly,
without requiring an expensive repopulation phase.

This speeds up a test link of nouveau.ko (a good worst-case candidate
with a lot of types in each of a lot of input files) from 9.11s to 7.15s
in my testing, a speedup of over 20%.

libctf/ChangeLog
2021-03-02  Nick Alcock  <nick.alcock@oracle.com>

	* ctf-impl.h (ctf_dict_t) <ctf_link_type_mapping>: No longer used
	by the nondeduplicating linker.
	(ctf_add_type_mapping): Removed, now static.
	(ctf_type_mapping): Likewise.
	(ctf_dedup_type_mapping): New.
	(ctf_dedup_t) <cd_input_nums>: New.
	* ctf-dedup.c (ctf_dedup_init): Populate it.
	(ctf_dedup_fini): Free it again.  Emphasise that this has to be
	the last thing called.
	(ctf_dedup): Populate it.
	(ctf_dedup_populate_type_mapping): Removed.
	(ctf_dedup_populate_type_mappings): Likewise.
	(ctf_dedup_emit): No longer call it.  No longer call
	ctf_dedup_fini either.
	(ctf_dedup_type_mapping): New.
	* ctf-link.c (ctf_unnamed_cuname): New.
	(ctf_create_per_cu): Arguments must be non-null now.
	(ctf_in_member_cb_arg): Removed.
	(ctf_link): No longer populate it.  No longer discard the
	mapping table.
	(ctf_link_deduplicating_one_symtypetab): Use
	ctf_dedup_type_mapping, not ctf_type_mapping.  Use
	ctf_unnamed_cuname.
	(ctf_link_one_variable): Likewise.  Pass in args individually: no
	longer a ctf_variable_iter callback.
	(empty_link_type_mapping): Removed.
	(ctf_link_deduplicating_variables): Use ctf_variable_next, not
	ctf_variable_iter.  No longer pack arguments to
	ctf_link_one_variable into a struct.
	(ctf_link_deduplicating_per_cu): Call ctf_dedup_fini once
	all link phases are done.
	(ctf_link_deduplicating): Likewise.
	(ctf_link_intern_extern_string): Improve comment.
	(ctf_add_type_mapping): Migrate...
	(ctf_type_mapping): ... these functions...
	* ctf-create.c (ctf_add_type_mapping): ... here...
	(ctf_type_mapping): ... and make static, for the sole use of
	ctf_add_type.
This commit is contained in:
Nick Alcock 2021-03-02 15:10:05 +00:00
parent 478c04a55e
commit f5060e5633
5 changed files with 344 additions and 328 deletions

View file

@ -1642,6 +1642,12 @@ ctf_dedup_init (ctf_dict_t *fp)
goto oom;
#endif
if ((d->cd_input_nums
= ctf_dynhash_create (ctf_hash_integer,
ctf_hash_eq_integer,
NULL, NULL)) == NULL)
goto oom;
if ((d->cd_emission_struct_members
= ctf_dynhash_create (ctf_hash_integer,
ctf_hash_eq_integer,
@ -1661,6 +1667,8 @@ ctf_dedup_init (ctf_dict_t *fp)
return ctf_set_errno (fp, ENOMEM);
}
/* No ctf_dedup calls are allowed after this call other than starting a new
deduplication via ctf_dedup (not even ctf_dedup_type_mapping lookups). */
void
ctf_dedup_fini (ctf_dict_t *fp, ctf_dict_t **outputs, uint32_t noutputs)
{
@ -1682,6 +1690,7 @@ ctf_dedup_fini (ctf_dict_t *fp, ctf_dict_t **outputs, uint32_t noutputs)
#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
ctf_dynhash_destroy (d->cd_output_mapping_guard);
#endif
ctf_dynhash_destroy (d->cd_input_nums);
ctf_dynhash_destroy (d->cd_emission_struct_members);
ctf_dynset_destroy (d->cd_conflicting_types);
@ -1876,12 +1885,22 @@ ctf_dedup (ctf_dict_t *output, ctf_dict_t **inputs, uint32_t ninputs,
size_t i;
ctf_next_t *it = NULL;
for (i = 0; i < ninputs; i++)
ctf_dprintf ("Input %i: %s\n", (int) i, ctf_link_input_name (inputs[i]));
if (ctf_dedup_init (output) < 0)
return -1; /* errno is set for us. */
for (i = 0; i < ninputs; i++)
{
ctf_dprintf ("Input %i: %s\n", (int) i, ctf_link_input_name (inputs[i]));
if (ctf_dynhash_insert (d->cd_input_nums, inputs[i],
(void *) (uintptr_t) i) < 0)
{
ctf_set_errno (output, errno);
ctf_err_warn (output, 0, errno, _("ctf_dedup: cannot initialize: %s\n"),
ctf_errmsg (errno));
goto err;
}
}
/* Some flags do not apply when CU-mapping: this is not a duplicated link,
because there is only one output and we really don't want to end up marking
all nonconflicting but appears-only-once types as conflicting (which in the
@ -1937,6 +1956,10 @@ ctf_dedup (ctf_dict_t *output, ctf_dict_t **inputs, uint32_t ninputs,
return -1; /* errno is set for us. */
}
return 0;
err:
ctf_dedup_fini (output, NULL, 0);
return -1;
}
static int
@ -3003,100 +3026,6 @@ ctf_dedup_emit_struct_members (ctf_dict_t *output, ctf_dict_t **inputs,
return ctf_set_errno (output, err);
}
/* Populate the type mapping used by the types in one FP (which must be an input
dict containing a non-null cd_output resulting from a ctf_dedup_emit_type
walk). */
static int
ctf_dedup_populate_type_mapping (ctf_dict_t *shared, ctf_dict_t *fp,
ctf_dict_t **inputs)
{
ctf_dedup_t *d = &shared->ctf_dedup;
ctf_dict_t *output = fp->ctf_dedup.cd_output;
const void *k, *v;
ctf_next_t *i = NULL;
int err;
/* The shared dict (the output) stores its types in the fp itself, not in a
separate cd_output dict. */
if (shared == fp)
output = fp;
/* There may be no types to emit at all, or all the types in this TU may be
shared. */
if (!output || !output->ctf_dedup.cd_output_emission_hashes)
return 0;
while ((err = ctf_dynhash_cnext (output->ctf_dedup.cd_output_emission_hashes,
&i, &k, &v)) == 0)
{
const char *hval = (const char *) k;
ctf_id_t id_out = (ctf_id_t) (uintptr_t) v;
ctf_next_t *j = NULL;
ctf_dynset_t *type_ids;
const void *id;
type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hval);
if (!ctf_assert (shared, type_ids))
return -1;
#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
ctf_dprintf ("Traversing emission hash: hval %s\n", hval);
#endif
while ((err = ctf_dynset_cnext (type_ids, &j, &id)) == 0)
{
ctf_dict_t *input = inputs[CTF_DEDUP_GID_TO_INPUT (id)];
ctf_id_t id_in = CTF_DEDUP_GID_TO_TYPE (id);
#ifdef ENABLE_LIBCTF_HASH_DEBUGGING
ctf_dprintf ("Adding mapping from %i/%lx to %lx\n",
CTF_DEDUP_GID_TO_INPUT (id), id_in, id_out);
#endif
ctf_add_type_mapping (input, id_in, output, id_out);
}
if (err != ECTF_NEXT_END)
{
ctf_next_destroy (i);
goto err;
}
}
if (err != ECTF_NEXT_END)
goto err;
return 0;
err:
ctf_err_warn (shared, 0, err, _("iteration error populating the type mapping"));
return ctf_set_errno (shared, err);
}
/* Populate the type mapping machinery used by the rest of the linker,
by ctf_add_type, etc. */
static int
ctf_dedup_populate_type_mappings (ctf_dict_t *output, ctf_dict_t **inputs,
uint32_t ninputs)
{
size_t i;
if (ctf_dedup_populate_type_mapping (output, output, inputs) < 0)
{
ctf_err_warn (output, 0, 0, _("cannot populate type mappings for shared "
"CTF dict"));
return -1; /* errno is set for us. */
}
for (i = 0; i < ninputs; i++)
{
if (ctf_dedup_populate_type_mapping (output, inputs[i], inputs) < 0)
{
ctf_err_warn (output, 0, ctf_errno (inputs[i]),
_("cannot populate type mappings for per-CU CTF dict"));
return ctf_set_errno (output, ctf_errno (inputs[i]));
}
}
return 0;
}
/* Emit deduplicated types into the outputs. The shared type repository is
OUTPUT, on which the ctf_dedup function must have already been called. The
PARENTS array contains the INPUTS index of the parent dict for every child
@ -3127,9 +3056,6 @@ ctf_dedup_emit (ctf_dict_t *output, ctf_dict_t **inputs, uint32_t ninputs,
if (ctf_dedup_emit_struct_members (output, inputs, ninputs, parents) < 0)
return NULL; /* errno is set for us. */
if (ctf_dedup_populate_type_mappings (output, inputs, ninputs) < 0)
return NULL; /* errno is set for us. */
for (i = 0; i < ninputs; i++)
{
if (inputs[i]->ctf_dedup.cd_output)
@ -3163,6 +3089,76 @@ ctf_dedup_emit (ctf_dict_t *output, ctf_dict_t **inputs, uint32_t ninputs,
}
}
ctf_dedup_fini (output, outputs, num_outputs);
return outputs;
}
/* Determine what type SRC_FP / SRC_TYPE was emitted as in the FP, which
must be the shared dict or have it as a parent: return 0 if none. The SRC_FP
must be a past input to ctf_dedup. */
ctf_id_t
ctf_dedup_type_mapping (ctf_dict_t *fp, ctf_dict_t *src_fp, ctf_id_t src_type)
{
ctf_dict_t *output = NULL;
ctf_dedup_t *d;
int input_num;
void *num_ptr;
void *type_ptr;
int found;
const char *hval;
/* It is an error (an internal error in the caller, in ctf-link.c) to call
this with an FP that is not a per-CU output or shared output dict, or with
a SRC_FP that was not passed to ctf_dedup as an input; it is an internal
error in ctf-dedup for the type passed not to have been hashed, though if
the src_fp is a child dict and the type is not a child type, it will have
been hashed under the GID corresponding to the parent. */
if (fp->ctf_dedup.cd_type_hashes != NULL)
output = fp;
else if (fp->ctf_parent && fp->ctf_parent->ctf_dedup.cd_type_hashes != NULL)
output = fp->ctf_parent;
else
{
ctf_set_errno (fp, ECTF_INTERNAL);
ctf_err_warn (fp, 0, ECTF_INTERNAL,
_("dict %p passed to ctf_dedup_type_mapping is not a "
"deduplicated output"), (void *) fp);
return CTF_ERR;
}
if (src_fp->ctf_parent && ctf_type_isparent (src_fp, src_type))
src_fp = src_fp->ctf_parent;
d = &output->ctf_dedup;
found = ctf_dynhash_lookup_kv (d->cd_input_nums, src_fp, NULL, &num_ptr);
if (!ctf_assert (output, found != 0))
return CTF_ERR; /* errno is set for us. */
input_num = (uintptr_t) num_ptr;
hval = ctf_dynhash_lookup (d->cd_type_hashes,
CTF_DEDUP_GID (output, input_num, src_type));
if (!ctf_assert (output, hval != NULL))
return CTF_ERR; /* errno is set for us. */
/* The emission hashes may be unset if this dict was created after
deduplication to house variables or other things that would conflict if
stored in the shared dict. */
if (fp->ctf_dedup.cd_output_emission_hashes)
if (ctf_dynhash_lookup_kv (fp->ctf_dedup.cd_output_emission_hashes, hval,
NULL, &type_ptr))
return (ctf_id_t) (uintptr_t) type_ptr;
if (fp->ctf_parent)
{
ctf_dict_t *pfp = fp->ctf_parent;
if (pfp->ctf_dedup.cd_output_emission_hashes)
if (ctf_dynhash_lookup_kv (pfp->ctf_dedup.cd_output_emission_hashes,
hval, NULL, &type_ptr))
return (ctf_id_t) (uintptr_t) type_ptr;
}
return 0;
}