Handle bit offset and bit size in base types

PR symtab/25470 points out that the Zig programming language allows integers of various bit sizes (including zero), not just sizes that are a multiple of 8. This is supported in DWARF by applying both a byte size and a DW_AT_bit_size. This patch adds support for this feature to integer and boolean types. Other base types are not handled -- for floating-point types, this didn't seem to make sense, and for character types I didn't see much need. (These can be added later if desired.) I've also added support for DW_AT_data_bit_offset at the same time. I don't know whether the Zig compiler requires this, but it was described in the same section in the DWARF standard and was easy to add. A new test case is supplied, using the DWARF assembler. gdb/ChangeLog 2020-09-23 Tom Tromey <tom@tromey.com> PR symtab/25470: * value.c (unpack_long, pack_long, pack_unsigned_long): Handle bit offset and bit size. * printcmd.c (print_scalar_formatted): Handle zero-length integer. (print_scalar_formatted): Use bit_size_differs_p. * gdbtypes.h (enum type_specific_kind) <TYPE_SPECIFIC_INT>: New constant. (union type_specific): <int_stuff>: New member. (struct type) <bit_size_differs_p, bit_size, bit_offset>: New methods. * gdbtypes.c (init_integer_type, init_boolean_type): Initialize TYPE_SPECIFIC_FIELD. (recursive_dump_type, copy_type_recursive): Update. * dwarf2/read.c (read_base_type): Handle DW_AT_bit_size and DW_AT_data_bit_offset. gdb/testsuite/ChangeLog 2020-09-23 Tom Tromey <tom@tromey.com> * gdb.dwarf2/intbits.exp: New file. * gdb.dwarf2/intbits.c: New file.
2020-09-23 09:39:24 -06:00 · 2020-09-23 09:39:24 -06:00 · 20a5fcbd5b
commit 20a5fcbd5b
parent bac51ab78d
9 changed files with 406 additions and 7 deletions
--- a/gdb/ChangeLog
+++ b/gdb/ChangeLog
@ -1,3 +1,22 @@
+2020-09-23  Tom Tromey  <tom@tromey.com>
+
+	PR symtab/25470:
+	* value.c (unpack_long, pack_long, pack_unsigned_long): Handle bit
+	offset and bit size.
+	* printcmd.c (print_scalar_formatted): Handle zero-length
+	integer.
+	(print_scalar_formatted): Use bit_size_differs_p.
+	* gdbtypes.h (enum type_specific_kind) <TYPE_SPECIFIC_INT>: New
+	constant.
+	(union type_specific): <int_stuff>: New member.
+	(struct type) <bit_size_differs_p, bit_size, bit_offset>: New
+	methods.
+	* gdbtypes.c (init_integer_type, init_boolean_type): Initialize
+	TYPE_SPECIFIC_FIELD.
+	(recursive_dump_type, copy_type_recursive): Update.
+	* dwarf2/read.c (read_base_type): Handle DW_AT_bit_size and
+	DW_AT_data_bit_offset.
+
 2020-09-23  Tom Tromey  <tom@tromey.com>

 	* utils.h (class gdb_argv): Add move operators.
--- a/gdb/dwarf2/read.c
+++ b/gdb/dwarf2/read.c
@ -18054,6 +18054,26 @@ read_base_type (struct die_info *die, struct dwarf2_cu *cu)

  type->set_endianity_is_not_default (gdbarch_byte_order (arch) != byte_order);

+  if (TYPE_SPECIFIC_FIELD (type) == TYPE_SPECIFIC_INT)
+    {
+      attr = dwarf2_attr (die, DW_AT_bit_size, cu);
+      if (attr != nullptr && DW_UNSND (attr) <= 8 * TYPE_LENGTH (type))
+	{
+	  unsigned real_bit_size = DW_UNSND (attr);
+	  attr = dwarf2_attr (die, DW_AT_data_bit_offset, cu);
+	  /* Only use the attributes if they make sense together.  */
+	  if (attr == nullptr
+	      || DW_UNSND (attr) + real_bit_size <= 8 * TYPE_LENGTH (type))
+	    {
+	      TYPE_MAIN_TYPE (type)->type_specific.int_stuff.bit_size
+		= real_bit_size;
+	      if (attr != nullptr)
+		TYPE_MAIN_TYPE (type)->type_specific.int_stuff.bit_offset
+		  = DW_UNSND (attr);
+	    }
+	}
+    }
+
  return set_die_type (die, type, cu);
 }

--- a/gdb/gdbtypes.c
+++ b/gdb/gdbtypes.c
@ -3194,6 +3194,10 @@ init_integer_type (struct objfile *objfile,
  if (unsigned_p)
    t->set_is_unsigned (true);

+  TYPE_SPECIFIC_FIELD (t) = TYPE_SPECIFIC_INT;
+  TYPE_MAIN_TYPE (t)->type_specific.int_stuff.bit_size = bit;
+  TYPE_MAIN_TYPE (t)->type_specific.int_stuff.bit_offset = 0;
+
  return t;
 }

@ -3228,6 +3232,10 @@ init_boolean_type (struct objfile *objfile,
  if (unsigned_p)
    t->set_is_unsigned (true);

+  TYPE_SPECIFIC_FIELD (t) = TYPE_SPECIFIC_INT;
+  TYPE_MAIN_TYPE (t)->type_specific.int_stuff.bit_size = bit;
+  TYPE_MAIN_TYPE (t)->type_specific.int_stuff.bit_offset = 0;
+
  return t;
 }

@ -5188,6 +5196,16 @@ recursive_dump_type (struct type *type, int spaces)
 	gdb_print_host_address (TYPE_SELF_TYPE (type), gdb_stdout);
 	puts_filtered ("\n");
 	break;
+
+    case TYPE_SPECIFIC_INT:
+      if (type->bit_size_differs_p ())
+	{
+	  unsigned bit_size = type->bit_size ();
+	  unsigned bit_off = type->bit_offset ();
+	  printfi_filtered (spaces, " bit size = %u, bit offset = %u\n",
+			    bit_size, bit_off);
+	}
+      break;
    }

  if (spaces == 0)
@ -5411,6 +5429,12 @@ copy_type_recursive (struct objfile *objfile,
 			  copy_type_recursive (objfile, TYPE_SELF_TYPE (type),
 					       copied_types));
      break;
+    case TYPE_SPECIFIC_INT:
+      TYPE_SPECIFIC_FIELD (new_type) = TYPE_SPECIFIC_INT;
+      TYPE_MAIN_TYPE (new_type)->type_specific.int_stuff
+	= TYPE_MAIN_TYPE (type)->type_specific.int_stuff;
+      break;
+
    default:
      gdb_assert_not_reached ("bad type_specific_kind");
    }
--- a/gdb/gdbtypes.h
+++ b/gdb/gdbtypes.h
@ -599,7 +599,8 @@ enum type_specific_kind
  TYPE_SPECIFIC_FLOATFORMAT,
  /* Note: This is used by TYPE_CODE_FUNC and TYPE_CODE_METHOD.  */
  TYPE_SPECIFIC_FUNC,
-  TYPE_SPECIFIC_SELF_TYPE
+  TYPE_SPECIFIC_SELF_TYPE,
+  TYPE_SPECIFIC_INT
 };

 union type_owner
@ -764,6 +765,21 @@ union type_specific
     is a member of.  */

  struct type *self_type;
+
+  /* * An integer-like scalar type may be stored in just part of its
+     enclosing storage bytes.  This structure describes this
+     situation.  It is the member in use when the type's
+     type_specific_field is TYPE_SPECIFIC_INT.  */
+  struct
+  {
+    /* * The bit size of the integer.  This can be 0.  For integers
+       that fill their storage (the ordinary case), this field holds
+       the byte size times 8.  */
+    unsigned short bit_size;
+    /* * The bit offset of the integer.  This is ordinarily 0, and can
+       only be non-zero if the bit size is less than the storage
+       size.  */
+    unsigned short bit_offset;
+    /* NOTE(review): both fields are unsigned short, so a value wider
+       than that is narrowed when stored here -- confirm that every
+       writer range-checks against the field width, not only against
+       the storage size.  */
+  } int_stuff;
 };

 /* * Main structure representing a type in GDB.
@ -1182,6 +1198,31 @@ struct type
  /* * Remove dynamic property of kind KIND from this type, if it exists.  */
  void remove_dyn_prop (dynamic_prop_node_kind kind);

+  /* * Return true if this is an integer type whose logical (bit) size
+     differs from its storage size, that is, whose recorded bit size
+     is not exactly 8 times the type's length.  Always return false
+     for non-integer (i.e., non-TYPE_SPECIFIC_INT) types; the kind is
+     checked before int_stuff is read, so this may be called on any
+     type.  */
+  bool bit_size_differs_p () const
+  {
+    return (main_type->type_specific_field == TYPE_SPECIFIC_INT
+	    && main_type->type_specific.int_stuff.bit_size != 8 * length);
+  }
+
+  /* * Return the logical (bit) size for this integer type; the result
+     may be 0.  Only valid for integer (TYPE_SPECIFIC_INT) types:
+     calling this on any other type fails the assertion below.  Check
+     bit_size_differs_p first when the kind of type is not known.  */
+  unsigned short bit_size () const
+  {
+    gdb_assert (main_type->type_specific_field == TYPE_SPECIFIC_INT);
+    return main_type->type_specific.int_stuff.bit_size;
+  }
+
+  /* * Return the bit offset for this integer type; this is 0 unless
+     the type's debug info supplied an offset.  Only valid for
+     integer (TYPE_SPECIFIC_INT) types: calling this on any other
+     type fails the assertion below.  */
+  unsigned short bit_offset () const
+  {
+    gdb_assert (main_type->type_specific_field == TYPE_SPECIFIC_INT);
+    return main_type->type_specific.int_stuff.bit_offset;
+  }
+
  /* * Type that is a pointer to this type.
     NULL if no such pointer-to type is known yet.
     The debugger may add the address of such a type
--- a/gdb/printcmd.c
+++ b/gdb/printcmd.c
@ -374,6 +374,15 @@ print_scalar_formatted (const gdb_byte *valaddr, struct type *type,
 	valaddr += TYPE_LENGTH (type) - len;
    }

+  /* Allow LEN == 0, and in this case, don't assume that VALADDR is
+     valid.  */
+  const gdb_byte zero = 0;
+  if (len == 0)
+    {
+      len = 1;
+      valaddr = &zero;
+    }
+
  if (size != 0 && (options->format == 'x' || options->format == 't'))
    {
      /* Truncate to fit.  */
@ -404,8 +413,8 @@ print_scalar_formatted (const gdb_byte *valaddr, struct type *type,
     long, and then printing the long.  PR cli/16242 suggests changing
     this to using C-style hex float format.

-     Biased range types must also be unbiased here; the unbiasing is
-     done by unpack_long.  */
+     Biased range types and sub-word scalar types must also be handled
+     here; the value is correctly computed by unpack_long.  */
  gdb::byte_vector converted_bytes;
  /* Some cases below will unpack the value again.  In the biased
     range case, we want to avoid this, so we store the unpacked value
@ -418,7 +427,8 @@ print_scalar_formatted (const gdb_byte *valaddr, struct type *type,
 	   || options->format == 'z'
 	   || options->format == 'd'
 	   || options->format == 'u'))
-      || (type->code () == TYPE_CODE_RANGE && type->bounds ()->bias != 0))
+      || (type->code () == TYPE_CODE_RANGE && type->bounds ()->bias != 0)
+      || type->bit_size_differs_p ())
    {
      val_long.emplace (unpack_long (type, valaddr));
      converted_bytes.resize (TYPE_LENGTH (type));
--- a/gdb/testsuite/ChangeLog
+++ b/gdb/testsuite/ChangeLog
@ -1,3 +1,8 @@
+2020-09-23  Tom Tromey  <tom@tromey.com>
+
+	* gdb.dwarf2/intbits.exp: New file.
+	* gdb.dwarf2/intbits.c: New file.
+
 2020-09-20  Pedro Alves  <pedro@palves.net>

 	* lib/mi-support.exp (mi_uncatched_gdb_exit) Switch to the main
--- a/gdb/testsuite/gdb.dwarf2/intbits.c
+++ b/gdb/testsuite/gdb.dwarf2/intbits.c
@ -0,0 +1,48 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* A 16-bit slot holding a 7-bit signed value of -1: the low seven
+   bits (0x7f) are all set.  Note that, for all these values, we
+   explicitly set the endian-ness in the DWARF to avoid issues.  */
+unsigned char i16_m1[2] = { 0x7f, 0 };
+
+/* A 16-bit slot holding a 1-bit value of 1 at bit offset 2 (0x4).  */
+unsigned char u16_1[2] = { 0x4, 0 };
+
+/* A 32-bit slot holding a 17-bit signed value of -2, i.e. the
+   little-endian bit pattern 0x1fffe.  */
+unsigned char u32_m2[4] = { 0xfe, 0xff, 0x01, 0 };
+
+/* A 32-bit slot holding a 31-bit value of 1.  The high bit (the 0x80
+   in the last byte) should be ignored when reading.  */
+unsigned char u32_1[4] = { 1, 0, 0, 0x80 };
+
+/* A 32-bit slot holding a 31-bit value of 1, offset by 1 bit, so the
+   stored pattern is 1 << 1.  */
+unsigned char u32_1_off[4] = { 2, 0, 0, 0 };
+
+/* A 32-bit slot holding a 30-bit value of 1, offset by 1 bit.
+   Big-endian.  The test expects this to read as 1, so the 0x80 in
+   the first byte is not part of the value.  */
+unsigned char be30_1_off[4] = { 0x80, 0, 0, 2 };
+
+/* A 32-bit slot holding a 0-bit value.  We don't use 0 in the array
+   here, to catch any situation where gdb tries to use the memory.
+   The test also places its zero-byte-sized variable at this
+   address.  */
+unsigned char u32_0[4] = { 0xff, 0xff, 0xff, 0xff };
+
+/* The test only inspects and modifies the globals above from the
+   debugger, so there is nothing to execute here.  */
+int
+main (void)
+{
+  return 0;
+}
--- a/gdb/testsuite/gdb.dwarf2/intbits.exp
+++ b/gdb/testsuite/gdb.dwarf2/intbits.exp
@ -0,0 +1,201 @@
+# Copyright 2020 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Test support for non-byte-sized integer base types.
+
+load_lib dwarf.exp
+
+# This test can only be run on targets which support DWARF-2 and use gas.
+if {![dwarf2_support]} {
+    return 0
+}
+
+# srcfile is the C source holding the raw bytes; srcfile2 names the
+# assembly file that the DWARF assembler generates below.
+standard_testfile .c .S
+
+set executable ${testfile}
+set asm_file [standard_output_file ${srcfile2}]
+
+# NOTE(review): this first build uses the C source alone, before any
+# DWARF has been generated; presumably it exists so that the symbol
+# lookups in the DWARF description below can work -- confirm.
+if [prepare_for_testing "failed to prepare" ${testfile} ${srcfile}] {
+    return -1
+}
+
+# Create the DWARF.  Each base type below is paired with a variable
+# of that type whose location is one of the byte arrays defined in
+# the C source.
+Dwarf::assemble ${asm_file} {
+    cu {} {
+	DW_TAG_compile_unit {
+	    {DW_AT_language @DW_LANG_C_plus_plus}
+	} {
+	    declare_labels i7_type u1_type u17_type u31_type \
+		u31_1_type u32_0_type u0_0_type be30_1_type
+
+	    # A signed 7-bit integer stored in 2 bytes.
+	    i7_type: DW_TAG_base_type {
+		{DW_AT_encoding @DW_ATE_signed}
+		{DW_AT_endianity @DW_END_little}
+		{DW_AT_name "i7"}
+		{DW_AT_byte_size 2 DW_FORM_udata}
+		{DW_AT_bit_size 7 DW_FORM_udata}
+	    }
+
+	    DW_TAG_variable {
+		{DW_AT_name "v_i16_m1"}
+		{DW_AT_type :${i7_type}}
+		{DW_AT_external 1 DW_FORM_flag}
+		{DW_AT_location {DW_OP_addr [gdb_target_symbol "i16_m1"]}
+		    SPECIAL_expr}
+	    }
+
+	    # A 1-bit boolean at bit offset 2, stored in 2 bytes.
+	    u1_type: DW_TAG_base_type {
+		{DW_AT_encoding @DW_ATE_boolean}
+		{DW_AT_endianity @DW_END_little}
+		{DW_AT_name "u1"}
+		{DW_AT_byte_size 2 DW_FORM_udata}
+		{DW_AT_bit_size 1 DW_FORM_udata}
+		{DW_AT_data_bit_offset 2 DW_FORM_udata}
+	    }
+
+	    DW_TAG_variable {
+		{DW_AT_name "v_u16_1"}
+		{DW_AT_type :${u1_type}}
+		{DW_AT_external 1 DW_FORM_flag}
+		{DW_AT_location {DW_OP_addr [gdb_target_symbol "u16_1"]}
+		    SPECIAL_expr}
+	    }
+
+	    # A 17-bit integer stored in 4 bytes.  Despite the "u17"
+	    # name, the encoding is signed.
+	    u17_type: DW_TAG_base_type {
+		{DW_AT_encoding @DW_ATE_signed}
+		{DW_AT_endianity @DW_END_little}
+		{DW_AT_name "u17"}
+		{DW_AT_byte_size 4 DW_FORM_udata}
+		{DW_AT_bit_size 17 DW_FORM_udata}
+	    }
+
+	    DW_TAG_variable {
+		{DW_AT_name "v_u32_m2"}
+		{DW_AT_type :${u17_type}}
+		{DW_AT_external 1 DW_FORM_flag}
+		{DW_AT_location {DW_OP_addr [gdb_target_symbol "u32_m2"]}
+		    SPECIAL_expr}
+	    }
+
+	    # An unsigned 31-bit integer stored in 4 bytes.
+	    u31_type: DW_TAG_base_type {
+		{DW_AT_encoding @DW_ATE_unsigned}
+		{DW_AT_endianity @DW_END_little}
+		{DW_AT_name "u31"}
+		{DW_AT_byte_size 4 DW_FORM_udata}
+		{DW_AT_bit_size 31 DW_FORM_udata}
+	    }
+
+	    DW_TAG_variable {
+		{DW_AT_name "v_u32_1"}
+		{DW_AT_type :${u31_type}}
+		{DW_AT_external 1 DW_FORM_flag}
+		{DW_AT_location {DW_OP_addr [gdb_target_symbol "u32_1"]}
+		    SPECIAL_expr}
+	    }
+
+	    # An unsigned 31-bit integer at bit offset 1, stored in 4
+	    # bytes.
+	    u31_1_type: DW_TAG_base_type {
+		{DW_AT_encoding @DW_ATE_unsigned}
+		{DW_AT_endianity @DW_END_little}
+		{DW_AT_name "u31_1"}
+		{DW_AT_byte_size 4 DW_FORM_udata}
+		{DW_AT_bit_size 31 DW_FORM_udata}
+		{DW_AT_data_bit_offset 1 DW_FORM_udata}
+	    }
+
+	    DW_TAG_variable {
+		{DW_AT_name "v_u32_1_off"}
+		{DW_AT_type :${u31_1_type}}
+		{DW_AT_external 1 DW_FORM_flag}
+		{DW_AT_location {DW_OP_addr [gdb_target_symbol "u32_1_off"]}
+		    SPECIAL_expr}
+	    }
+
+	    # A big-endian unsigned 30-bit integer at bit offset 1,
+	    # stored in 4 bytes.
+	    be30_1_type: DW_TAG_base_type {
+		{DW_AT_encoding @DW_ATE_unsigned}
+		{DW_AT_endianity @DW_END_big}
+		{DW_AT_name "be30_1"}
+		{DW_AT_byte_size 4 DW_FORM_udata}
+		{DW_AT_bit_size 30 DW_FORM_udata}
+		{DW_AT_data_bit_offset 1 DW_FORM_udata}
+	    }
+
+	    DW_TAG_variable {
+		{DW_AT_name "v_be30_1_off"}
+		{DW_AT_type :${be30_1_type}}
+		{DW_AT_external 1 DW_FORM_flag}
+		{DW_AT_location {DW_OP_addr [gdb_target_symbol "be30_1_off"]}
+		    SPECIAL_expr}
+	    }
+
+	    # An unsigned integer with 4 bytes of storage but a bit
+	    # size of 0.
+	    u32_0_type: DW_TAG_base_type {
+		{DW_AT_encoding @DW_ATE_unsigned}
+		{DW_AT_endianity @DW_END_little}
+		{DW_AT_name "u32_0"}
+		{DW_AT_byte_size 4 DW_FORM_udata}
+		{DW_AT_bit_size 0 DW_FORM_udata}
+	    }
+
+	    DW_TAG_variable {
+		{DW_AT_name "v_u32_0"}
+		{DW_AT_type :${u32_0_type}}
+		{DW_AT_external 1 DW_FORM_flag}
+		{DW_AT_location {DW_OP_addr [gdb_target_symbol "u32_0"]}
+		    SPECIAL_expr}
+	    }
+
+	    # An unsigned integer with a byte size of 0 and no bit
+	    # size attribute at all.
+	    u0_0_type: DW_TAG_base_type {
+		{DW_AT_encoding @DW_ATE_unsigned}
+		{DW_AT_endianity @DW_END_little}
+		{DW_AT_name "u0_0"}
+		{DW_AT_byte_size 0 DW_FORM_udata}
+	    }
+
+	    # This variable shares the address of v_u32_0.
+	    DW_TAG_variable {
+		{DW_AT_name "v_u0_0"}
+		{DW_AT_type :${u0_0_type}}
+		{DW_AT_external 1 DW_FORM_flag}
+		{DW_AT_location {DW_OP_addr [gdb_target_symbol "u32_0"]}
+		    SPECIAL_expr}
+	    }
+	}
+    }
+}
+
+# Build the test program from the generated DWARF plus the C source,
+# then run to main so the variables can be read and written.
+if {[prepare_for_testing "failed to prepare" ${executable} \
+	 [list ${asm_file} ${srcfile}] {}]} {
+    return -1
+}
+
+if {![runto_main]} {
+    return -1
+}
+
+# Reads.  Each value must be extracted from only the bits that the
+# type's DW_AT_bit_size and DW_AT_data_bit_offset select.
+gdb_test "print v_i16_m1" "= -1"
+gdb_test "print v_u16_1" "= true"
+gdb_test "print v_u32_m2" "= -2"
+gdb_test "print v_u32_1" "= 1"
+# Zero-width values print as 0 even though the bytes behind both
+# variables are all 0xff.
+gdb_test "print v_u32_0" "= 0"
+gdb_test "print v_u0_0" "= 0"
+
+# Writes.  An assigned value must read back unchanged.
+gdb_test "print v_i16_m1 = 7" "= 7"
+gdb_test "print v_i16_m1" "= 7" "print v_i16_m1 after assignment"
+
+# With a bit offset of 1, storing 7 must leave 7 << 1 == 0x0e in the
+# least significant byte of the slot.
+gdb_test "print v_u32_1_off" "= 1"
+gdb_test "print v_u32_1_off = 7" " = 7"
+gdb_test "x/4xb &v_u32_1_off" ":\t0x0e\t0x00\t0x00\t0x00"
+
+# Same for the big-endian type, where the least significant byte is
+# the last one.  The 0x80 initially present in the first byte is
+# expected to be gone after the store.
+gdb_test "print v_be30_1_off" "= 1"
+gdb_test "print v_be30_1_off = 7" " = 7"
+gdb_test "x/4xb &v_be30_1_off" ":\t0x00\t0x00\t0x00\t0x0e"
--- a/gdb/value.c
+++ b/gdb/value.c
@ -2776,10 +2776,27 @@ unpack_long (struct type *type, const gdb_byte *valaddr)
    case TYPE_CODE_MEMBERPTR:
      {
 	LONGEST result;
+
+	if (type->bit_size_differs_p ())
+	  {
+	    unsigned bit_off = type->bit_offset ();
+	    unsigned bit_size = type->bit_size ();
+	    if (bit_size == 0)
+	      {
+		/* unpack_bits_as_long doesn't handle this case the
+		   way we'd like, so handle it here.  */
+		result = 0;
+	      }
+	    else
+	      result = unpack_bits_as_long (type, valaddr, bit_off, bit_size);
+	  }
+	else
+	  {
 	    if (nosign)
 	      result = extract_unsigned_integer (valaddr, len, byte_order);
 	    else
 	      result = extract_signed_integer (valaddr, len, byte_order);
+	  }
 	if (code == TYPE_CODE_RANGE)
 	  result += type->bounds ()->bias;
 	return result;
@ -3339,6 +3356,13 @@ pack_long (gdb_byte *buf, struct type *type, LONGEST num)
    case TYPE_CODE_FLAGS:
    case TYPE_CODE_BOOL:
    case TYPE_CODE_MEMBERPTR:
+      if (type->bit_size_differs_p ())
+	{
+	  unsigned bit_off = type->bit_offset ();
+	  unsigned bit_size = type->bit_size ();
+	  num &= ((ULONGEST) 1 << bit_size) - 1;
+	  num <<= bit_off;
+	}
      store_signed_integer (buf, len, byte_order, num);
      break;

@ -3381,6 +3405,13 @@ pack_unsigned_long (gdb_byte *buf, struct type *type, ULONGEST num)
    case TYPE_CODE_BOOL:
    case TYPE_CODE_RANGE:
    case TYPE_CODE_MEMBERPTR:
+      if (type->bit_size_differs_p ())
+	{
+	  unsigned bit_off = type->bit_offset ();
+	  unsigned bit_size = type->bit_size ();
+	  num &= ((ULONGEST) 1 << bit_size) - 1;
+	  num <<= bit_off;
+	}
      store_unsigned_integer (buf, len, byte_order, num);
      break;