libcpp: Implement C++23 P2290R3 - Delimited escape sequences [PR106645]
The following patch implements the C++23 P2290R3 paper. 2022-08-20 Jakub Jelinek <jakub@redhat.com> PR c++/106645 libcpp/ * include/cpplib.h (struct cpp_options): Implement P2290R3 - Delimited escape sequences. Add delimited_escape_seqs member. * init.cc (struct lang_flags): Likewise. (lang_defaults): Add delim column. (cpp_set_lang): Copy over delimited_escape_seqs. * charset.cc (extend_char_range): New function. (_cpp_valid_ucn): Use it. Handle delimited escape sequences. (convert_hex): Likewise. (convert_oct): Likewise. (convert_ucn): Use extend_char_range. (convert_escape): Call convert_oct even for \o. (_cpp_interpret_identifier): Handle delimited escape sequences. * lex.cc (get_bidi_ucn_1): Likewise. Add end argument, fill it in. (get_bidi_ucn): Adjust get_bidi_ucn_1 caller. Use end argument to compute num_bytes. gcc/testsuite/ * c-c++-common/cpp/delimited-escape-seq-1.c: New test. * c-c++-common/cpp/delimited-escape-seq-2.c: New test. * c-c++-common/cpp/delimited-escape-seq-3.c: New test. * c-c++-common/Wbidi-chars-24.c: New test. * gcc.dg/cpp/delimited-escape-seq-1.c: New test. * gcc.dg/cpp/delimited-escape-seq-2.c: New test. * g++.dg/cpp/delimited-escape-seq-1.C: New test. * g++.dg/cpp/delimited-escape-seq-2.C: New test.
This commit is contained in:
parent
613e9e16b8
commit
e9dd050e0c
12 changed files with 432 additions and 50 deletions
28
gcc/testsuite/c-c++-common/Wbidi-chars-24.c
Normal file
28
gcc/testsuite/c-c++-common/Wbidi-chars-24.c
Normal file
|
@ -0,0 +1,28 @@
|
|||
/* PR preprocessor/103026 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-Wbidi-chars=ucn,unpaired" } */
|
||||
/* Test nesting of bidi chars in various contexts. */
|
||||
|
||||
void
|
||||
g1 ()
|
||||
{
|
||||
const char *s1 = "a b c LRE\u{202a} 1 2 3 PDI\u{00000000000000000000000002069} x y z";
|
||||
/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||||
const char *s2 = "a b c RLE\u{00202b} 1 2 3 PDI\u{2069} x y z";
|
||||
/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||||
const char *s3 = "a b c LRO\u{000000202d} 1 2 3 PDI\u{02069} x y z";
|
||||
/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||||
const char *s4 = "a b c RLO\u{202e} 1 2 3 PDI\u{00000002069} x y z";
|
||||
/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||||
const char *s5 = "a b c LRI\u{002066} 1 2 3 PDF\u{202C} x y z";
|
||||
/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||||
const char *s6 = "a b c RLI\u{02067} 1 2 3 PDF\u{202c} x y z";
|
||||
/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||||
const char *s7 = "a b c FSI\u{0002068} 1 2 3 PDF\u{0202c} x y z";
|
||||
/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||||
}
|
||||
|
||||
int A\u{202a}B\u{2069}C;
|
||||
/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
||||
int a\u{00000202b}B\u{000000002069}c;
|
||||
/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
|
92
gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-1.c
Normal file
92
gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-1.c
Normal file
|
@ -0,0 +1,92 @@
|
|||
/* P2290R3 - Delimited escape sequences */
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target wchar } */
|
||||
/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
|
||||
/* { dg-options "-std=c++23" { target c++ } } */
|
||||
|
||||
#ifndef __cplusplus
|
||||
#include <wchar.h>
|
||||
typedef __CHAR16_TYPE__ char16_t;
|
||||
typedef __CHAR32_TYPE__ char32_t;
|
||||
#endif
|
||||
|
||||
const char32_t *a = U"\u{1234}\u{10fffd}\u{000000000000000000000000000000000000000000000000000000000001234}\u{10FFFD}";
|
||||
const char32_t *b = U"\x{1234}\x{10fffd}\x{000000000000000000000000000000000000000000000000000000000001234}";
|
||||
const char32_t *c = U"\o{1234}\o{4177775}\o{000000000000000000000000000000000000000000000000000000000000000000000000004177775}";
|
||||
const char16_t *d = u"\u{1234}\u{bFFd}\u{00000000000000000000000000000001234}";
|
||||
const char16_t *e = u"\x{1234}\x{BffD}\x{000001234}";
|
||||
const char16_t *f = u"\o{1234}\o{137775}\o{000000000000000137775}";
|
||||
const wchar_t *g = L"\u{1234}\u{bFFd}\u{00000000000000000000000000000001234}";
|
||||
const wchar_t *h = L"\x{1234}\x{bFFd}\x{000001234}";
|
||||
const wchar_t *i = L"\o{1234}\o{137775}\o{000000000000000137775}";
|
||||
#ifdef __cplusplus
|
||||
const char *j = "\u{34}\u{000000000000000003D}";
|
||||
#endif
|
||||
const char *k = "\x{34}\x{000000000000000003D}";
|
||||
const char *l = "\o{34}\o{000000000000000176}";
|
||||
|
||||
#if U'\u{1234}' != U'\u1234' || U'\u{10fffd}' != U'\U0010FFFD' \
|
||||
|| U'\x{00000001234}' != U'\x1234' || U'\x{010fffd}' != U'\x10FFFD' \
|
||||
|| U'\o{1234}' != U'\x29c' || U'\o{004177775}' != U'\x10FFFD' \
|
||||
|| u'\u{1234}' != u'\u1234' || u'\u{0bffd}' != u'\uBFFD' \
|
||||
|| u'\x{00000001234}' != u'\x1234' || u'\x{0Bffd}' != u'\x0bFFD' \
|
||||
|| u'\o{1234}' != u'\x29c' || u'\o{00137775}' != u'\xBFFD' \
|
||||
|| L'\u{1234}' != L'\u1234' || L'\u{0bffd}' != L'\uBFFD' \
|
||||
|| L'\x{00000001234}' != L'\x1234' || L'\x{0bffd}' != L'\x0bFFD' \
|
||||
|| L'\o{1234}' != L'\x29c' || L'\o{00137775}' != L'\xBFFD' \
|
||||
|| '\x{34}' != '\x034' || '\x{0003d}' != '\x003D' \
|
||||
|| '\o{34}' != '\x1C' || '\o{176}' != '\x007E'
|
||||
#error Bad
|
||||
#endif
|
||||
#ifdef __cplusplus
|
||||
#if '\u{0000000034}' != '\u0034' || '\u{3d}' != '\u003D'
|
||||
#error Bad
|
||||
#endif
|
||||
#endif
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
if (a[0] != U'\u1234' || a[0] != U'\u{1234}'
|
||||
|| a[1] != U'\U0010FFFD' || a[1] != U'\u{000010fFfD}'
|
||||
|| a[2] != a[0]
|
||||
|| a[3] != a[1]
|
||||
|| b[0] != U'\x1234' || b[0] != U'\x{001234}'
|
||||
|| b[1] != U'\x10FFFD' || b[1] != U'\x{0010fFfD}'
|
||||
|| b[2] != b[0]
|
||||
|| c[0] != U'\x29c' || c[0] != U'\o{001234}'
|
||||
|| c[1] != U'\x10FFFD' || c[1] != U'\o{4177775}'
|
||||
|| c[2] != c[1])
|
||||
__builtin_abort ();
|
||||
if (d[0] != u'\u1234' || d[0] != u'\u{1234}'
|
||||
|| d[1] != u'\U0000BFFD' || d[1] != u'\u{00000bFfD}'
|
||||
|| d[2] != d[0]
|
||||
|| e[0] != u'\x1234' || e[0] != u'\x{001234}'
|
||||
|| e[1] != u'\xBFFD' || e[1] != u'\x{00bFfD}'
|
||||
|| e[2] != e[0]
|
||||
|| f[0] != u'\x29c' || f[0] != u'\o{001234}'
|
||||
|| f[1] != u'\xbFFD' || f[1] != u'\o{137775}'
|
||||
|| f[2] != f[1])
|
||||
__builtin_abort ();
|
||||
if (g[0] != L'\u1234' || g[0] != L'\u{1234}'
|
||||
|| g[1] != L'\U0000BFFD' || g[1] != L'\u{00000bFfD}'
|
||||
|| g[2] != g[0]
|
||||
|| h[0] != L'\x1234' || h[0] != L'\x{001234}'
|
||||
|| h[1] != L'\xBFFD' || h[1] != L'\x{00bFfD}'
|
||||
|| h[2] != h[0]
|
||||
|| i[0] != L'\x29c' || i[0] != L'\o{001234}'
|
||||
|| i[1] != L'\xbFFD' || i[1] != L'\o{137775}'
|
||||
|| i[2] != i[1])
|
||||
__builtin_abort ();
|
||||
#ifdef __cplusplus
|
||||
if (j[0] != '\u0034' || j[0] != '\u{034}'
|
||||
|| j[1] != '\U0000003D' || j[1] != '\u{000003d}')
|
||||
__builtin_abort ();
|
||||
#endif
|
||||
if (k[0] != '\x034' || k[0] != '\x{0034}'
|
||||
|| k[1] != '\x3D' || k[1] != '\x{3d}'
|
||||
|| l[0] != '\x1c' || l[0] != '\o{0034}'
|
||||
|| l[1] != '\x07e' || l[1] != '\o{176}' || l[1] != '\176')
|
||||
__builtin_abort ();
|
||||
return 0;
|
||||
}
|
18
gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-2.c
Normal file
18
gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-2.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* P2290R3 - Delimited escape sequences */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
|
||||
/* { dg-options "-std=c++23" { target c++ } } */
|
||||
|
||||
int jalape\u{f1}o = 42;
|
||||
|
||||
int
|
||||
caf\u{000e9} (void)
|
||||
{
|
||||
return jalape\u00F1o;
|
||||
}
|
||||
|
||||
int
|
||||
test (void)
|
||||
{
|
||||
return caf\u00e9 ();
|
||||
}
|
33
gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-3.c
Normal file
33
gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-3.c
Normal file
|
@ -0,0 +1,33 @@
|
|||
/* P2290R3 - Delimited escape sequences */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target wchar } */
|
||||
/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */
|
||||
/* { dg-options "-std=c++23" { target c++ } } */
|
||||
|
||||
#ifndef __cplusplus
|
||||
typedef __CHAR32_TYPE__ char32_t;
|
||||
#endif
|
||||
|
||||
const char32_t *a = U"\u{}"; /* { dg-error "empty delimited escape sequence" } */
|
||||
/* { dg-error "is not a valid universal character" "" { target c } .-1 } */
|
||||
const char32_t *b = U"\u{12" "34}"; /* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
|
||||
const char32_t *c = U"\u{0000ffffffff}"; /* { dg-error "is not a valid universal character" } */
|
||||
const char32_t *d = U"\u{010000edcb}"; /* { dg-error "is not a valid universal character" } */
|
||||
const char32_t *e = U"\u{02000000000000000000edcb}"; /* { dg-error "is not a valid universal character" } */
|
||||
const char32_t *f = U"\u{123ghij}"; /* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
|
||||
const char32_t *g = U"\u{123.}"; /* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
|
||||
const char32_t *h = U"\u{.}"; /* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */
|
||||
const char32_t *i = U"\x{}"; /* { dg-error "empty delimited escape sequence" } */
|
||||
const char32_t *j = U"\x{12" "34}"; /* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
|
||||
const char32_t *k = U"\x{0000ffffffff}";
|
||||
const char32_t *l = U"\x{010000edcb}"; /* { dg-warning "hex escape sequence out of range" } */
|
||||
const char32_t *m = U"\x{02000000000000000000edcb}"; /* { dg-warning "hex escape sequence out of range" } */
|
||||
const char32_t *n = U"\x{123ghij}"; /* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
|
||||
const char32_t *o = U"\x{123.}"; /* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */
|
||||
const char32_t *p = U"\o{}"; /* { dg-error "empty delimited escape sequence" } */
|
||||
const char32_t *q = U"\o{12" "34}"; /* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
|
||||
const char32_t *r = U"\o{0000037777777777}";
|
||||
const char32_t *s = U"\o{040000166713}"; /* { dg-warning "octal escape sequence out of range" } */
|
||||
const char32_t *t = U"\o{02000000000000000000000166713}";/* { dg-warning "octal escape sequence out of range" } */
|
||||
const char32_t *u = U"\o{1238}"; /* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
|
||||
const char32_t *v = U"\o{.}"; /* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */
|
8
gcc/testsuite/g++.dg/cpp/delimited-escape-seq-1.C
Normal file
8
gcc/testsuite/g++.dg/cpp/delimited-escape-seq-1.C
Normal file
|
@ -0,0 +1,8 @@
|
|||
// P2290R3 - Delimited escape sequences
|
||||
// { dg-do compile { target c++11 } }
|
||||
// { dg-require-effective-target wchar }
|
||||
// { dg-options "-pedantic" }
|
||||
|
||||
const char32_t *a = U"\u{1234}"; // { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
|
||||
const char32_t *b = U"\x{1234}"; // { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
|
||||
const char32_t *c = U"\o{1234}"; // { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } }
|
8
gcc/testsuite/g++.dg/cpp/delimited-escape-seq-2.C
Normal file
8
gcc/testsuite/g++.dg/cpp/delimited-escape-seq-2.C
Normal file
|
@ -0,0 +1,8 @@
|
|||
// P2290R3 - Delimited escape sequences
|
||||
// { dg-do compile { target c++11 } }
|
||||
// { dg-require-effective-target wchar }
|
||||
// { dg-options "-pedantic-errors" }
|
||||
|
||||
const char32_t *a = U"\u{1234}"; // { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }
|
||||
const char32_t *b = U"\x{1234}"; // { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }
|
||||
const char32_t *c = U"\o{1234}"; // { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } }
|
10
gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-1.c
Normal file
10
gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-1.c
Normal file
|
@ -0,0 +1,10 @@
|
|||
/* P2290R3 - Delimited escape sequences */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target wchar } */
|
||||
/* { dg-options "-std=gnu99 -Wno-c++-compat -pedantic" } */
|
||||
|
||||
typedef __CHAR32_TYPE__ char32_t;
|
||||
|
||||
const char32_t *a = U"\u{1234}"; /* { dg-warning "delimited escape sequences are only valid in" } */
|
||||
const char32_t *b = U"\x{1234}"; /* { dg-warning "delimited escape sequences are only valid in" } */
|
||||
const char32_t *c = U"\o{1234}"; /* { dg-warning "delimited escape sequences are only valid in" } */
|
10
gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-2.c
Normal file
10
gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-2.c
Normal file
|
@ -0,0 +1,10 @@
|
|||
/* P2290R3 - Delimited escape sequences */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target wchar } */
|
||||
/* { dg-options "-std=gnu99 -Wno-c++-compat -pedantic-errors" } */
|
||||
|
||||
typedef __CHAR32_TYPE__ char32_t;
|
||||
|
||||
const char32_t *a = U"\u{1234}"; /* { dg-error "delimited escape sequences are only valid in" } */
|
||||
const char32_t *b = U"\x{1234}"; /* { dg-error "delimited escape sequences are only valid in" } */
|
||||
const char32_t *c = U"\o{1234}"; /* { dg-error "delimited escape sequences are only valid in" } */
|
|
@ -1036,6 +1036,19 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* Increment char_range->m_finish by a single character.
   When LOC_READER is NULL this is a no-op, so callers need not guard
   the call themselves.  When LOC_READER is non-NULL, CHAR_RANGE must
   be non-NULL too (asserted); its m_finish is then replaced by the
   m_finish of the next range obtained from LOC_READER->get_next ().  */
|
||||
|
||||
static void
|
||||
extend_char_range (source_range *char_range,
|
||||
cpp_string_location_reader *loc_reader)
|
||||
{
|
||||
if (loc_reader)
|
||||
{
|
||||
gcc_assert (char_range);
|
||||
char_range->m_finish = loc_reader->get_next ().m_finish;
|
||||
}
|
||||
}
|
||||
|
||||
/* [lex.charset]: The character designated by the universal character
|
||||
name \UNNNNNNNN is that character whose character short name in
|
||||
ISO/IEC 10646 is NNNNNNNN; the character designated by the
|
||||
|
@ -1081,6 +1094,7 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
|
|||
unsigned int length;
|
||||
const uchar *str = *pstr;
|
||||
const uchar *base = str - 2;
|
||||
bool delimited = false;
|
||||
|
||||
if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))
|
||||
cpp_error (pfile, CPP_DL_WARNING,
|
||||
|
@ -1095,7 +1109,17 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
|
|||
(int) str[-1]);
|
||||
|
||||
if (str[-1] == 'u')
|
||||
length = 4;
|
||||
{
|
||||
length = 4;
|
||||
if (str < limit && *str == '{')
|
||||
{
|
||||
str++;
|
||||
/* Magic value to indicate no digits seen. */
|
||||
length = 32;
|
||||
delimited = true;
|
||||
extend_char_range (char_range, loc_reader);
|
||||
}
|
||||
}
|
||||
else if (str[-1] == 'U')
|
||||
length = 8;
|
||||
else
|
||||
|
@ -1107,18 +1131,53 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
|
|||
result = 0;
|
||||
do
|
||||
{
|
||||
if (str == limit)
|
||||
break;
|
||||
c = *str;
|
||||
if (!ISXDIGIT (c))
|
||||
break;
|
||||
str++;
|
||||
if (loc_reader)
|
||||
extend_char_range (char_range, loc_reader);
|
||||
if (delimited)
|
||||
{
|
||||
gcc_assert (char_range);
|
||||
char_range->m_finish = loc_reader->get_next ().m_finish;
|
||||
if (!result)
|
||||
/* Accept arbitrary number of leading zeros.
|
||||
16 is another magic value, smaller than 32 above
|
||||
and bigger than 8, so that upon encountering first
|
||||
non-zero digit we can count 8 digits and after that
|
||||
or in overflow bit and ensure length doesn't decrease
|
||||
to 0, as delimited escape sequence doesn't have upper
|
||||
bound on the number of hex digits. */
|
||||
length = 16;
|
||||
else if (length == 16 - 8)
|
||||
{
|
||||
/* Make sure we detect overflows. */
|
||||
result |= 0x8000000;
|
||||
++length;
|
||||
}
|
||||
}
|
||||
|
||||
result = (result << 4) + hex_value (c);
|
||||
}
|
||||
while (--length && str < limit);
|
||||
while (--length);
|
||||
|
||||
if (delimited
|
||||
&& str < limit
|
||||
&& *str == '}'
|
||||
&& (length != 32 || !identifier_pos))
|
||||
{
|
||||
if (length == 32)
|
||||
cpp_error (pfile, CPP_DL_ERROR,
|
||||
"empty delimited escape sequence");
|
||||
else if (!CPP_OPTION (pfile, delimited_escape_seqs)
|
||||
&& CPP_OPTION (pfile, cpp_pedantic))
|
||||
cpp_error (pfile, CPP_DL_PEDWARN,
|
||||
"delimited escape sequences are only valid in C++23");
|
||||
str++;
|
||||
length = 0;
|
||||
delimited = false;
|
||||
extend_char_range (char_range, loc_reader);
|
||||
}
|
||||
|
||||
/* Partial UCNs are not valid in strings, but decompose into
|
||||
multiple tokens in identifiers, so we can't give a helpful
|
||||
|
@ -1132,9 +1191,14 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
|
|||
*pstr = str;
|
||||
if (length)
|
||||
{
|
||||
cpp_error (pfile, CPP_DL_ERROR,
|
||||
"incomplete universal character name %.*s",
|
||||
(int) (str - base), base);
|
||||
if (!delimited)
|
||||
cpp_error (pfile, CPP_DL_ERROR,
|
||||
"incomplete universal character name %.*s",
|
||||
(int) (str - base), base);
|
||||
else
|
||||
cpp_error (pfile, CPP_DL_ERROR,
|
||||
"'\\u{' not terminated with '}' after %.*s",
|
||||
(int) (str - base), base);
|
||||
result = 1;
|
||||
}
|
||||
/* The C99 standard permits $, @ and ` to be specified as UCNs. We use
|
||||
|
@ -1212,9 +1276,8 @@ convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
|||
|
||||
from++; /* Skip u/U. */
|
||||
|
||||
if (loc_reader)
|
||||
/* The u/U is part of the spelling of this character. */
|
||||
char_range.m_finish = loc_reader->get_next ().m_finish;
|
||||
/* The u/U is part of the spelling of this character. */
|
||||
extend_char_range (&char_range, loc_reader);
|
||||
|
||||
_cpp_valid_ucn (pfile, &from, limit, 0, &nst,
|
||||
&ucn, &char_range, loc_reader);
|
||||
|
@ -1392,6 +1455,8 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
|||
int digits_found = 0;
|
||||
size_t width = cvt.width;
|
||||
size_t mask = width_to_mask (width);
|
||||
bool delimited = false;
|
||||
const uchar *base = from - 1;
|
||||
|
||||
/* loc_reader and ranges must either be both NULL, or both be non-NULL. */
|
||||
gcc_assert ((loc_reader != NULL) == (ranges != NULL));
|
||||
|
@ -1404,8 +1469,14 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
|||
from++;
|
||||
|
||||
/* The 'x' is part of the spelling of this character. */
|
||||
if (loc_reader)
|
||||
char_range.m_finish = loc_reader->get_next ().m_finish;
|
||||
extend_char_range (&char_range, loc_reader);
|
||||
|
||||
if (from < limit && *from == '{')
|
||||
{
|
||||
delimited = true;
|
||||
from++;
|
||||
extend_char_range (&char_range, loc_reader);
|
||||
}
|
||||
|
||||
while (from < limit)
|
||||
{
|
||||
|
@ -1413,19 +1484,42 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
|||
if (! hex_p (c))
|
||||
break;
|
||||
from++;
|
||||
if (loc_reader)
|
||||
char_range.m_finish = loc_reader->get_next ().m_finish;
|
||||
extend_char_range (&char_range, loc_reader);
|
||||
overflow |= n ^ (n << 4 >> 4);
|
||||
n = (n << 4) + hex_value (c);
|
||||
digits_found = 1;
|
||||
}
|
||||
|
||||
if (delimited && from < limit && *from == '}')
|
||||
{
|
||||
from++;
|
||||
if (!digits_found)
|
||||
{
|
||||
cpp_error (pfile, CPP_DL_ERROR,
|
||||
"empty delimited escape sequence");
|
||||
return from;
|
||||
}
|
||||
else if (!CPP_OPTION (pfile, delimited_escape_seqs)
|
||||
&& CPP_OPTION (pfile, cpp_pedantic))
|
||||
cpp_error (pfile, CPP_DL_PEDWARN,
|
||||
"delimited escape sequences are only valid in C++23");
|
||||
delimited = false;
|
||||
extend_char_range (&char_range, loc_reader);
|
||||
}
|
||||
|
||||
if (!digits_found)
|
||||
{
|
||||
cpp_error (pfile, CPP_DL_ERROR,
|
||||
"\\x used with no following hex digits");
|
||||
return from;
|
||||
}
|
||||
else if (delimited)
|
||||
{
|
||||
cpp_error (pfile, CPP_DL_ERROR,
|
||||
"'\\x{' not terminated with '}' after %.*s",
|
||||
(int) (from - base), base);
|
||||
return from;
|
||||
}
|
||||
|
||||
if (overflow | (n != (n & mask)))
|
||||
{
|
||||
|
@ -1459,25 +1553,71 @@ convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
|||
cpp_substring_ranges *ranges)
|
||||
{
|
||||
size_t count = 0;
|
||||
cppchar_t c, n = 0;
|
||||
cppchar_t c, n = 0, overflow = 0;
|
||||
size_t width = cvt.width;
|
||||
size_t mask = width_to_mask (width);
|
||||
bool delimited = false;
|
||||
const uchar *base = from - 1;
|
||||
|
||||
/* loc_reader and ranges must either be both NULL, or both be non-NULL. */
|
||||
gcc_assert ((loc_reader != NULL) == (ranges != NULL));
|
||||
|
||||
if (from < limit && *from == 'o')
|
||||
{
|
||||
from++;
|
||||
extend_char_range (&char_range, loc_reader);
|
||||
if (from == limit || *from != '{')
|
||||
cpp_error (pfile, CPP_DL_ERROR, "'\\o' not followed by '{'");
|
||||
else
|
||||
{
|
||||
from++;
|
||||
extend_char_range (&char_range, loc_reader);
|
||||
delimited = true;
|
||||
}
|
||||
}
|
||||
|
||||
while (from < limit && count++ < 3)
|
||||
{
|
||||
c = *from;
|
||||
if (c < '0' || c > '7')
|
||||
break;
|
||||
from++;
|
||||
if (loc_reader)
|
||||
char_range.m_finish = loc_reader->get_next ().m_finish;
|
||||
extend_char_range (&char_range, loc_reader);
|
||||
if (delimited)
|
||||
{
|
||||
count = 2;
|
||||
overflow |= n ^ (n << 3 >> 3);
|
||||
}
|
||||
n = (n << 3) + c - '0';
|
||||
}
|
||||
|
||||
if (n != (n & mask))
|
||||
if (delimited)
|
||||
{
|
||||
if (from < limit && *from == '}')
|
||||
{
|
||||
from++;
|
||||
if (count == 1)
|
||||
{
|
||||
cpp_error (pfile, CPP_DL_ERROR,
|
||||
"empty delimited escape sequence");
|
||||
return from;
|
||||
}
|
||||
else if (!CPP_OPTION (pfile, delimited_escape_seqs)
|
||||
&& CPP_OPTION (pfile, cpp_pedantic))
|
||||
cpp_error (pfile, CPP_DL_PEDWARN,
|
||||
"delimited escape sequences are only valid in C++23");
|
||||
extend_char_range (&char_range, loc_reader);
|
||||
}
|
||||
else
|
||||
{
|
||||
cpp_error (pfile, CPP_DL_ERROR,
|
||||
"'\\o{' not terminated with '}' after %.*s",
|
||||
(int) (from - base), base);
|
||||
return from;
|
||||
}
|
||||
}
|
||||
|
||||
if (overflow | (n != (n & mask)))
|
||||
{
|
||||
cpp_error (pfile, CPP_DL_PEDWARN,
|
||||
"octal escape sequence out of range");
|
||||
|
@ -1535,6 +1675,7 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
|||
|
||||
case '0': case '1': case '2': case '3':
|
||||
case '4': case '5': case '6': case '7':
|
||||
case 'o':
|
||||
return convert_oct (pfile, from, limit, tbuf, cvt,
|
||||
char_range, loc_reader, ranges);
|
||||
|
||||
|
@ -2119,15 +2260,27 @@ _cpp_interpret_identifier (cpp_reader *pfile, const uchar *id, size_t len)
|
|||
cppchar_t value = 0;
|
||||
size_t bufleft = len - (bufp - buf);
|
||||
int rval;
|
||||
bool delimited = false;
|
||||
|
||||
idp += 2;
|
||||
if (length == 4 && id[idp] == '{')
|
||||
{
|
||||
delimited = true;
|
||||
idp++;
|
||||
}
|
||||
while (length && idp < len && ISXDIGIT (id[idp]))
|
||||
{
|
||||
value = (value << 4) + hex_value (id[idp]);
|
||||
idp++;
|
||||
length--;
|
||||
if (!delimited)
|
||||
length--;
|
||||
}
|
||||
idp--;
|
||||
if (!delimited)
|
||||
idp--;
|
||||
/* else
|
||||
assert (id[idp] == '}');
|
||||
As the caller ensures it is a valid identifier, if it is
|
||||
delimited escape sequence, it must be terminated by }. */
|
||||
|
||||
/* Special case for EBCDIC: if the identifier contains
|
||||
a '$' specified using a UCN, translate it to EBCDIC. */
|
||||
|
|
|
@ -522,6 +522,9 @@ struct cpp_options
|
|||
/* Nonzero for C++23 size_t literals. */
|
||||
unsigned char size_t_literals;
|
||||
|
||||
/* Nonzero for C++23 delimited escape sequences. */
|
||||
unsigned char delimited_escape_seqs;
|
||||
|
||||
/* Holds the name of the target (execution) character set. */
|
||||
const char *narrow_charset;
|
||||
|
||||
|
|
|
@ -97,34 +97,35 @@ struct lang_flags
|
|||
char size_t_literals;
|
||||
char elifdef;
|
||||
char warning_directive;
|
||||
char delimited_escape_seqs;
|
||||
};
|
||||
|
||||
static const struct lang_flags lang_defaults[] =
|
||||
{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef warndir */
|
||||
/* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 },
|
||||
/* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 },
|
||||
/* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 },
|
||||
/* GNUC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 },
|
||||
/* GNUC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1 },
|
||||
/* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
|
||||
/* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
|
||||
/* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
|
||||
/* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
|
||||
/* STDC17 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
|
||||
/* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1 },
|
||||
/* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 },
|
||||
/* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0 },
|
||||
/* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 },
|
||||
/* CXX11 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0 },
|
||||
/* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0 },
|
||||
/* CXX14 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0 },
|
||||
/* GNUCXX17 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0 },
|
||||
/* CXX17 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0 },
|
||||
/* GNUCXX20 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0 },
|
||||
/* CXX20 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0 },
|
||||
/* GNUCXX23 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0 },
|
||||
/* CXX23 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0 },
|
||||
/* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
|
||||
{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef warndir delim */
|
||||
/* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 },
|
||||
/* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 },
|
||||
/* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 },
|
||||
/* GNUC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 },
|
||||
/* GNUC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0 },
|
||||
/* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
/* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
/* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
/* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
/* STDC17 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
/* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0 },
|
||||
/* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 },
|
||||
/* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0 },
|
||||
/* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0 },
|
||||
/* CXX11 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0 },
|
||||
/* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0 },
|
||||
/* CXX14 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0 },
|
||||
/* GNUCXX17 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0 },
|
||||
/* CXX17 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0 },
|
||||
/* GNUCXX20 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0 },
|
||||
/* CXX20 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0 },
|
||||
/* GNUCXX23 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1 },
|
||||
/* CXX23 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1 },
|
||||
/* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
|
||||
};
|
||||
|
||||
/* Sets internal flags correctly for a given language. */
|
||||
|
@ -155,6 +156,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_lang lang)
|
|||
CPP_OPTION (pfile, size_t_literals) = l->size_t_literals;
|
||||
CPP_OPTION (pfile, elifdef) = l->elifdef;
|
||||
CPP_OPTION (pfile, warning_directive) = l->warning_directive;
|
||||
CPP_OPTION (pfile, delimited_escape_seqs) = l->delimited_escape_seqs;
|
||||
}
|
||||
|
||||
/* Initialize library global state. */
|
||||
|
|
|
@ -1426,19 +1426,35 @@ get_bidi_utf8 (cpp_reader *pfile, const unsigned char *const p, location_t *out)
|
|||
/* Parse a UCN where P points just past \u or \U and return its bidi code. */
|
||||
|
||||
static bidi::kind
|
||||
get_bidi_ucn_1 (const unsigned char *p, bool is_U)
|
||||
get_bidi_ucn_1 (const unsigned char *p, bool is_U, const unsigned char **end)
|
||||
{
|
||||
/* 6.4.3 Universal Character Names
|
||||
\u hex-quad
|
||||
\U hex-quad hex-quad
|
||||
\u { simple-hexadecimal-digit-sequence }
|
||||
where \unnnn means \U0000nnnn. */
|
||||
|
||||
*end = p + 4;
|
||||
if (is_U)
|
||||
{
|
||||
if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
|
||||
return bidi::kind::NONE;
|
||||
/* Skip 4B so we can treat \u and \U the same below. */
|
||||
p += 4;
|
||||
*end += 4;
|
||||
}
|
||||
else if (p[0] == '{')
|
||||
{
|
||||
p++;
|
||||
while (*p == '0')
|
||||
p++;
|
||||
if (p[0] != '2'
|
||||
|| p[1] != '0'
|
||||
|| !ISXDIGIT (p[2])
|
||||
|| !ISXDIGIT (p[3])
|
||||
|| p[4] != '}')
|
||||
return bidi::kind::NONE;
|
||||
*end = p + 5;
|
||||
}
|
||||
|
||||
/* All code points we are looking for start with 20xx. */
|
||||
|
@ -1499,14 +1515,15 @@ get_bidi_ucn_1 (const unsigned char *p, bool is_U)
|
|||
If the kind is not NONE, write the location to *OUT.*/
|
||||
|
||||
static bidi::kind
|
||||
get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U,
|
||||
get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U,
|
||||
location_t *out)
|
||||
{
|
||||
bidi::kind result = get_bidi_ucn_1 (p, is_U);
|
||||
const unsigned char *end;
|
||||
bidi::kind result = get_bidi_ucn_1 (p, is_U, &end);
|
||||
if (result != bidi::kind::NONE)
|
||||
{
|
||||
const unsigned char *start = p - 2;
|
||||
size_t num_bytes = 2 + (is_U ? 8 : 4);
|
||||
size_t num_bytes = end - start;
|
||||
*out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes);
|
||||
}
|
||||
return result;
|
||||
|
|
Loading…
Add table
Reference in a new issue