From 00a81b8b9daf54a09bf535734944d740bfa4ed4b Mon Sep 17 00:00:00 2001
From: Jason Merrill <jason@redhat.com>
Date: Mon, 29 Mar 2010 16:07:29 -0400
Subject: [PATCH] More N3077 raw string changes

	More N3077 raw string changes
	* charset.c (cpp_interpret_string): Don't transform UCNs in raw
	strings.
	* lex.c (bufring_append): Split out from...
	(lex_raw_string): ...here.  Undo trigraph and line splicing
	transformations.  Do process line notes in multi-line literals.
	(_cpp_process_line_notes): Ignore notes that were already handled.

From-SVN: r157804
---
 gcc/testsuite/ChangeLog                    |   8 +
 gcc/testsuite/c-c++-common/raw-string-1.c  |  58 ++++++--
 gcc/testsuite/c-c++-common/raw-string-10.c |  20 +++
 gcc/testsuite/c-c++-common/raw-string-2.c  |  14 +-
 gcc/testsuite/c-c++-common/raw-string-8.c  |  10 ++
 gcc/testsuite/c-c++-common/raw-string-9.c  |  19 +++
 libcpp/ChangeLog                           |   8 +
 libcpp/charset.c                           |  21 +--
 libcpp/internal.h                          |   3 +-
 libcpp/lex.c                               | 161 +++++++++++++++++----
 10 files changed, 262 insertions(+), 60 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/raw-string-10.c
 create mode 100644 gcc/testsuite/c-c++-common/raw-string-8.c
 create mode 100644 gcc/testsuite/c-c++-common/raw-string-9.c

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e4d15c0c22e..8e491952cd5 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,13 @@
 2010-03-29  Jason Merrill  <jason@redhat.com>
 
+	N3077
+	* c-c++-common/raw-string-1.c: Update handling of trigraphs, line
+	splicing and UCNs.
+	* c-c++-common/raw-string-2.c: Add trigraph test.
+	* c-c++-common/raw-string-8.c: New.
+	* c-c++-common/raw-string-9.c: New.
+	* c-c++-common/raw-string-10.c: New.
+
 	* c-c++-common/raw-string-1.c: Combine C and C++ raw string tests.
 	* c-c++-common/raw-string-2.c: Combine C and C++ raw string tests.
 	* c-c++-common/raw-string-3.c: Combine C and C++ raw string tests.
diff --git a/gcc/testsuite/c-c++-common/raw-string-1.c b/gcc/testsuite/c-c++-common/raw-string-1.c
index b790a317738..199a3c6c83f 100644
--- a/gcc/testsuite/c-c++-common/raw-string-1.c
+++ b/gcc/testsuite/c-c++-common/raw-string-1.c
@@ -1,6 +1,6 @@
 // { dg-do run }
 // { dg-require-effective-target wchar }
-// { dg-options "-std=gnu99 -Wno-c++-compat" { target c } }
+// { dg-options "-std=gnu99 -Wno-c++-compat -trigraphs" { target c } }
 // { dg-options "-std=c++0x" { target c++ } }
 
 #ifndef __cplusplus
@@ -13,57 +13,78 @@ typedef __CHAR32_TYPE__ char32_t;
 const char s0[] = R"(a\
 \u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
 c)";
-const char s1[] = "a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const char s1[] = "a\\\n\\u010d\\U0000010D\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
 const char s2[] = R"*|*(a\
 b
 c)"
 c)*|"
 c)*|*";
-const char s3[] = "ab\nc)\"\nc)*|\"\nc";
+const char s3[] = "a\\\nb\nc)\"\nc)*|\"\nc";
+// The ) in ??) below is part of the raw string suffix )".
+const char s4[] = R"(??/
+??/
+??(??<??=??'??!??-??>??)";
+const char s5[] = "?\?/\n?\?/\n?\?(?\?<?\?=?\?'?\?!?\?-?\?>?\?";
 
 const char t0[] = u8R"(a\
 \u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
 c)";
-const char t1[] = u8"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const char t1[] = u8"a\\\n\\u010d\\U0000010D\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
 const char t2[] = u8R"*|*(a\
 b
 c)"
 c)*|"
 c)*|*";
-const char t3[] = u8"ab\nc)\"\nc)*|\"\nc";
+const char t3[] = u8"a\\\nb\nc)\"\nc)*|\"\nc";
+const char t4[] = u8R"(??/
+??/
+??(??<??=??'??!??-??>??)";
+const char t5[] = u8"?\?/\n?\?/\n?\?(?\?<?\?=?\?'?\?!?\?-?\?>?\?";
 
 const char16_t u0[] = uR"(a\
 \u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
 c)";
-const char16_t u1[] = u"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const char16_t u1[] = u"a\\\n\\u010d\\U0000010D\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
 const char16_t u2[] = uR"*|*(a\
 b
 c)"
 c)*|"
 c)*|*";
-const char16_t u3[] = u"ab\nc)\"\nc)*|\"\nc";
+const char16_t u3[] = u"a\\\nb\nc)\"\nc)*|\"\nc";
+const char16_t u4[] = uR"(??/
+??/
+??(??<??=??'??!??-??>??)";
+const char16_t u5[] = u"?\?/\n?\?/\n?\?(?\?<?\?=?\?'?\?!?\?-?\?>?\?";
 
 const char32_t U0[] = UR"(a\
 \u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
 c)";
-const char32_t U1[] = U"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const char32_t U1[] = U"a\\\n\\u010d\\U0000010D\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
 const char32_t U2[] = UR"*|*(a\
 b
 c)"
 c)*|"
 c)*|*";
-const char32_t U3[] = U"ab\nc)\"\nc)*|\"\nc";
+const char32_t U3[] = U"a\\\nb\nc)\"\nc)*|\"\nc";
+const char32_t U4[] = UR"(??/
+??/
+??(??<??=??'??!??-??>??)";
+const char32_t U5[] = U"?\?/\n?\?/\n?\?(?\?<?\?=?\?'?\?!?\?-?\?>?\?";
 
 const wchar_t L0[] = LR"(a\
 \u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
 c)";
-const wchar_t L1[] = L"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const wchar_t L1[] = L"a\\\n\\u010d\\U0000010D\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
 const wchar_t L2[] = LR"*|*(a\
 b
 c)"
 c)*|"
 c)*|*";
-const wchar_t L3[] = L"ab\nc)\"\nc)*|\"\nc";
+const wchar_t L3[] = L"a\\\nb\nc)\"\nc)*|\"\nc";
+const wchar_t L4[] = LR"(??/
+??/
+??(??<??=??'??!??-??>??)";
+const wchar_t L5[] = L"?\?/\n?\?/\n?\?(?\?<?\?=?\?'?\?!?\?-?\?>?\?";
 
 int
 main (void)
@@ -74,30 +95,45 @@ main (void)
   if (sizeof (s2) != sizeof (s3)
       || __builtin_memcmp (s2, s3, sizeof (s2)) != 0)
     __builtin_abort ();
+  if (sizeof (s4) != sizeof (s5)
+      || __builtin_memcmp (s4, s5, sizeof (s4)) != 0)
+    __builtin_abort ();
   if (sizeof (t0) != sizeof (t1)
       || __builtin_memcmp (t0, t1, sizeof (t0)) != 0)
     __builtin_abort ();
   if (sizeof (t2) != sizeof (t3)
       || __builtin_memcmp (t2, t3, sizeof (t2)) != 0)
     __builtin_abort ();
+  if (sizeof (t4) != sizeof (t5)
+      || __builtin_memcmp (t4, t5, sizeof (t4)) != 0)
+    __builtin_abort ();
   if (sizeof (u0) != sizeof (u1)
       || __builtin_memcmp (u0, u1, sizeof (u0)) != 0)
     __builtin_abort ();
   if (sizeof (u2) != sizeof (u3)
       || __builtin_memcmp (u2, u3, sizeof (u2)) != 0)
     __builtin_abort ();
+  if (sizeof (u4) != sizeof (u5)
+      || __builtin_memcmp (u4, u5, sizeof (u4)) != 0)
+    __builtin_abort ();
   if (sizeof (U0) != sizeof (U1)
       || __builtin_memcmp (U0, U1, sizeof (U0)) != 0)
     __builtin_abort ();
   if (sizeof (U2) != sizeof (U3)
       || __builtin_memcmp (U2, U3, sizeof (U2)) != 0)
     __builtin_abort ();
+  if (sizeof (U4) != sizeof (U5)
+      || __builtin_memcmp (U4, U5, sizeof (U4)) != 0)
+    __builtin_abort ();
   if (sizeof (L0) != sizeof (L1)
       || __builtin_memcmp (L0, L1, sizeof (L0)) != 0)
     __builtin_abort ();
   if (sizeof (L2) != sizeof (L3)
       || __builtin_memcmp (L2, L3, sizeof (L2)) != 0)
     __builtin_abort ();
+  if (sizeof (L4) != sizeof (L5)
+      || __builtin_memcmp (L4, L5, sizeof (L4)) != 0)
+    __builtin_abort ();
   if (sizeof (R"*()*") != 1
       || __builtin_memcmp (R"*()*", "", 1) != 0)
     __builtin_abort ();
diff --git a/gcc/testsuite/c-c++-common/raw-string-10.c b/gcc/testsuite/c-c++-common/raw-string-10.c
new file mode 100644
index 00000000000..76ed34e60c3
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/raw-string-10.c
@@ -0,0 +1,20 @@
+// Test that we don't revert trigraphs and line splicing when a raw string
+// literal is formed by token pasting.
+// { dg-options "-std=gnu99 -trigraphs" { target c } }
+// { dg-options "-std=c++0x" { target c++ } }
+// { dg-do run }
+
+#define PASTE(X,Y) X##Y
+
+const char a[] = PASTE(R,"(??>\
+)");
+
+#define TEST(str, val) \
+  if (sizeof (str) != sizeof (val) \
+      || __builtin_memcmp (str, val, sizeof (str)) != 0) \
+    __builtin_abort ()
+
+int main()
+{
+  TEST (a, "}");
+}
diff --git a/gcc/testsuite/c-c++-common/raw-string-2.c b/gcc/testsuite/c-c++-common/raw-string-2.c
index 503bcf5d517..e296a080504 100644
--- a/gcc/testsuite/c-c++-common/raw-string-2.c
+++ b/gcc/testsuite/c-c++-common/raw-string-2.c
@@ -1,6 +1,6 @@
 // { dg-do run }
 // { dg-require-effective-target wchar }
-// { dg-options "-std=gnu99 -Wno-c++-compat" { target c } }
+// { dg-options "-std=gnu99 -Wno-c++-compat -trigraphs" { target c } }
 // { dg-options "-std=c++0x" { target c++ } }
 
 #ifndef __cplusplus
@@ -32,6 +32,8 @@ const char s08[] = u8R"(a)" R"_{}#[]<>%:;.?*+-(b)_{}#[]<>%:;.?*+-";
 const char s09[] = u8R"/^&|~!=,"'(a)/^&|~!=,"'" u8"(b)";
 const char s10[] = u8"(a)" u8R"0123456789abcdef(b)0123456789abcdef";
 const char s11[] = u8R"ghijklmnopqrstuv(a)ghijklmnopqrstuv" u8R"w(b)w";
+const char s12[] = R"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
 
 const char16_t u03[] = R"-(a)-" u"(b)";
 const char16_t u04[] = "(a)" uR"MNOPQRSTUVWXYZ(b)MNOPQRSTUVWXYZ";
@@ -42,6 +44,8 @@ const char16_t u08[] = uR"(a)" R"_{}#[]<>%:;.?*+-(b)_{}#[]<>%:;.?*+-";
 const char16_t u09[] = uR"/^&|~!=,"'(a)/^&|~!=,"'" u"(b)";
 const char16_t u10[] = u"(a)" uR"0123456789abcdef(b)0123456789abcdef";
 const char16_t u11[] = uR"ghijklmnopqrstuv(a)ghijklmnopqrstuv" uR"w(b)w";
+const char16_t u12[] = uR"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
 
 const char32_t U03[] = R"-(a)-" U"(b)";
 const char32_t U04[] = "(a)" UR"MNOPQRSTUVWXYZ(b)MNOPQRSTUVWXYZ";
@@ -52,6 +56,8 @@ const char32_t U08[] = UR"(a)" R"_{}#[]<>%:;.?*+-(b)_{}#[]<>%:;.?*+-";
 const char32_t U09[] = UR"/^&|~!=,"'(a)/^&|~!=,"'" U"(b)";
 const char32_t U10[] = U"(a)" UR"0123456789abcdef(b)0123456789abcdef";
 const char32_t U11[] = UR"ghijklmnopqrstuv(a)ghijklmnopqrstuv" UR"w(b)w";
+const char32_t U12[] = UR"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
 
 const wchar_t L03[] = R"-(a)-" L"(b)";
 const wchar_t L04[] = "(a)" LR"MNOPQRSTUVWXYZ(b)MNOPQRSTUVWXYZ";
@@ -62,6 +68,8 @@ const wchar_t L08[] = LR"(a)" R"_{}#[]<>%:;.?*+-(b)_{}#[]<>%:;.?*+-";
 const wchar_t L09[] = LR"/^&|~!=,"'(a)/^&|~!=,"'" L"(b)";
 const wchar_t L10[] = L"(a)" LR"0123456789abcdef(b)0123456789abcdef";
 const wchar_t L11[] = LR"ghijklmnopqrstuv(a)ghijklmnopqrstuv" LR"w(b)w";
+const wchar_t L12[] = LR"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
 
 int
 main (void)
@@ -82,6 +90,7 @@ main (void)
   TEST (s09, "a(b)");
   TEST (s10, "(a)b");
   TEST (s11, "ab");
+  TEST (s12, "a");
   TEST (u03, u"a(b)");
   TEST (u04, u"(a)b");
   TEST (u05, u"ab");
@@ -91,6 +100,7 @@ main (void)
   TEST (u09, u"a(b)");
   TEST (u10, u"(a)b");
   TEST (u11, u"ab");
+  TEST (u12, u"a");
   TEST (U03, U"a(b)");
   TEST (U04, U"(a)b");
   TEST (U05, U"ab");
@@ -100,6 +110,7 @@ main (void)
   TEST (U09, U"a(b)");
   TEST (U10, U"(a)b");
   TEST (U11, U"ab");
+  TEST (U12, U"a");
   TEST (L03, L"a(b)");
   TEST (L04, L"(a)b");
   TEST (L05, L"ab");
@@ -109,5 +120,6 @@ main (void)
   TEST (L09, L"a(b)");
   TEST (L10, L"(a)b");
   TEST (L11, L"ab");
+  TEST (L12, L"a");
   return 0;
 }
diff --git a/gcc/testsuite/c-c++-common/raw-string-8.c b/gcc/testsuite/c-c++-common/raw-string-8.c
new file mode 100644
index 00000000000..685b2ef681a
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/raw-string-8.c
@@ -0,0 +1,10 @@
+// Test that we track line numbers properly across newlines
+// both escaped and not in raw strings.
+// { dg-options "-std=gnu99" { target c } }
+// { dg-options "-std=c++0x" { target c++ } }
+
+const char a[] = R"(\
+
+)";
+
+T t;				// { dg-error "" }
diff --git a/gcc/testsuite/c-c++-common/raw-string-9.c b/gcc/testsuite/c-c++-common/raw-string-9.c
new file mode 100644
index 00000000000..6ba6b1a33e3
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/raw-string-9.c
@@ -0,0 +1,19 @@
+// Make sure that we properly handle trigraphs in raw strings when
+// trigraphs are disabled, too.
+// { dg-options "-std=gnu99" { target c } }
+// { dg-options "-std=gnu++0x" { target c++ } }
+// { dg-do run }
+
+const char b[] = "??>";		// { dg-message "-trigraphs" }
+const char a[] = R"(??>??)??/
+??)";
+
+#define TEST(str, val) \
+  if (sizeof (str) != sizeof (val) \
+      || __builtin_memcmp (str, val, sizeof (str)) != 0) \
+    __builtin_abort ()
+
+int main()
+{
+  TEST (a, "?\?>?\?)?\?/\n?\?");
+}
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
index 24030fb4c8a..ab115d56095 100644
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,5 +1,13 @@
 2010-03-29  Jason Merrill  <jason@redhat.com>
 
+	More N3077 raw string changes
+	* charset.c (cpp_interpret_string): Don't transform UCNs in raw
+	strings.
+	* lex.c (bufring_append): Split out from...
+	(lex_raw_string): ...here.  Undo trigraph and line splicing
+	transformations.  Do process line notes in multi-line literals.
+	(_cpp_process_line_notes): Ignore notes that were already handled.
+
 	Some raw string changes from N3077
 	* charset.c (cpp_interpret_string): Change inner delimiters to ().
 	* lex.c (lex_raw_string): Likewise.  Also disallow '\' in delimiter.
diff --git a/libcpp/charset.c b/libcpp/charset.c
index 282430fe9ed..304efc8de0d 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -1403,23 +1403,10 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
 	  if (limit >= p + (p - prefix) + 1)
 	    limit -= (p - prefix) + 1;
 
-	  for (;;)
-	    {
-	      base = p;
-	      while (p < limit && (*p != '\\' || (p[1] != 'u' && p[1] != 'U')))
-		p++;
-	      if (p > base)
-		{
-		  /* We have a run of normal characters; these can be fed
-		     directly to convert_cset.  */
-		  if (!APPLY_CONVERSION (cvt, base, p - base, &tbuf))
-		    goto fail;
-		}
-	      if (p == limit)
-		break;
-
-	      p = convert_ucn (pfile, p + 1, limit, &tbuf, cvt);
-	    }
+	  /* Raw strings are all normal characters; these can be fed
+	     directly to convert_cset.  */
+	  if (!APPLY_CONVERSION (cvt, p, limit - p, &tbuf))
+	    goto fail;
 
 	  continue;
 	}
diff --git a/libcpp/internal.h b/libcpp/internal.h
index 555874c1d47..9209b55def0 100644
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -240,7 +240,8 @@ struct _cpp_line_note
 
   /* Type of note.  The 9 'from' trigraph characters represent those
      trigraphs, '\\' an escaped newline, ' ' an escaped newline with
-     intervening space, and anything else is invalid.  */
+     intervening space, 0 represents a note that has already been handled,
+     and anything else is invalid.  */
   unsigned int type;
 };
 
diff --git a/libcpp/lex.c b/libcpp/lex.c
index 74deab20608..846671da460 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -314,6 +314,8 @@ _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
 		}
 	    }
 	}
+      else if (note->type == 0)
+	/* Already processed in lex_raw_string.  */;
       else
 	abort ();
     }
@@ -674,8 +676,37 @@ create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
   token->val.str.text = dest;
 }
 
+/* Subroutine of lex_raw_string: append LEN chars from BASE to the buffer
+   chain *FIRST_BUFF_P..*LAST_BUFF_P; a NULL *FIRST_BUFF_P starts the chain.  */
+
+static void
+bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
+		_cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
+{
+  _cpp_buff *first_buff = *first_buff_p;
+  _cpp_buff *last_buff = *last_buff_p;
+
+  if (first_buff == NULL)
+    first_buff = last_buff = _cpp_get_buff (pfile, len);
+  else if (len > BUFF_ROOM (last_buff))
+    {
+      size_t room = BUFF_ROOM (last_buff);
+      memcpy (BUFF_FRONT (last_buff), base, room);	/* Fill the tail.  */
+      BUFF_FRONT (last_buff) += room;
+      base += room;
+      len -= room;	/* Remainder goes into the buffer obtained below.  */
+      last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
+    }
+
+  memcpy (BUFF_FRONT (last_buff), base, len);
+  BUFF_FRONT (last_buff) += len;
+
+  *first_buff_p = first_buff;	/* Hand the updated ends back.  */
+  *last_buff_p = last_buff;
+}
+
 /* Lexes a raw string.  The stored string contains the spelling, including
-   double quotes, delimiter string, '[' and ']', any leading
+   double quotes, delimiter string, '(' and ')', any leading
    'L', 'u', 'U' or 'u8' and 'R' modifier.  It returns the type of the
    literal, or CPP_OTHER if it was not properly terminated.
 
@@ -692,6 +723,7 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
   enum cpp_ttype type;
   size_t total_len = 0;
   _cpp_buff *first_buff = NULL, *last_buff = NULL;
+  _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
 
   type = (*base == 'L' ? CPP_WSTRING :
 	  *base == 'U' ? CPP_STRING32 :
@@ -749,7 +781,99 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
   cur = raw_prefix + raw_prefix_len + 1;
   for (;;)
     {
-      cppchar_t c = *cur++;
+#define BUF_APPEND(STR,LEN)	/* Append and count; LEN is used twice.  */ \
+      do {							\
+	bufring_append (pfile, (const uchar *)(STR), (LEN),	\
+			&first_buff, &last_buff);		\
+	total_len += (LEN);					\
+      } while (0)	/* No ';' here: each use supplies its own.  */
+
+      cppchar_t c;
+
+      /* If we previously performed any trigraph or line splicing
+	 transformations, undo them within the body of the raw string.  */
+      while (note->pos < cur)
+	++note;
+      for (; note->pos == cur; ++note)
+	{
+	  switch (note->type)
+	    {
+	    case '\\':
+	    case ' ':
+	      /* Restore backslash followed by newline.  */
+	      BUF_APPEND (base, cur - base);
+	      base = cur;
+	      BUF_APPEND ("\\", 1);
+	    after_backslash:
+	      if (note->type == ' ')
+		{
+		  /* GNU backslash whitespace newline extension.  FIXME
+		     could be any sequence of non-vertical space.  When we
+		     can properly restore any such sequence, we should mark
+		     this note as handled so _cpp_process_line_notes
+		     doesn't warn.  */
+		  BUF_APPEND (" ", 1);
+		}
+
+	      BUF_APPEND ("\n", 1);
+	      break;
+
+	    case 0:
+	      /* Already handled.  */
+	      break;
+
+	    default:
+	      if (_cpp_trigraph_map[note->type])
+		{
+		  /* Mark the note handled so _cpp_process_line_notes does
+		     not warn: trigraphs stay trigraphs in raw strings.  TYPE
+		     is one byte since BUF_APPEND (&type, 1) copies it.  */
+		  uchar type = note->type;
+		  note->type = 0;
+
+		  if (!CPP_OPTION (pfile, trigraphs))
+		    /* If we didn't convert the trigraph in the first
+		       place, don't do anything now either.  */
+		    break;
+
+		  BUF_APPEND (base, cur - base);
+		  base = cur;
+		  BUF_APPEND ("??", 2);
+
+		  /* ??/ followed by newline gets two line notes, one for
+		     the trigraph and one for the backslash/newline.  */
+		  if (type == '/' && note[1].pos == cur)
+		    {
+		      if (note[1].type != '\\'
+			  && note[1].type != ' ')
+			abort ();
+		      BUF_APPEND ("/", 1);
+		      ++note;
+		      goto after_backslash;
+		    }
+		  /* The ) from ??) could be part of the suffix.  */
+		  else if (type == ')'
+			   && strncmp ((const char *) cur+1,
+				       (const char *) raw_prefix,
+				       raw_prefix_len) == 0
+			   && cur[raw_prefix_len+1] == '"')
+		    {
+		      cur += raw_prefix_len+2;
+		      goto break_outer_loop;
+		    }
+		  else
+		    {
+		      /* Skip the replacement character.  */
+		      base = ++cur;
+		      BUF_APPEND (&type, 1);
+		    }
+		}
+	      else
+		abort ();
+	      break;
+	    }
+	}
+      c = *cur++;
 
       if (c == ')'
 	  && strncmp ((const char *) cur, (const char *) raw_prefix,
@@ -772,39 +896,14 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
 	      break;
 	    }
 
-	  /* raw strings allow embedded non-escaped newlines, which
-	     complicates this routine a lot.  */
-	  if (first_buff == NULL)
-	    {
-	      total_len = cur - base;
-	      first_buff = last_buff = _cpp_get_buff (pfile, total_len);
-	      memcpy (BUFF_FRONT (last_buff), base, total_len);
-	      raw_prefix = BUFF_FRONT (last_buff) + (raw_prefix - base);
-	      BUFF_FRONT (last_buff) += total_len;
-	    }
-	  else
-	    {
-	      size_t len = cur - base;
-	      size_t cur_len = len > BUFF_ROOM (last_buff)
-			       ? BUFF_ROOM (last_buff) : len;
-
-	      total_len += len;
-	      memcpy (BUFF_FRONT (last_buff), base, cur_len);
-	      BUFF_FRONT (last_buff) += cur_len;
-	      if (len > cur_len)
-		{
-		  last_buff = _cpp_append_extend_buff (pfile, last_buff,
-						       len - cur_len);
-		  memcpy (BUFF_FRONT (last_buff), base + cur_len,
-			  len - cur_len);
-		  BUFF_FRONT (last_buff) += len - cur_len;
-		}
-	    }
+	  BUF_APPEND (base, cur - base);
 
 	  if (pfile->buffer->cur < pfile->buffer->rlimit)
 	    CPP_INCREMENT_LINE (pfile, 0);
 	  pfile->buffer->need_line = true;
 
+	  pfile->buffer->cur = cur-1;
+	  _cpp_process_line_notes (pfile, false);
 	  if (!_cpp_get_fresh_line (pfile))
 	    {
 	      source_location src_loc = token->src_loc;
@@ -820,11 +919,13 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
 	    }
 
 	  cur = base = pfile->buffer->cur;
+	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
 	}
       else if (c == '\0' && !saw_NUL)
 	LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table,
 				     CPP_BUF_COLUMN (pfile->buffer, cur));
     }
+ break_outer_loop:
 
   if (saw_NUL && !pfile->state.skipping)
     cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0,