gcc/libstdc++-v3/testsuite/22_locale/codecvt/codecvt_unicode.h

// Copyright (C) 2020-2024 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.

#include <algorithm>
#include <locale>
#include <testsuite_hooks.h>

// One row of a table for conversions expected to succeed completely.
// The members are kept in this order so that rows can be written as
// brace-enclosed lists: {in_size, out_size}.
struct test_offsets_ok
{
  // Number of input elements offered to the conversion.
  size_t in_size;
  // Number of output elements offered to the conversion.
  size_t out_size;
};
// One row of a table for conversions expected to stop early.  Rows are
// written as {in_size, out_size, expected_in_next, expected_out_next}.
struct test_offsets_partial
{
  // Number of input elements offered to the conversion.
  size_t in_size;
  // Number of output elements offered to the conversion.
  size_t out_size;
  // Offset from the start of the input at which in_next must be left.
  size_t expected_in_next;
  // Offset from the start of the output at which out_next must be left.
  size_t expected_out_next;
};

// One row of a table for conversions expected to fail.  In addition to the
// offsets it names one input element to overwrite before converting.  Rows
// are written as
//   {in_size, out_size, expected_in_next, expected_out_next,
//    replace_char, replace_pos}.
template <class CharT>
struct test_offsets_error
{
  // Number of input elements offered to the conversion.
  size_t in_size;
  // Number of output elements offered to the conversion.
  size_t out_size;
  // Offset from the start of the input at which in_next must be left.
  size_t expected_in_next;
  // Offset from the start of the output at which out_next must be left.
  size_t expected_out_next;
  // Value stored into the input before the conversion is attempted.
  CharT replace_char;
  // Index of the input element that receives replace_char.
  size_t replace_pos;
};

// Return the number of elements of a built-in array.  The argument is only
// used for template argument deduction, so the result is a constant.
template <class T, size_t N>
constexpr size_t
array_size (const T (&)[N])
{
  return N;
}

template <class InternT, class ExternT>
void
utf8_to_utf32_in_ok (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  using namespace std;
  // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
  const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
  const char32_t expected[] = U"b\u0448\uAAAA\U0010AAAA";
  static_assert (array_size (input) == 11, "");
  static_assert (array_size (expected) == 5, "");

  ExternT in[array_size (input)];
  InternT exp[array_size (expected)];
  copy (begin (input), end (input), begin (in));
  copy (begin (expected), end (expected), begin (exp));
  VERIFY (char_traits<ExternT>::length (in) == 10);
  VERIFY (char_traits<InternT>::length (exp) == 4);

  test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}, {10, 4}};
  for (auto t : offsets)
    {
      InternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      auto state = mbstate_t{};
      auto in_next = (const ExternT *) nullptr;
      auto out_next = (InternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
		    out_next);
      VERIFY (res == cvt.ok);
      VERIFY (in_next == in + t.in_size);
      VERIFY (out_next == out + t.out_size);
      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
      if (t.out_size < array_size (out))
	VERIFY (out[t.out_size] == 0);

      state = {};
      auto len = cvt.length (state, in, in + t.in_size, t.out_size);
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.in_size);
    }

  for (auto t : offsets)
    {
      InternT out[array_size (exp)] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      auto state = mbstate_t{};
      auto in_next = (const ExternT *) nullptr;
      auto out_next = (InternT *) nullptr;
      auto res = codecvt_base::result ();

      res
	= cvt.in (state, in, in + t.in_size, in_next, out, end (out), out_next);
      VERIFY (res == cvt.ok);
      VERIFY (in_next == in + t.in_size);
      VERIFY (out_next == out + t.out_size);
      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
      if (t.out_size < array_size (out))
	VERIFY (out[t.out_size] == 0);

      state = {};
      auto len = cvt.length (state, in, in + t.in_size, array_size (out));
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.in_size);
    }
}

template <class InternT, class ExternT>
void
utf8_to_utf32_in_partial (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  using namespace std;
  // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
  const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
  const char32_t expected[] = U"b\u0448\uAAAA\U0010AAAA";
  static_assert (array_size (input) == 11, "");
  static_assert (array_size (expected) == 5, "");

  ExternT in[array_size (input)];
  InternT exp[array_size (expected)];
  copy (begin (input), end (input), begin (in));
  copy (begin (expected), end (expected), begin (exp));
  VERIFY (char_traits<ExternT>::length (in) == 10);
  VERIFY (char_traits<InternT>::length (exp) == 4);

  test_offsets_partial offsets[] = {
    {1, 0, 0, 0}, // no space for first CP

    {3, 1, 1, 1}, // no space for second CP
    {2, 2, 1, 1}, // incomplete second CP
    {2, 1, 1, 1}, // incomplete second CP, and no space for it

    {6, 2, 3, 2}, // no space for third CP
    {4, 3, 3, 2}, // incomplete third CP
    {5, 3, 3, 2}, // incomplete third CP
    {4, 2, 3, 2}, // incomplete third CP, and no space for it
    {5, 2, 3, 2}, // incomplete third CP, and no space for it

    {10, 3, 6, 3}, // no space for fourth CP
    {7, 4, 6, 3},  // incomplete fourth CP
    {8, 4, 6, 3},  // incomplete fourth CP
    {9, 4, 6, 3},  // incomplete fourth CP
    {7, 3, 6, 3},  // incomplete fourth CP, and no space for it
    {8, 3, 6, 3},  // incomplete fourth CP, and no space for it
    {9, 3, 6, 3},  // incomplete fourth CP, and no space for it
  };

  for (auto t : offsets)
    {
      InternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      auto state = mbstate_t{};
      auto in_next = (const ExternT *) nullptr;
      auto out_next = (InternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
		    out_next);
      VERIFY (res == cvt.partial);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
	      == 0);
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);

      state = {};
      auto len = cvt.length (state, in, in + t.in_size, t.out_size);
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.expected_in_next);
    }
}

// Check that cvt.in reports codecvt_base::error for ill-formed UTF-8 when
// decoding to UTF-32, and that cvt.length stops at the same input offset.
//
// Each table row is
//   {in_size, out_size, expected_in_next, expected_out_next,
//    replace_char, replace_pos}.
// For the duration of one iteration in[replace_pos] is overwritten with
// replace_char to corrupt the otherwise valid input; the original byte is
// put back at the end of the iteration so the rows do not affect each other.
template <class InternT, class ExternT>
void
utf8_to_utf32_in_error (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  using namespace std;
  // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP, 4-byte CP
  const unsigned char input[] = "b\u0448\uD700\U0010AAAA";
  const char32_t expected[] = U"b\u0448\uD700\U0010AAAA";
  static_assert (array_size (input) == 11, "");
  static_assert (array_size (expected) == 5, "");

  // Copy the data into arrays of the facet's own character types.
  ExternT in[array_size (input)];
  InternT exp[array_size (expected)];
  copy (begin (input), end (input), begin (in));
  copy (begin (expected), end (expected), begin (exp));
  VERIFY (char_traits<ExternT>::length (in) == 10);
  VERIFY (char_traits<InternT>::length (exp) == 4);

  // There are 5 classes of errors in UTF-8 decoding
  // 1. Missing leading byte
  // 2. Missing trailing byte
  // 3. Surrogate CP
  // 4. Overlong sequence
  // 5. CP out of Unicode range
  test_offsets_error<unsigned char> offsets[] = {

    // 1. Missing leading byte. We will replace the leading byte with
    // non-leading byte, such as a byte that is always invalid or a trailing
    // byte.

    // replace leading byte with invalid byte
    {1, 4, 0, 0, 0xFF, 0},
    {3, 4, 1, 1, 0xFF, 1},
    {6, 4, 3, 2, 0xFF, 3},
    {10, 4, 6, 3, 0xFF, 6},

    // replace leading byte with trailing byte
    {1, 4, 0, 0, 0b10101010, 0},
    {3, 4, 1, 1, 0b10101010, 1},
    {6, 4, 3, 2, 0b10101010, 3},
    {10, 4, 6, 3, 0b10101010, 6},

    // 2. Missing trailing byte. We will replace the trailing byte with
    // non-trailing byte, such as a byte that is always invalid or a leading
    // byte (simple ASCII byte in our case).

    // replace first trailing byte with ASCII byte
    {3, 4, 1, 1, 'z', 2},
    {6, 4, 3, 2, 'z', 4},
    {10, 4, 6, 3, 'z', 7},

    // replace first trailing byte with invalid byte
    {3, 4, 1, 1, 0xFF, 2},
    {6, 4, 3, 2, 0xFF, 4},
    {10, 4, 6, 3, 0xFF, 7},

    // replace second trailing byte with ASCII byte
    {6, 4, 3, 2, 'z', 5},
    {10, 4, 6, 3, 'z', 8},

    // replace second trailing byte with invalid byte
    {6, 4, 3, 2, 0xFF, 5},
    {10, 4, 6, 3, 0xFF, 8},

    // replace third trailing byte
    {10, 4, 6, 3, 'z', 9},
    {10, 4, 6, 3, 0xFF, 9},

    // 2.1 The following test-cases raise doubt whether error or partial should
    // be returned. For example, we have 4-byte sequence with valid leading
    // byte. If we hide the last byte we need to return partial. But, if the
    // second or third byte, which are visible to the call to codecvt, are
    // malformed then error should be returned.

    // replace first trailing byte with ASCII byte, also incomplete at end
    {5, 4, 3, 2, 'z', 4},
    {8, 4, 6, 3, 'z', 7},
    {9, 4, 6, 3, 'z', 7},

    // replace first trailing byte with invalid byte, also incomplete at end
    {5, 4, 3, 2, 0xFF, 4},
    {8, 4, 6, 3, 0xFF, 7},
    {9, 4, 6, 3, 0xFF, 7},

    // replace second trailing byte with ASCII byte, also incomplete at end
    {9, 4, 6, 3, 'z', 8},

    // replace second trailing byte with invalid byte, also incomplete at end
    {9, 4, 6, 3, 0xFF, 8},

    // 3. Surrogate CP. We modify the second byte (first trailing) of the 3-byte
    // CP U+D700
    {6, 4, 3, 2, 0b10100000, 4}, // turn U+D700 into U+D800
    {6, 4, 3, 2, 0b10101100, 4}, // turn U+D700 into U+DB00
    {6, 4, 3, 2, 0b10110000, 4}, // turn U+D700 into U+DC00
    {6, 4, 3, 2, 0b10111100, 4}, // turn U+D700 into U+DF00

    // 4. Overlong sequence. The CPs in the input are chosen such as modifying
    // just the leading byte is enough to make them overlong, i.e. for the
    // 3-byte and 4-byte CP the second byte (first trailing) has enough leading
    // zeroes.
    {3, 4, 1, 1, 0b11000000, 1},  // make the 2-byte CP overlong
    {3, 4, 1, 1, 0b11000001, 1},  // make the 2-byte CP overlong
    {6, 4, 3, 2, 0b11100000, 3},  // make the 3-byte CP overlong
    {10, 4, 6, 3, 0b11110000, 6}, // make the 4-byte CP overlong

    // 5. CP above range
    // turn U+10AAAA into U+14AAAA by changing its leading byte
    {10, 4, 6, 3, 0b11110101, 6},
    // turn U+10AAAA into U+11AAAA by changing its 2nd byte
    {10, 4, 6, 3, 0b10011010, 7},
  };
  for (auto t : offsets)
    {
      InternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      // The corrupted byte has to lie inside the range handed to the facet.
      // Together with the in_size check above this also keeps the accesses
      // to in[t.replace_pos] below within the bounds of the array.
      VERIFY (t.replace_pos < t.in_size);
      auto old_char = in[t.replace_pos];
      in[t.replace_pos] = t.replace_char;

      auto state = mbstate_t{};
      auto in_next = (const ExternT *) nullptr;
      auto out_next = (InternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
		    out_next);
      VERIFY (res == cvt.error);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
	      == 0);
      // The element after the converted prefix must still be zero.
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);

      // length must stop at the same input offset as in did.
      state = {};
      auto len = cvt.length (state, in, in + t.in_size, t.out_size);
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.expected_in_next);

      // Undo the corruption before the next row is tried.
      in[t.replace_pos] = old_char;
    }
}

// Run the three UTF-8 to UTF-32 decoding checks (ok, partial and error
// results, in that order) against the same facet.
template <class InternT, class ExternT>
void
utf8_to_utf32_in (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  utf8_to_utf32_in_ok (cvt);
  utf8_to_utf32_in_partial (cvt);
  utf8_to_utf32_in_error (cvt);
}

template <class InternT, class ExternT>
void
utf32_to_utf8_out_ok (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  using namespace std;
  // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
  const char32_t input[] = U"b\u0448\uAAAA\U0010AAAA";
  const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
  static_assert (array_size (input) == 5, "");
  static_assert (array_size (expected) == 11, "");

  InternT in[array_size (input)];
  ExternT exp[array_size (expected)];
  copy (begin (input), end (input), begin (in));
  copy (begin (expected), end (expected), begin (exp));
  VERIFY (char_traits<InternT>::length (in) == 4);
  VERIFY (char_traits<ExternT>::length (exp) == 10);

  test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}, {4, 10}};
  for (auto t : offsets)
    {
      ExternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      auto state = mbstate_t{};
      auto in_next = (const InternT *) nullptr;
      auto out_next = (ExternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
		     out_next);
      VERIFY (res == cvt.ok);
      VERIFY (in_next == in + t.in_size);
      VERIFY (out_next == out + t.out_size);
      VERIFY (char_traits<ExternT>::compare (out, exp, t.out_size) == 0);
      if (t.out_size < array_size (out))
	VERIFY (out[t.out_size] == 0);
    }
}

template <class InternT, class ExternT>
void
utf32_to_utf8_out_partial (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  using namespace std;
  // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
  const char32_t input[] = U"b\u0448\uAAAA\U0010AAAA";
  const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
  static_assert (array_size (input) == 5, "");
  static_assert (array_size (expected) == 11, "");

  InternT in[array_size (input)];
  ExternT exp[array_size (expected)];
  copy (begin (input), end (input), begin (in));
  copy (begin (expected), end (expected), begin (exp));
  VERIFY (char_traits<InternT>::length (in) == 4);
  VERIFY (char_traits<ExternT>::length (exp) == 10);

  test_offsets_partial offsets[] = {
    {1, 0, 0, 0}, // no space for first CP

    {2, 1, 1, 1}, // no space for second CP
    {2, 2, 1, 1}, // no space for second CP

    {3, 3, 2, 3}, // no space for third CP
    {3, 4, 2, 3}, // no space for third CP
    {3, 5, 2, 3}, // no space for third CP

    {4, 6, 3, 6}, // no space for fourth CP
    {4, 7, 3, 6}, // no space for fourth CP
    {4, 8, 3, 6}, // no space for fourth CP
    {4, 9, 3, 6}, // no space for fourth CP
  };
  for (auto t : offsets)
    {
      ExternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      auto state = mbstate_t{};
      auto in_next = (const InternT *) nullptr;
      auto out_next = (ExternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
		     out_next);
      VERIFY (res == cvt.partial);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<ExternT>::compare (out, exp, t.expected_out_next)
	      == 0);
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);
    }
}

// Check that cvt.out reports codecvt_base::error when the UTF-32 input
// contains a surrogate code point or a value above U+10FFFF.
//
// Each table row is
//   {in_size, out_size, expected_in_next, expected_out_next,
//    replace_char, replace_pos}.
// For the duration of one iteration in[replace_pos] is overwritten with
// replace_char to corrupt the otherwise valid input; the original value is
// put back at the end of the iteration so the rows do not affect each other.
template <class InternT, class ExternT>
void
utf32_to_utf8_out_error (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  using namespace std;
  // Code points whose UTF-8 encodings take 1, 2, 3 and 4 bytes
  const char32_t input[] = U"b\u0448\uAAAA\U0010AAAA";
  const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
  static_assert (array_size (input) == 5, "");
  static_assert (array_size (expected) == 11, "");

  // Copy the data into arrays of the facet's own character types.
  InternT in[array_size (input)];
  ExternT exp[array_size (expected)];
  copy (begin (input), end (input), begin (in));
  copy (begin (expected), end (expected), begin (exp));
  VERIFY (char_traits<InternT>::length (in) == 4);
  VERIFY (char_traits<ExternT>::length (exp) == 10);

  test_offsets_error<InternT> offsets[] = {

    // Surrogate CP
    {4, 10, 0, 0, 0xD800, 0},
    {4, 10, 1, 1, 0xDBFF, 1},
    {4, 10, 2, 3, 0xDC00, 2},
    {4, 10, 3, 6, 0xDFFF, 3},

    // CP out of range
    {4, 10, 0, 0, 0x00110000, 0},
    {4, 10, 1, 1, 0x00110000, 1},
    {4, 10, 2, 3, 0x00110000, 2},
    {4, 10, 3, 6, 0x00110000, 3}};

  for (auto t : offsets)
    {
      ExternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      // The corrupted element has to lie inside the range handed to the
      // facet.  Together with the in_size check above this also keeps the
      // accesses to in[t.replace_pos] below within the bounds of the array.
      VERIFY (t.replace_pos < t.in_size);
      auto old_char = in[t.replace_pos];
      in[t.replace_pos] = t.replace_char;

      auto state = mbstate_t{};
      auto in_next = (const InternT *) nullptr;
      auto out_next = (ExternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
		     out_next);
      VERIFY (res == cvt.error);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<ExternT>::compare (out, exp, t.expected_out_next)
	      == 0);
      // The element after the converted prefix must still be zero.
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);

      // Undo the corruption before the next row is tried.
      in[t.replace_pos] = old_char;
    }
}

// Run the three UTF-32 to UTF-8 encoding checks (ok, partial and error
// results, in that order) against the same facet.
template <class InternT, class ExternT>
void
utf32_to_utf8_out (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  utf32_to_utf8_out_ok (cvt);
  utf32_to_utf8_out_partial (cvt);
  utf32_to_utf8_out_error (cvt);
}

// Test a facet that converts between UTF-8 (external) and UTF-32
// (internal): first the decoding direction via in, then the encoding
// direction via out.
template <class InternT, class ExternT>
void
test_utf8_utf32_cvt (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  utf8_to_utf32_in (cvt);
  utf32_to_utf8_out (cvt);
}

template <class InternT, class ExternT>
void
utf8_to_utf16_in_ok (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  using namespace std;
  // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
  const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
  const char16_t expected[] = u"b\u0448\uAAAA\U0010AAAA";
  static_assert (array_size (input) == 11, "");
  static_assert (array_size (expected) == 6, "");

  ExternT in[array_size (input)];
  InternT exp[array_size (expected)];
  copy (begin (input), end (input), begin (in));
  copy (begin (expected), end (expected), begin (exp));
  VERIFY (char_traits<ExternT>::length (in) == 10);
  VERIFY (char_traits<InternT>::length (exp) == 5);

  test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}, {10, 5}};
  for (auto t : offsets)
    {
      InternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      auto state = mbstate_t{};
      auto in_next = (const ExternT *) nullptr;
      auto out_next = (InternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
		    out_next);
      VERIFY (res == cvt.ok);
      VERIFY (in_next == in + t.in_size);
      VERIFY (out_next == out + t.out_size);
      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
      if (t.out_size < array_size (out))
	VERIFY (out[t.out_size] == 0);

      state = {};
      auto len = cvt.length (state, in, in + t.in_size, t.out_size);
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.in_size);
    }

  for (auto t : offsets)
    {
      InternT out[array_size (exp)] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      auto state = mbstate_t{};
      auto in_next = (const ExternT *) nullptr;
      auto out_next = (InternT *) nullptr;
      auto res = codecvt_base::result ();

      res
	= cvt.in (state, in, in + t.in_size, in_next, out, end (out), out_next);
      VERIFY (res == cvt.ok);
      VERIFY (in_next == in + t.in_size);
      VERIFY (out_next == out + t.out_size);
      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
      if (t.out_size < array_size (out))
	VERIFY (out[t.out_size] == 0);

      state = {};
      auto len = cvt.length (state, in, in + t.in_size, array_size (out));
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.in_size);
    }
}

template <class InternT, class ExternT>
void
utf8_to_utf16_in_partial (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  using namespace std;
  // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
  const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
  const char16_t expected[] = u"b\u0448\uAAAA\U0010AAAA";
  static_assert (array_size (input) == 11, "");
  static_assert (array_size (expected) == 6, "");

  ExternT in[array_size (input)];
  InternT exp[array_size (expected)];
  copy (begin (input), end (input), begin (in));
  copy (begin (expected), end (expected), begin (exp));
  VERIFY (char_traits<ExternT>::length (in) == 10);
  VERIFY (char_traits<InternT>::length (exp) == 5);

  test_offsets_partial offsets[] = {
    {1, 0, 0, 0}, // no space for first CP

    {3, 1, 1, 1}, // no space for second CP
    {2, 2, 1, 1}, // incomplete second CP
    {2, 1, 1, 1}, // incomplete second CP, and no space for it

    {6, 2, 3, 2}, // no space for third CP
    {4, 3, 3, 2}, // incomplete third CP
    {5, 3, 3, 2}, // incomplete third CP
    {4, 2, 3, 2}, // incomplete third CP, and no space for it
    {5, 2, 3, 2}, // incomplete third CP, and no space for it

    {10, 3, 6, 3}, // no space for fourth CP
    {10, 4, 6, 3}, // no space for fourth CP
    {7, 5, 6, 3},  // incomplete fourth CP
    {8, 5, 6, 3},  // incomplete fourth CP
    {9, 5, 6, 3},  // incomplete fourth CP
    {7, 3, 6, 3},  // incomplete fourth CP, and no space for it
    {8, 3, 6, 3},  // incomplete fourth CP, and no space for it
    {9, 3, 6, 3},  // incomplete fourth CP, and no space for it
    {7, 4, 6, 3},  // incomplete fourth CP, and no space for it
    {8, 4, 6, 3},  // incomplete fourth CP, and no space for it
    {9, 4, 6, 3},  // incomplete fourth CP, and no space for it

  };

  for (auto t : offsets)
    {
      InternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      auto state = mbstate_t{};
      auto in_next = (const ExternT *) nullptr;
      auto out_next = (InternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
		    out_next);
      VERIFY (res == cvt.partial);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
	      == 0);
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);

      state = {};
      auto len = cvt.length (state, in, in + t.in_size, t.out_size);
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.expected_in_next);
    }
}

// Check that cvt.in reports codecvt_base::error for ill-formed UTF-8 when
// decoding to UTF-16, and that cvt.length stops at the same input offset.
//
// Each table row is
//   {in_size, out_size, expected_in_next, expected_out_next,
//    replace_char, replace_pos}.
// For the duration of one iteration in[replace_pos] is overwritten with
// replace_char to corrupt the otherwise valid input; the original byte is
// put back at the end of the iteration so the rows do not affect each other.
template <class InternT, class ExternT>
void
utf8_to_utf16_in_error (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  using namespace std;
  // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP, 4-byte CP
  const unsigned char input[] = "b\u0448\uD700\U0010AAAA";
  const char16_t expected[] = u"b\u0448\uD700\U0010AAAA";
  static_assert (array_size (input) == 11, "");
  static_assert (array_size (expected) == 6, "");

  // Copy the data into arrays of the facet's own character types.
  ExternT in[array_size (input)];
  InternT exp[array_size (expected)];
  copy (begin (input), end (input), begin (in));
  copy (begin (expected), end (expected), begin (exp));
  VERIFY (char_traits<ExternT>::length (in) == 10);
  VERIFY (char_traits<InternT>::length (exp) == 5);

  // There are 5 classes of errors in UTF-8 decoding
  // 1. Missing leading byte
  // 2. Missing trailing byte
  // 3. Surrogate CP
  // 4. Overlong sequence
  // 5. CP out of Unicode range
  test_offsets_error<unsigned char> offsets[] = {

    // 1. Missing leading byte. We will replace the leading byte with
    // non-leading byte, such as a byte that is always invalid or a trailing
    // byte.

    // replace leading byte with invalid byte
    {1, 5, 0, 0, 0xFF, 0},
    {3, 5, 1, 1, 0xFF, 1},
    {6, 5, 3, 2, 0xFF, 3},
    {10, 5, 6, 3, 0xFF, 6},

    // replace leading byte with trailing byte
    {1, 5, 0, 0, 0b10101010, 0},
    {3, 5, 1, 1, 0b10101010, 1},
    {6, 5, 3, 2, 0b10101010, 3},
    {10, 5, 6, 3, 0b10101010, 6},

    // 2. Missing trailing byte. We will replace the trailing byte with
    // non-trailing byte, such as a byte that is always invalid or a leading
    // byte (simple ASCII byte in our case).

    // replace first trailing byte with ASCII byte
    {3, 5, 1, 1, 'z', 2},
    {6, 5, 3, 2, 'z', 4},
    {10, 5, 6, 3, 'z', 7},

    // replace first trailing byte with invalid byte
    {3, 5, 1, 1, 0xFF, 2},
    {6, 5, 3, 2, 0xFF, 4},
    {10, 5, 6, 3, 0xFF, 7},

    // replace second trailing byte with ASCII byte
    {6, 5, 3, 2, 'z', 5},
    {10, 5, 6, 3, 'z', 8},

    // replace second trailing byte with invalid byte
    {6, 5, 3, 2, 0xFF, 5},
    {10, 5, 6, 3, 0xFF, 8},

    // replace third trailing byte
    {10, 5, 6, 3, 'z', 9},
    {10, 5, 6, 3, 0xFF, 9},

    // 2.1 The following test-cases raise doubt whether error or partial should
    // be returned. For example, we have 4-byte sequence with valid leading
    // byte. If we hide the last byte we need to return partial. But, if the
    // second or third byte, which are visible to the call to codecvt, are
    // malformed then error should be returned.

    // replace first trailing byte with ASCII byte, also incomplete at end
    {5, 5, 3, 2, 'z', 4},
    {8, 5, 6, 3, 'z', 7},
    {9, 5, 6, 3, 'z', 7},

    // replace first trailing byte with invalid byte, also incomplete at end
    {5, 5, 3, 2, 0xFF, 4},
    {8, 5, 6, 3, 0xFF, 7},
    {9, 5, 6, 3, 0xFF, 7},

    // replace second trailing byte with ASCII byte, also incomplete at end
    {9, 5, 6, 3, 'z', 8},

    // replace second trailing byte with invalid byte, also incomplete at end
    {9, 5, 6, 3, 0xFF, 8},

    // 3. Surrogate CP. We modify the second byte (first trailing) of the 3-byte
    // CP U+D700
    {6, 5, 3, 2, 0b10100000, 4}, // turn U+D700 into U+D800
    {6, 5, 3, 2, 0b10101100, 4}, // turn U+D700 into U+DB00
    {6, 5, 3, 2, 0b10110000, 4}, // turn U+D700 into U+DC00
    {6, 5, 3, 2, 0b10111100, 4}, // turn U+D700 into U+DF00

    // 4. Overlong sequence. The CPs in the input are chosen such as modifying
    // just the leading byte is enough to make them overlong, i.e. for the
    // 3-byte and 4-byte CP the second byte (first trailing) has enough leading
    // zeroes.
    {3, 5, 1, 1, 0b11000000, 1},  // make the 2-byte CP overlong
    {3, 5, 1, 1, 0b11000001, 1},  // make the 2-byte CP overlong
    {6, 5, 3, 2, 0b11100000, 3},  // make the 3-byte CP overlong
    {10, 5, 6, 3, 0b11110000, 6}, // make the 4-byte CP overlong

    // 5. CP above range
    // turn U+10AAAA into U+14AAAA by changing its leading byte
    {10, 5, 6, 3, 0b11110101, 6},
    // turn U+10AAAA into U+11AAAA by changing its 2nd byte
    {10, 5, 6, 3, 0b10011010, 7},
  };
  for (auto t : offsets)
    {
      InternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      // The corrupted byte has to lie inside the range handed to the facet.
      // Together with the in_size check above this also keeps the accesses
      // to in[t.replace_pos] below within the bounds of the array.
      VERIFY (t.replace_pos < t.in_size);
      auto old_char = in[t.replace_pos];
      in[t.replace_pos] = t.replace_char;

      auto state = mbstate_t{};
      auto in_next = (const ExternT *) nullptr;
      auto out_next = (InternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
		    out_next);
      VERIFY (res == cvt.error);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
	      == 0);
      // The element after the converted prefix must still be zero.
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);

      // length must stop at the same input offset as in did.
      state = {};
      auto len = cvt.length (state, in, in + t.in_size, t.out_size);
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.expected_in_next);

      // Undo the corruption before the next row is tried.
      in[t.replace_pos] = old_char;
    }
}

// Run the three UTF-8 to UTF-16 decoding checks (ok, partial and error
// results, in that order) against the same facet.
template <class InternT, class ExternT>
void
utf8_to_utf16_in (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  utf8_to_utf16_in_ok (cvt);
  utf8_to_utf16_in_partial (cvt);
  utf8_to_utf16_in_error (cvt);
}

template <class InternT, class ExternT>
void
utf16_to_utf8_out_ok (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  using namespace std;
  // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
  const char16_t input[] = u"b\u0448\uAAAA\U0010AAAA";
  const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
  static_assert (array_size (input) == 6, "");
  static_assert (array_size (expected) == 11, "");

  InternT in[array_size (input)];
  ExternT exp[array_size (expected)];
  copy (begin (input), end (input), begin (in));
  copy (begin (expected), end (expected), begin (exp));
  VERIFY (char_traits<InternT>::length (in) == 5);
  VERIFY (char_traits<ExternT>::length (exp) == 10);

  test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}, {5, 10}};
  for (auto t : offsets)
    {
      ExternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      auto state = mbstate_t{};
      auto in_next = (const InternT *) nullptr;
      auto out_next = (ExternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
		     out_next);
      VERIFY (res == cvt.ok);
      VERIFY (in_next == in + t.in_size);
      VERIFY (out_next == out + t.out_size);
      VERIFY (char_traits<ExternT>::compare (out, exp, t.out_size) == 0);
      if (t.out_size < array_size (out))
	VERIFY (out[t.out_size] == 0);
    }
}

template <class InternT, class ExternT>
void
utf16_to_utf8_out_partial (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  using namespace std;
  // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
  const char16_t input[] = u"b\u0448\uAAAA\U0010AAAA";
  const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
  static_assert (array_size (input) == 6, "");
  static_assert (array_size (expected) == 11, "");

  InternT in[array_size (input)];
  ExternT exp[array_size (expected)];
  copy (begin (input), end (input), begin (in));
  copy (begin (expected), end (expected), begin (exp));
  VERIFY (char_traits<InternT>::length (in) == 5);
  VERIFY (char_traits<ExternT>::length (exp) == 10);

  test_offsets_partial offsets[] = {
    {1, 0, 0, 0}, // no space for first CP

    {2, 1, 1, 1}, // no space for second CP
    {2, 2, 1, 1}, // no space for second CP

    {3, 3, 2, 3}, // no space for third CP
    {3, 4, 2, 3}, // no space for third CP
    {3, 5, 2, 3}, // no space for third CP

    {5, 6, 3, 6}, // no space for fourth CP
    {5, 7, 3, 6}, // no space for fourth CP
    {5, 8, 3, 6}, // no space for fourth CP
    {5, 9, 3, 6}, // no space for fourth CP

    {4, 10, 3, 6}, // incomplete fourth CP

    {4, 6, 3, 6}, // incomplete fourth CP, and no space for it
    {4, 7, 3, 6}, // incomplete fourth CP, and no space for it
    {4, 8, 3, 6}, // incomplete fourth CP, and no space for it
    {4, 9, 3, 6}, // incomplete fourth CP, and no space for it
  };
  for (auto t : offsets)
    {
      ExternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      auto state = mbstate_t{};
      auto in_next = (const InternT *) nullptr;
      auto out_next = (ExternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
		     out_next);
      VERIFY (res == cvt.partial);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<ExternT>::compare (out, exp, t.expected_out_next)
	      == 0);
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);
    }
}

// Checks that cvt.out reports an error for ill-formed UTF-16 input.
//
// Every table entry overwrites the code unit at replace_pos with
// replace_char, converts the first in_size input code units into a buffer
// limited to out_size code units, and verifies that:
//  - the result is codecvt_base::error,
//  - in_next / out_next stop at expected_in_next / expected_out_next,
//  - the output produced so far matches the expected UTF-8, and
//  - the output element right after out_next is still zero.
// The overwritten code unit is restored before the next entry runs.
template <class InternT, class ExternT>
void
utf16_to_utf8_out_error (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  using namespace std;
  // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
  const char16_t input[] = u"b\u0448\uAAAA\U0010AAAA";
  const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
  static_assert (array_size (input) == 6, "");
  static_assert (array_size (expected) == 11, "");

  InternT in[array_size (input)];
  ExternT exp[array_size (expected)];
  copy (begin (input), end (input), begin (in));
  copy (begin (expected), end (expected), begin (exp));
  VERIFY (char_traits<InternT>::length (in) == 5);
  VERIFY (char_traits<ExternT>::length (exp) == 10);

  // The only possible error in UTF-16 is unpaired surrogate code units.
  // So we replace valid code points (scalar values) with lone surrogate CU.
  // Fields: in_size, out_size, expected_in_next, expected_out_next,
  // replace_char, replace_pos.
  test_offsets_error<InternT> offsets[] = {
    {5, 10, 0, 0, 0xD800, 0},
    {5, 10, 0, 0, 0xDBFF, 0},
    {5, 10, 0, 0, 0xDC00, 0},
    {5, 10, 0, 0, 0xDFFF, 0},

    {5, 10, 1, 1, 0xD800, 1},
    {5, 10, 1, 1, 0xDBFF, 1},
    {5, 10, 1, 1, 0xDC00, 1},
    {5, 10, 1, 1, 0xDFFF, 1},

    {5, 10, 2, 3, 0xD800, 2},
    {5, 10, 2, 3, 0xDBFF, 2},
    {5, 10, 2, 3, 0xDC00, 2},
    {5, 10, 2, 3, 0xDFFF, 2},

    // make the leading surrogate a trailing one
    {5, 10, 3, 6, 0xDC00, 3},
    {5, 10, 3, 6, 0xDFFF, 3},

    // make the trailing surrogate a leading one
    {5, 10, 3, 6, 0xD800, 4},
    {5, 10, 3, 6, 0xDBFF, 4},

    // make the trailing surrogate a BMP char
    {5, 10, 3, 6, u'z', 4},
  };

  for (auto t : offsets)
    {
      ExternT out[array_size (exp) - 1] = {};
      // Sanity-check the table entry before it is used to index buffers.
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      VERIFY (t.replace_pos < array_size (in));
      // Corrupt the input; the original code unit is put back below.
      const auto old_char = in[t.replace_pos];
      in[t.replace_pos] = t.replace_char;

      mbstate_t state{};
      const InternT *in_next = nullptr;
      ExternT *out_next = nullptr;
      const codecvt_base::result res
	= cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
		   out_next);
      VERIFY (res == cvt.error);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<ExternT>::compare (out, exp, t.expected_out_next)
	      == 0);
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);

      in[t.replace_pos] = old_char;
    }
}

// Runs the ok, partial and error variants of the UTF-16 to UTF-8 output
// conversion tests, in that order, against the same facet CVT.
template <class InternT, class ExternT>
void
utf16_to_utf8_out (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  utf16_to_utf8_out_ok (cvt);
  utf16_to_utf8_out_partial (cvt);
  utf16_to_utf8_out_error (cvt);
}

// Entry point for a UTF-8 <-> UTF-16 facet: calls utf8_to_utf16_in and then
// utf16_to_utf8_out with the same facet CVT.
template <class InternT, class ExternT>
void
test_utf8_utf16_cvt (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  utf8_to_utf16_in (cvt);
  utf16_to_utf8_out (cvt);
}

template <class InternT, class ExternT>
void
utf8_to_ucs2_in_ok (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  using namespace std;
  // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
  const unsigned char input[] = "b\u0448\uAAAA";
  const char16_t expected[] = u"b\u0448\uAAAA";
  static_assert (array_size (input) == 7, "");
  static_assert (array_size (expected) == 4, "");

  ExternT in[array_size (input)];
  InternT exp[array_size (expected)];
  copy (begin (input), end (input), begin (in));
  copy (begin (expected), end (expected), begin (exp));
  VERIFY (char_traits<ExternT>::length (in) == 6);
  VERIFY (char_traits<InternT>::length (exp) == 3);

  test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {3, 2}, {6, 3}};
  for (auto t : offsets)
    {
      InternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      auto state = mbstate_t{};
      auto in_next = (const ExternT *) nullptr;
      auto out_next = (InternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
		    out_next);
      VERIFY (res == cvt.ok);
      VERIFY (in_next == in + t.in_size);
      VERIFY (out_next == out + t.out_size);
      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
      if (t.out_size < array_size (out))
	VERIFY (out[t.out_size] == 0);

      state = {};
      auto len = cvt.length (state, in, in + t.in_size, t.out_size);
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.in_size);
    }

  for (auto t : offsets)
    {
      InternT out[array_size (exp)] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      auto state = mbstate_t{};
      auto in_next = (const ExternT *) nullptr;
      auto out_next = (InternT *) nullptr;
      auto res = codecvt_base::result ();

      res
	= cvt.in (state, in, in + t.in_size, in_next, out, end (out), out_next);
      VERIFY (res == cvt.ok);
      VERIFY (in_next == in + t.in_size);
      VERIFY (out_next == out + t.out_size);
      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
      if (t.out_size < array_size (out))
	VERIFY (out[t.out_size] == 0);

      state = {};
      auto len = cvt.length (state, in, in + t.in_size, array_size (out));
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.in_size);
    }
}

template <class InternT, class ExternT>
void
utf8_to_ucs2_in_partial (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  using namespace std;
  // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
  const unsigned char input[] = "b\u0448\uAAAA";
  const char16_t expected[] = u"b\u0448\uAAAA";
  static_assert (array_size (input) == 7, "");
  static_assert (array_size (expected) == 4, "");

  ExternT in[array_size (input)];
  InternT exp[array_size (expected)];
  copy (begin (input), end (input), begin (in));
  copy (begin (expected), end (expected), begin (exp));
  VERIFY (char_traits<ExternT>::length (in) == 6);
  VERIFY (char_traits<InternT>::length (exp) == 3);

  test_offsets_partial offsets[] = {
    {1, 0, 0, 0}, // no space for first CP

    {3, 1, 1, 1}, // no space for second CP
    {2, 2, 1, 1}, // incomplete second CP
    {2, 1, 1, 1}, // incomplete second CP, and no space for it

    {6, 2, 3, 2}, // no space for third CP
    {4, 3, 3, 2}, // incomplete third CP
    {5, 3, 3, 2}, // incomplete third CP
    {4, 2, 3, 2}, // incomplete third CP, and no space for it
    {5, 2, 3, 2}, // incomplete third CP, and no space for it
  };

  for (auto t : offsets)
    {
      InternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      auto state = mbstate_t{};
      auto in_next = (const ExternT *) nullptr;
      auto out_next = (InternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
		    out_next);
      VERIFY (res == cvt.partial);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
	      == 0);
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);

      state = {};
      auto len = cvt.length (state, in, in + t.in_size, t.out_size);
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.expected_in_next);
    }
}

// Checks that cvt.in and cvt.length reject ill-formed UTF-8 input, as well
// as code points that need a surrogate pair in UTF-16.
//
// Every table entry overwrites the byte at replace_pos with replace_char
// (entries that write 'b' at position 0 leave the input unchanged),
// converts the first in_size bytes into a buffer limited to out_size
// elements, and verifies that:
//  - the result is codecvt_base::error,
//  - in_next / out_next stop at expected_in_next / expected_out_next,
//  - the output produced so far matches the expected string,
//  - the output element right after out_next is still zero, and
//  - cvt.length reports expected_in_next consumed bytes.
// The overwritten byte is restored before the next entry runs.
template <class InternT, class ExternT>
void
utf8_to_ucs2_in_error (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  using namespace std;
  // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP.  The
  // 3-byte CP is U+D700 so that changing one byte can turn it into a
  // surrogate (see class 3 below).
  const unsigned char input[] = "b\u0448\uD700\U0010AAAA";
  const char16_t expected[] = u"b\u0448\uD700\U0010AAAA";
  static_assert (array_size (input) == 11, "");
  static_assert (array_size (expected) == 6, "");

  ExternT in[array_size (input)];
  InternT exp[array_size (expected)];
  copy (begin (input), end (input), begin (in));
  copy (begin (expected), end (expected), begin (exp));
  VERIFY (char_traits<ExternT>::length (in) == 10);
  VERIFY (char_traits<InternT>::length (exp) == 5);

  // There are 5 classes of errors in UTF-8 decoding
  // 1. Missing leading byte
  // 2. Missing trailing byte
  // 3. Surrogate CP
  // 4. Overlong sequence
  // 5. CP out of Unicode range
  //
  // Fields: in_size, out_size, expected_in_next, expected_out_next,
  // replace_char, replace_pos.
  test_offsets_error<unsigned char> offsets[] = {

    // 1. Missing leading byte. We will replace the leading byte with
    // non-leading byte, such as a byte that is always invalid or a trailing
    // byte.

    // replace leading byte with invalid byte
    {1, 5, 0, 0, 0xFF, 0},
    {3, 5, 1, 1, 0xFF, 1},
    {6, 5, 3, 2, 0xFF, 3},
    {10, 5, 6, 3, 0xFF, 6},

    // replace leading byte with trailing byte
    {1, 5, 0, 0, 0b10101010, 0},
    {3, 5, 1, 1, 0b10101010, 1},
    {6, 5, 3, 2, 0b10101010, 3},
    {10, 5, 6, 3, 0b10101010, 6},

    // 2. Missing trailing byte. We will replace the trailing byte with
    // non-trailing byte, such as a byte that is always invalid or a leading
    // byte (simple ASCII byte in our case).

    // replace first trailing byte with ASCII byte
    {3, 5, 1, 1, 'z', 2},
    {6, 5, 3, 2, 'z', 4},
    {10, 5, 6, 3, 'z', 7},

    // replace first trailing byte with invalid byte
    {3, 5, 1, 1, 0xFF, 2},
    {6, 5, 3, 2, 0xFF, 4},
    {10, 5, 6, 3, 0xFF, 7},

    // replace second trailing byte with ASCII byte
    {6, 5, 3, 2, 'z', 5},
    {10, 5, 6, 3, 'z', 8},

    // replace second trailing byte with invalid byte
    {6, 5, 3, 2, 0xFF, 5},
    {10, 5, 6, 3, 0xFF, 8},

    // replace third trailing byte
    {10, 5, 6, 3, 'z', 9},
    {10, 5, 6, 3, 0xFF, 9},

    // 2.1 The following test-cases raise doubt whether error or partial should
    // be returned. For example, we have 4-byte sequence with valid leading
    // byte. If we hide the last byte we need to return partial. But, if the
    // second or third byte, which are visible to the call to codecvt, are
    // malformed then error should be returned.

    // replace first trailing byte with ASCII byte, also incomplete at end
    {5, 5, 3, 2, 'z', 4},
    {8, 5, 6, 3, 'z', 7},
    {9, 5, 6, 3, 'z', 7},

    // replace first trailing byte with invalid byte, also incomplete at end
    {5, 5, 3, 2, 0xFF, 4},
    {8, 5, 6, 3, 0xFF, 7},
    {9, 5, 6, 3, 0xFF, 7},

    // replace second trailing byte with ASCII byte, also incomplete at end
    {9, 5, 6, 3, 'z', 8},

    // replace second trailing byte with invalid byte, also incomplete at end
    {9, 5, 6, 3, 0xFF, 8},

    // 3. Surrogate CP. We modify the second byte (first trailing) of the 3-byte
    // CP U+D700
    {6, 5, 3, 2, 0b10100000, 4}, // turn U+D700 into U+D800
    {6, 5, 3, 2, 0b10101100, 4}, // turn U+D700 into U+DB00
    {6, 5, 3, 2, 0b10110000, 4}, // turn U+D700 into U+DC00
    {6, 5, 3, 2, 0b10111100, 4}, // turn U+D700 into U+DF00

    // 4. Overlong sequence. The CPs in the input are chosen such as modifying
    // just the leading byte is enough to make them overlong, i.e. for the
    // 3-byte and 4-byte CP the second byte (first trailing) has enough leading
    // zeroes.
    {3, 5, 1, 1, 0b11000000, 1},  // make the 2-byte CP overlong
    {3, 5, 1, 1, 0b11000001, 1},  // make the 2-byte CP overlong
    {6, 5, 3, 2, 0b11100000, 3},  // make the 3-byte CP overlong
    {10, 5, 6, 3, 0b11110000, 6}, // make the 4-byte CP overlong

    // 5. CP above range
    // turn U+10AAAA into U+14AAAA by changing its leading byte
    {10, 5, 6, 3, 0b11110101, 6},
    // turn U+10AAAA into U+11AAAA by changing its 2nd byte
    {10, 5, 6, 3, 0b10011010, 7},
    // Don't replace anything, show full 4-byte CP U+10AAAA
    {10, 4, 6, 3, 'b', 0},
    {10, 5, 6, 3, 'b', 0},
    // Don't replace anything, show incomplete 4-byte CP at the end. It's still
    // out of UCS2 range just by seeing the first byte.
    {7, 4, 6, 3, 'b', 0}, // incomplete fourth CP
    {8, 4, 6, 3, 'b', 0}, // incomplete fourth CP
    {9, 4, 6, 3, 'b', 0}, // incomplete fourth CP
    {7, 5, 6, 3, 'b', 0}, // incomplete fourth CP
    {8, 5, 6, 3, 'b', 0}, // incomplete fourth CP
    {9, 5, 6, 3, 'b', 0}, // incomplete fourth CP
  };
  for (auto t : offsets)
    {
      InternT out[array_size (exp) - 1] = {};
      // Sanity-check the table entry before it is used to index buffers.
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      VERIFY (t.replace_pos < array_size (in));
      // Corrupt the input; the original byte is put back below.
      const auto old_char = in[t.replace_pos];
      in[t.replace_pos] = t.replace_char;

      mbstate_t state{};
      const ExternT *in_next = nullptr;
      InternT *out_next = nullptr;
      const codecvt_base::result res
	= cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
		  out_next);
      VERIFY (res == cvt.error);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
	      == 0);
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);

      // length must stop at the same input position, from a fresh state.
      state = {};
      auto len = cvt.length (state, in, in + t.in_size, t.out_size);
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.expected_in_next);

      in[t.replace_pos] = old_char;
    }
}

// Runs the ok, partial and error variants of the UTF-8 to UCS-2 input
// conversion tests, in that order, against the same facet CVT.
template <class InternT, class ExternT>
void
utf8_to_ucs2_in (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  utf8_to_ucs2_in_ok (cvt);
  utf8_to_ucs2_in_partial (cvt);
  utf8_to_ucs2_in_error (cvt);
}

template <class InternT, class ExternT>
void
ucs2_to_utf8_out_ok (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  using namespace std;
  // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
  const char16_t input[] = u"b\u0448\uAAAA";
  const unsigned char expected[] = "b\u0448\uAAAA";
  static_assert (array_size (input) == 4, "");
  static_assert (array_size (expected) == 7, "");

  InternT in[array_size (input)];
  ExternT exp[array_size (expected)];
  copy (begin (input), end (input), begin (in));
  copy (begin (expected), end (expected), begin (exp));
  VERIFY (char_traits<InternT>::length (in) == 3);
  VERIFY (char_traits<ExternT>::length (exp) == 6);

  test_offsets_ok offsets[] = {{0, 0}, {1, 1}, {2, 3}, {3, 6}};
  for (auto t : offsets)
    {
      ExternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      auto state = mbstate_t{};
      auto in_next = (const InternT *) nullptr;
      auto out_next = (ExternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
		     out_next);
      VERIFY (res == cvt.ok);
      VERIFY (in_next == in + t.in_size);
      VERIFY (out_next == out + t.out_size);
      VERIFY (char_traits<ExternT>::compare (out, exp, t.out_size) == 0);
      if (t.out_size < array_size (out))
	VERIFY (out[t.out_size] == 0);
    }
}

template <class InternT, class ExternT>
void
ucs2_to_utf8_out_partial (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  using namespace std;
  // UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
  const char16_t input[] = u"b\u0448\uAAAA";
  const unsigned char expected[] = "b\u0448\uAAAA";
  static_assert (array_size (input) == 4, "");
  static_assert (array_size (expected) == 7, "");

  InternT in[array_size (input)];
  ExternT exp[array_size (expected)];
  copy (begin (input), end (input), begin (in));
  copy (begin (expected), end (expected), begin (exp));
  VERIFY (char_traits<InternT>::length (in) == 3);
  VERIFY (char_traits<ExternT>::length (exp) == 6);

  test_offsets_partial offsets[] = {
    {1, 0, 0, 0}, // no space for first CP

    {2, 1, 1, 1}, // no space for second CP
    {2, 2, 1, 1}, // no space for second CP

    {3, 3, 2, 3}, // no space for third CP
    {3, 4, 2, 3}, // no space for third CP
    {3, 5, 2, 3}, // no space for third CP
  };
  for (auto t : offsets)
    {
      ExternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      auto state = mbstate_t{};
      auto in_next = (const InternT *) nullptr;
      auto out_next = (ExternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
		     out_next);
      VERIFY (res == cvt.partial);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<ExternT>::compare (out, exp, t.expected_out_next)
	      == 0);
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);
    }
}

// Checks that cvt.out of a UCS-2 facet reports an error for any surrogate
// code unit, whether it is unpaired or part of a valid surrogate pair.
//
// Every table entry overwrites the code unit at replace_pos with
// replace_char (entries that write u'b' at position 0 leave the input
// unchanged), converts the first in_size code units into a buffer limited
// to out_size elements, and verifies that:
//  - the result is codecvt_base::error,
//  - in_next / out_next stop at expected_in_next / expected_out_next,
//  - the output produced so far matches the expected UTF-8, and
//  - the output element right after out_next is still zero.
// The overwritten code unit is restored before the next entry runs.
template <class InternT, class ExternT>
void
ucs2_to_utf8_out_error (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  using namespace std;
  // Three BMP code points followed by one CP that needs a surrogate pair.
  const char16_t input[] = u"b\u0448\uAAAA\U0010AAAA";
  const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
  static_assert (array_size (input) == 6, "");
  static_assert (array_size (expected) == 11, "");

  InternT in[array_size (input)];
  ExternT exp[array_size (expected)];
  copy (begin (input), end (input), begin (in));
  copy (begin (expected), end (expected), begin (exp));
  VERIFY (char_traits<InternT>::length (in) == 5);
  VERIFY (char_traits<ExternT>::length (exp) == 10);

  // Fields: in_size, out_size, expected_in_next, expected_out_next,
  // replace_char, replace_pos.
  test_offsets_error<InternT> offsets[] = {
    {3, 6, 0, 0, 0xD800, 0},
    {3, 6, 0, 0, 0xDBFF, 0},
    {3, 6, 0, 0, 0xDC00, 0},
    {3, 6, 0, 0, 0xDFFF, 0},

    {3, 6, 1, 1, 0xD800, 1},
    {3, 6, 1, 1, 0xDBFF, 1},
    {3, 6, 1, 1, 0xDC00, 1},
    {3, 6, 1, 1, 0xDFFF, 1},

    {3, 6, 2, 3, 0xD800, 2},
    {3, 6, 2, 3, 0xDBFF, 2},
    {3, 6, 2, 3, 0xDC00, 2},
    {3, 6, 2, 3, 0xDFFF, 2},

    // make the leading surrogate a trailing one
    {5, 10, 3, 6, 0xDC00, 3},
    {5, 10, 3, 6, 0xDFFF, 3},

    // make the trailing surrogate a leading one
    {5, 10, 3, 6, 0xD800, 4},
    {5, 10, 3, 6, 0xDBFF, 4},

    // make the trailing surrogate a BMP char
    {5, 10, 3, 6, u'z', 4},

    // don't replace anything in the test cases below, just show the surrogate
    // pair (fourth CP) fully or partially
    {5, 10, 3, 6, u'b', 0},
    {5, 7, 3, 6, u'b', 0}, // no space for fourth CP
    {5, 8, 3, 6, u'b', 0}, // no space for fourth CP
    {5, 9, 3, 6, u'b', 0}, // no space for fourth CP

    {4, 10, 3, 6, u'b', 0}, // incomplete fourth CP
    {4, 7, 3, 6, u'b', 0},  // incomplete fourth CP, and no space for it
    {4, 8, 3, 6, u'b', 0},  // incomplete fourth CP, and no space for it
    {4, 9, 3, 6, u'b', 0},  // incomplete fourth CP, and no space for it
  };

  for (auto t : offsets)
    {
      ExternT out[array_size (exp) - 1] = {};
      // Sanity-check the table entry before it is used to index buffers.
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      VERIFY (t.replace_pos < array_size (in));
      // Corrupt the input; the original code unit is put back below.
      const auto old_char = in[t.replace_pos];
      in[t.replace_pos] = t.replace_char;

      mbstate_t state{};
      const InternT *in_next = nullptr;
      ExternT *out_next = nullptr;
      const codecvt_base::result res
	= cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
		   out_next);
      VERIFY (res == cvt.error);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<ExternT>::compare (out, exp, t.expected_out_next)
	      == 0);
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);

      in[t.replace_pos] = old_char;
    }
}

// Runs the ok, partial and error variants of the UCS-2 to UTF-8 output
// conversion tests, in that order, against the same facet CVT.
template <class InternT, class ExternT>
void
ucs2_to_utf8_out (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  ucs2_to_utf8_out_ok (cvt);
  ucs2_to_utf8_out_partial (cvt);
  ucs2_to_utf8_out_error (cvt);
}

// Entry point for a UTF-8 <-> UCS-2 facet: runs the input (UTF-8 to UCS-2)
// tests and then the output (UCS-2 to UTF-8) tests on the same facet CVT.
template <class InternT, class ExternT>
void
test_utf8_ucs2_cvt (const std::codecvt<InternT, ExternT, mbstate_t> &cvt)
{
  utf8_to_ucs2_in (cvt);
  ucs2_to_utf8_out (cvt);
}

// Byte order used when UTF-16 code units are serialized into bytes.
enum utf16_endianess
{
  utf16_big_endian = 0,   // most significant byte of each code unit first
  utf16_little_endian = 1 // least significant byte of each code unit first
};

// Serializes the UTF-16 code units in [F, L) into bytes written through O,
// two bytes per code unit, in the byte order selected by E.  Returns the
// output iterator advanced past the last byte written.
template <class Iter1, class Iter2>
Iter2
utf16_to_bytes (Iter1 f, Iter1 l, Iter2 o, utf16_endianess e)
{
  // Big endian emits bits 8-15 first, little endian emits bits 0-7 first.
  const int first_shift = (e == utf16_big_endian) ? 8 : 0;
  const int second_shift = 8 - first_shift;

  while (f != l)
    {
      *o++ = (*f >> first_shift) & 0xFF;
      *o++ = (*f >> second_shift) & 0xFF;
      ++f;
    }
  return o;
}

template <class InternT>
void
utf16_to_utf32_in_ok (const std::codecvt<InternT, char, mbstate_t> &cvt,
		      utf16_endianess endianess)
{
  using namespace std;
  const char16_t input[] = u"b\u0448\uAAAA\U0010AAAA";
  const char32_t expected[] = U"b\u0448\uAAAA\U0010AAAA";
  static_assert (array_size (input) == 6, "");
  static_assert (array_size (expected) == 5, "");

  char in[array_size (input) * 2];
  InternT exp[array_size (expected)];
  utf16_to_bytes (begin (input), end (input), begin (in), endianess);
  copy (begin (expected), end (expected), begin (exp));

  test_offsets_ok offsets[] = {{0, 0}, {2, 1}, {4, 2}, {6, 3}, {10, 4}};
  for (auto t : offsets)
    {
      InternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      auto state = mbstate_t{};
      auto in_next = (const char *) nullptr;
      auto out_next = (InternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
		    out_next);
      VERIFY (res == cvt.ok);
      VERIFY (in_next == in + t.in_size);
      VERIFY (out_next == out + t.out_size);
      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
      if (t.out_size < array_size (out))
	VERIFY (out[t.out_size] == 0);

      state = {};
      auto len = cvt.length (state, in, in + t.in_size, t.out_size);
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.in_size);
    }

  for (auto t : offsets)
    {
      InternT out[array_size (exp)] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      auto state = mbstate_t{};
      auto in_next = (const char *) nullptr;
      auto out_next = (InternT *) nullptr;
      auto res = codecvt_base::result ();

      res
	= cvt.in (state, in, in + t.in_size, in_next, out, end (out), out_next);
      VERIFY (res == cvt.ok);
      VERIFY (in_next == in + t.in_size);
      VERIFY (out_next == out + t.out_size);
      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
      if (t.out_size < array_size (out))
	VERIFY (out[t.out_size] == 0);

      state = {};
      auto len = cvt.length (state, in, in + t.in_size, array_size (out));
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.in_size);
    }
}

template <class InternT>
void
utf16_to_utf32_in_partial (const std::codecvt<InternT, char, mbstate_t> &cvt,
			   utf16_endianess endianess)
{
  using namespace std;
  const char16_t input[] = u"b\u0448\uAAAA\U0010AAAA";
  const char32_t expected[] = U"b\u0448\uAAAA\U0010AAAA";
  static_assert (array_size (input) == 6, "");
  static_assert (array_size (expected) == 5, "");

  char in[array_size (input) * 2];
  InternT exp[array_size (expected)];
  utf16_to_bytes (begin (input), end (input), begin (in), endianess);
  copy (begin (expected), end (expected), begin (exp));

  test_offsets_partial offsets[] = {
    {2, 0, 0, 0}, // no space for first CP
    {1, 1, 0, 0}, // incomplete first CP
    {1, 0, 0, 0}, // incomplete first CP, and no space for it

    {4, 1, 2, 1}, // no space for second CP
    {3, 2, 2, 1}, // incomplete second CP
    {3, 1, 2, 1}, // incomplete second CP, and no space for it

    {6, 2, 4, 2}, // no space for third CP
    {5, 3, 4, 2}, // incomplete third CP
    {5, 2, 4, 2}, // incomplete third CP, and no space for it

    {10, 3, 6, 3}, // no space for fourth CP
    {7, 4, 6, 3},  // incomplete fourth CP
    {8, 4, 6, 3},  // incomplete fourth CP
    {9, 4, 6, 3},  // incomplete fourth CP
    {7, 3, 6, 3},  // incomplete fourth CP, and no space for it
    {8, 3, 6, 3},  // incomplete fourth CP, and no space for it
    {9, 3, 6, 3},  // incomplete fourth CP, and no space for it
  };

  for (auto t : offsets)
    {
      InternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      auto state = mbstate_t{};
      auto in_next = (const char *) nullptr;
      auto out_next = (InternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
		    out_next);
      VERIFY (res == cvt.partial);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
	      == 0);
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);

      state = {};
      auto len = cvt.length (state, in, in + t.in_size, t.out_size);
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.expected_in_next);
    }
}

// Checks that cvt.in and cvt.length report an error for ill-formed UTF-16
// supplied as a byte sequence in the given byte order.
//
// Every table entry overwrites the UTF-16 code unit at replace_pos with
// replace_char, re-serializes the whole string to bytes, converts the first
// in_size bytes into a buffer limited to out_size elements, and verifies
// that:
//  - the result is codecvt_base::error,
//  - in_next / out_next stop at expected_in_next / expected_out_next,
//  - the output produced so far matches the expected UTF-32,
//  - the output element right after out_next is still zero, and
//  - cvt.length reports expected_in_next consumed bytes.
// The overwritten code unit is restored before the next entry runs.
template <class InternT>
void
utf16_to_utf32_in_error (const std::codecvt<InternT, char, mbstate_t> &cvt,
			 utf16_endianess endianess)
{
  using namespace std;
  // Not const: individual code units are overwritten per test case.
  char16_t input[] = u"b\u0448\uAAAA\U0010AAAA";
  const char32_t expected[] = U"b\u0448\uAAAA\U0010AAAA";
  static_assert (array_size (input) == 6, "");
  static_assert (array_size (expected) == 5, "");

  InternT exp[array_size (expected)];
  copy (begin (expected), end (expected), begin (exp));

  // The only possible error in UTF-16 is unpaired surrogate code units.
  // So we replace valid code points (scalar values) with lone surrogate CU.
  // Fields: in_size, out_size, expected_in_next, expected_out_next,
  // replace_char, replace_pos.  in_size and expected_in_next are in bytes,
  // replace_pos is in UTF-16 code units.
  test_offsets_error<char16_t> offsets[] = {
    {10, 4, 0, 0, 0xD800, 0},
    {10, 4, 0, 0, 0xDBFF, 0},
    {10, 4, 0, 0, 0xDC00, 0},
    {10, 4, 0, 0, 0xDFFF, 0},

    {10, 4, 2, 1, 0xD800, 1},
    {10, 4, 2, 1, 0xDBFF, 1},
    {10, 4, 2, 1, 0xDC00, 1},
    {10, 4, 2, 1, 0xDFFF, 1},

    {10, 4, 4, 2, 0xD800, 2},
    {10, 4, 4, 2, 0xDBFF, 2},
    {10, 4, 4, 2, 0xDC00, 2},
    {10, 4, 4, 2, 0xDFFF, 2},

    // make the leading surrogate a trailing one
    {10, 4, 6, 3, 0xDC00, 3},
    {10, 4, 6, 3, 0xDFFF, 3},

    // make the trailing surrogate a leading one
    {10, 4, 6, 3, 0xD800, 4},
    {10, 4, 6, 3, 0xDBFF, 4},

    // make the trailing surrogate a BMP char
    {10, 4, 6, 3, u'z', 4},
  };

  for (auto t : offsets)
    {
      char in[array_size (input) * 2];
      InternT out[array_size (exp) - 1] = {};
      // Sanity-check the table entry before it is used to index buffers.
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      VERIFY (t.replace_pos < array_size (input));
      const auto old_char = input[t.replace_pos];
      input[t.replace_pos] = t.replace_char; // replace in input, not in in
      utf16_to_bytes (begin (input), end (input), begin (in), endianess);

      mbstate_t state{};
      const char *in_next = nullptr;
      InternT *out_next = nullptr;
      const codecvt_base::result res
	= cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
		  out_next);
      VERIFY (res == cvt.error);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
	      == 0);
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);

      // length must stop at the same input position, from a fresh state.
      state = {};
      auto len = cvt.length (state, in, in + t.in_size, t.out_size);
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.expected_in_next);

      input[t.replace_pos] = old_char;
    }
}

template <class InternT>
void
utf32_to_utf16_out_ok (const std::codecvt<InternT, char, mbstate_t> &cvt,
		       utf16_endianess endianess)
{
  using namespace std;
  const char32_t input[] = U"b\u0448\uAAAA\U0010AAAA";
  const char16_t expected[] = u"b\u0448\uAAAA\U0010AAAA";
  static_assert (array_size (input) == 5, "");
  static_assert (array_size (expected) == 6, "");

  InternT in[array_size (input)];
  char exp[array_size (expected) * 2];
  copy (begin (input), end (input), begin (in));
  utf16_to_bytes (begin (expected), end (expected), begin (exp), endianess);

  test_offsets_ok offsets[] = {{0, 0}, {1, 2}, {2, 4}, {3, 6}, {4, 10}};
  for (auto t : offsets)
    {
      char out[array_size (exp) - 2] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      auto state = mbstate_t{};
      auto in_next = (const InternT *) nullptr;
      auto out_next = (char *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
		     out_next);
      VERIFY (res == cvt.ok);
      VERIFY (in_next == in + t.in_size);
      VERIFY (out_next == out + t.out_size);
      VERIFY (char_traits<char>::compare (out, exp, t.out_size) == 0);
      if (t.out_size < array_size (out))
	VERIFY (out[t.out_size] == 0);
    }
}

template <class InternT>
void
utf32_to_utf16_out_partial (const std::codecvt<InternT, char, mbstate_t> &cvt,
			    utf16_endianess endianess)
{
  using namespace std;
  const char32_t input[] = U"b\u0448\uAAAA\U0010AAAA";
  const char16_t expected[] = u"b\u0448\uAAAA\U0010AAAA";
  static_assert (array_size (input) == 5, "");
  static_assert (array_size (expected) == 6, "");

  InternT in[array_size (input)];
  char exp[array_size (expected) * 2];
  copy (begin (input), end (input), begin (in));
  utf16_to_bytes (begin (expected), end (expected), begin (exp), endianess);

  test_offsets_partial offsets[] = {
    {1, 0, 0, 0}, // no space for first CP
    {1, 1, 0, 0}, // no space for first CP

    {2, 2, 1, 2}, // no space for second CP
    {2, 3, 1, 2}, // no space for second CP

    {3, 4, 2, 4}, // no space for third CP
    {3, 5, 2, 4}, // no space for third CP

    {4, 6, 3, 6}, // no space for fourth CP
    {4, 7, 3, 6}, // no space for fourth CP
    {4, 8, 3, 6}, // no space for fourth CP
    {4, 9, 3, 6}, // no space for fourth CP
  };
  for (auto t : offsets)
    {
      char out[array_size (exp) - 2] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      auto state = mbstate_t{};
      auto in_next = (const InternT *) nullptr;
      auto out_next = (char *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
		     out_next);
      VERIFY (res == cvt.partial);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);
    }
}

template <class InternT>
void
utf32_to_utf16_out_error (const std::codecvt<InternT, char, mbstate_t> &cvt,
			  utf16_endianess endianess)
{
  using namespace std;
  const char32_t input[] = U"b\u0448\uAAAA\U0010AAAA";
  const char16_t expected[] = u"b\u0448\uAAAA\U0010AAAA";
  static_assert (array_size (input) == 5, "");
  static_assert (array_size (expected) == 6, "");

  InternT in[array_size (input)];
  char exp[array_size (expected) * 2];
  copy (begin (input), end (input), begin (in));
  utf16_to_bytes (begin (expected), end (expected), begin (exp), endianess);

  test_offsets_error<InternT> offsets[] = {

    // Surrogate CP
    {4, 10, 0, 0, 0xD800, 0},
    {4, 10, 1, 2, 0xDBFF, 1},
    {4, 10, 2, 4, 0xDC00, 2},
    {4, 10, 3, 6, 0xDFFF, 3},

    // CP out of range
    {4, 10, 0, 0, 0x00110000, 0},
    {4, 10, 1, 2, 0x00110000, 1},
    {4, 10, 2, 4, 0x00110000, 2},
    {4, 10, 3, 6, 0x00110000, 3}};

  for (auto t : offsets)
    {
      char out[array_size (exp) - 2] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      auto old_char = in[t.replace_pos];
      in[t.replace_pos] = t.replace_char;

      auto state = mbstate_t{};
      auto in_next = (const InternT *) nullptr;
      auto out_next = (char *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
		     out_next);
      VERIFY (res == cvt.error);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);

      in[t.replace_pos] = old_char;
    }
}

// Run the whole UTF-16 <-> UTF-32 test group on CVT: the three "in" tests
// (ok, partial, error) followed by the three "out" tests (ok, partial, error).
// ENDIANESS is forwarded unchanged to every test.
template <class InternT>
void
test_utf16_utf32_cvt (const std::codecvt<InternT, char, mbstate_t> &cvt,
		      utf16_endianess endianess)
{
  utf16_to_utf32_in_ok (cvt, endianess);
  utf16_to_utf32_in_partial (cvt, endianess);
  utf16_to_utf32_in_error (cvt, endianess);
  utf32_to_utf16_out_ok (cvt, endianess);
  utf32_to_utf16_out_partial (cvt, endianess);
  utf32_to_utf16_out_error (cvt, endianess);
}

template <class InternT>
void
utf16_to_ucs2_in_ok (const std::codecvt<InternT, char, mbstate_t> &cvt,
		     utf16_endianess endianess)
{
  using namespace std;
  const char16_t input[] = u"b\u0448\uAAAA";
  const char16_t expected[] = u"b\u0448\uAAAA";
  static_assert (array_size (input) == 4, "");
  static_assert (array_size (expected) == 4, "");

  char in[array_size (input) * 2];
  InternT exp[array_size (expected)];
  utf16_to_bytes (begin (input), end (input), begin (in), endianess);
  copy (begin (expected), end (expected), begin (exp));

  test_offsets_ok offsets[] = {{0, 0}, {2, 1}, {4, 2}, {6, 3}};
  for (auto t : offsets)
    {
      InternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      auto state = mbstate_t{};
      auto in_next = (const char *) nullptr;
      auto out_next = (InternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
		    out_next);
      VERIFY (res == cvt.ok);
      VERIFY (in_next == in + t.in_size);
      VERIFY (out_next == out + t.out_size);
      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
      if (t.out_size < array_size (out))
	VERIFY (out[t.out_size] == 0);

      state = {};
      auto len = cvt.length (state, in, in + t.in_size, t.out_size);
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.in_size);
    }

  for (auto t : offsets)
    {
      InternT out[array_size (exp)] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      auto state = mbstate_t{};
      auto in_next = (const char *) nullptr;
      auto out_next = (InternT *) nullptr;
      auto res = codecvt_base::result ();

      res
	= cvt.in (state, in, in + t.in_size, in_next, out, end (out), out_next);
      VERIFY (res == cvt.ok);
      VERIFY (in_next == in + t.in_size);
      VERIFY (out_next == out + t.out_size);
      VERIFY (char_traits<InternT>::compare (out, exp, t.out_size) == 0);
      if (t.out_size < array_size (out))
	VERIFY (out[t.out_size] == 0);

      state = {};
      auto len = cvt.length (state, in, in + t.in_size, array_size (out));
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.in_size);
    }
}

template <class InternT>
void
utf16_to_ucs2_in_partial (const std::codecvt<InternT, char, mbstate_t> &cvt,
			  utf16_endianess endianess)
{
  using namespace std;
  const char16_t input[] = u"b\u0448\uAAAA";
  const char16_t expected[] = u"b\u0448\uAAAA";
  static_assert (array_size (input) == 4, "");
  static_assert (array_size (expected) == 4, "");

  char in[array_size (input) * 2];
  InternT exp[array_size (expected)];
  utf16_to_bytes (begin (input), end (input), begin (in), endianess);
  copy (begin (expected), end (expected), begin (exp));

  test_offsets_partial offsets[] = {
    {2, 0, 0, 0}, // no space for first CP
    {1, 1, 0, 0}, // incomplete first CP
    {1, 0, 0, 0}, // incomplete first CP, and no space for it

    {4, 1, 2, 1}, // no space for second CP
    {3, 2, 2, 1}, // incomplete second CP
    {3, 1, 2, 1}, // incomplete second CP, and no space for it

    {6, 2, 4, 2}, // no space for third CP
    {5, 3, 4, 2}, // incomplete third CP
    {5, 2, 4, 2}, // incomplete third CP, and no space for it
  };

  for (auto t : offsets)
    {
      InternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      auto state = mbstate_t{};
      auto in_next = (const char *) nullptr;
      auto out_next = (InternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
		    out_next);
      VERIFY (res == cvt.partial);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
	      == 0);
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);

      state = {};
      auto len = cvt.length (state, in, in + t.in_size, t.out_size);
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.expected_in_next);
    }
}

template <class InternT>
void
utf16_to_ucs2_in_error (const std::codecvt<InternT, char, mbstate_t> &cvt,
			utf16_endianess endianess)
{
  using namespace std;
  char16_t input[] = u"b\u0448\uAAAA\U0010AAAA";
  const char16_t expected[] = u"b\u0448\uAAAA\U0010AAAA";
  static_assert (array_size (input) == 6, "");
  static_assert (array_size (expected) == 6, "");

  InternT exp[array_size (expected)];
  copy (begin (expected), end (expected), begin (exp));

  // The only possible error in UTF-16 is unpaired surrogate code units.
  // Additionally, because the target encoding is UCS-2, a proper pair of
  // surrogates is also error. Simply, any surrogate CU is error.
  test_offsets_error<char16_t> offsets[] = {
    {6, 3, 0, 0, 0xD800, 0},
    {6, 3, 0, 0, 0xDBFF, 0},
    {6, 3, 0, 0, 0xDC00, 0},
    {6, 3, 0, 0, 0xDFFF, 0},

    {6, 3, 2, 1, 0xD800, 1},
    {6, 3, 2, 1, 0xDBFF, 1},
    {6, 3, 2, 1, 0xDC00, 1},
    {6, 3, 2, 1, 0xDFFF, 1},

    {6, 3, 4, 2, 0xD800, 2},
    {6, 3, 4, 2, 0xDBFF, 2},
    {6, 3, 4, 2, 0xDC00, 2},
    {6, 3, 4, 2, 0xDFFF, 2},

    // make the leading surrogate a trailing one
    {10, 5, 6, 3, 0xDC00, 3},
    {10, 5, 6, 3, 0xDFFF, 3},

    // make the trailing surrogate a leading one
    {10, 5, 6, 3, 0xD800, 4},
    {10, 5, 6, 3, 0xDBFF, 4},

    // make the trailing surrogate a BMP char
    {10, 5, 6, 3, u'z', 4},

    // don't replace anything in the test cases bellow, just show the surrogate
    // pair (fourth CP) fully or partially (just the first surrogate)
    {10, 5, 6, 3, u'b', 0},
    {8, 5, 6, 3, u'b', 0},
    {9, 5, 6, 3, u'b', 0},

    {10, 4, 6, 3, u'b', 0},
    {8, 4, 6, 3, u'b', 0},
    {9, 4, 6, 3, u'b', 0},
  };

  for (auto t : offsets)
    {
      char in[array_size (input) * 2];
      InternT out[array_size (exp) - 1] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      auto old_char = input[t.replace_pos];
      input[t.replace_pos] = t.replace_char; // replace in input, not in in
      utf16_to_bytes (begin (input), end (input), begin (in), endianess);

      auto state = mbstate_t{};
      auto in_next = (const char *) nullptr;
      auto out_next = (InternT *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.in (state, in, in + t.in_size, in_next, out, out + t.out_size,
		    out_next);
      VERIFY (res == cvt.error);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<InternT>::compare (out, exp, t.expected_out_next)
	      == 0);
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);

      state = {};
      auto len = cvt.length (state, in, in + t.in_size, t.out_size);
      VERIFY (len >= 0);
      VERIFY (static_cast<size_t> (len) == t.expected_in_next);

      input[t.replace_pos] = old_char;
    }
}

template <class InternT>
void
ucs2_to_utf16_out_ok (const std::codecvt<InternT, char, mbstate_t> &cvt,
		      utf16_endianess endianess)
{
  using namespace std;
  const char16_t input[] = u"b\u0448\uAAAA";
  const char16_t expected[] = u"b\u0448\uAAAA";
  static_assert (array_size (input) == 4, "");
  static_assert (array_size (expected) == 4, "");

  InternT in[array_size (input)];
  char exp[array_size (expected) * 2];
  copy (begin (input), end (input), begin (in));
  utf16_to_bytes (begin (expected), end (expected), begin (exp), endianess);

  test_offsets_ok offsets[] = {{0, 0}, {1, 2}, {2, 4}, {3, 6}};
  for (auto t : offsets)
    {
      char out[array_size (exp) - 2] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      auto state = mbstate_t{};
      auto in_next = (const InternT *) nullptr;
      auto out_next = (char *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
		     out_next);
      VERIFY (res == cvt.ok);
      VERIFY (in_next == in + t.in_size);
      VERIFY (out_next == out + t.out_size);
      VERIFY (char_traits<char>::compare (out, exp, t.out_size) == 0);
      if (t.out_size < array_size (out))
	VERIFY (out[t.out_size] == 0);
    }
}

template <class InternT>
void
ucs2_to_utf16_out_partial (const std::codecvt<InternT, char, mbstate_t> &cvt,
			   utf16_endianess endianess)
{
  using namespace std;
  const char16_t input[] = u"b\u0448\uAAAA";
  const char16_t expected[] = u"b\u0448\uAAAA";
  static_assert (array_size (input) == 4, "");
  static_assert (array_size (expected) == 4, "");

  InternT in[array_size (input)];
  char exp[array_size (expected) * 2];
  copy (begin (input), end (input), begin (in));
  utf16_to_bytes (begin (expected), end (expected), begin (exp), endianess);

  test_offsets_partial offsets[] = {
    {1, 0, 0, 0}, // no space for first CP
    {1, 1, 0, 0}, // no space for first CP

    {2, 2, 1, 2}, // no space for second CP
    {2, 3, 1, 2}, // no space for second CP

    {3, 4, 2, 4}, // no space for third CP
    {3, 5, 2, 4}, // no space for third CP
  };
  for (auto t : offsets)
    {
      char out[array_size (exp) - 2] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      auto state = mbstate_t{};
      auto in_next = (const InternT *) nullptr;
      auto out_next = (char *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
		     out_next);
      VERIFY (res == cvt.partial);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);
    }
}

template <class InternT>
void
ucs2_to_utf16_out_error (const std::codecvt<InternT, char, mbstate_t> &cvt,
			 utf16_endianess endianess)
{
  using namespace std;
  const char16_t input[] = u"b\u0448\uAAAA\U0010AAAA";
  const char16_t expected[] = u"b\u0448\uAAAA\U0010AAAA";
  static_assert (array_size (input) == 6, "");
  static_assert (array_size (expected) == 6, "");

  InternT in[array_size (input)];
  char exp[array_size (expected) * 2];
  copy (begin (input), end (input), begin (in));
  utf16_to_bytes (begin (expected), end (expected), begin (exp), endianess);

  test_offsets_error<InternT> offsets[] = {
    {3, 6, 0, 0, 0xD800, 0},
    {3, 6, 0, 0, 0xDBFF, 0},
    {3, 6, 0, 0, 0xDC00, 0},
    {3, 6, 0, 0, 0xDFFF, 0},

    {3, 6, 1, 2, 0xD800, 1},
    {3, 6, 1, 2, 0xDBFF, 1},
    {3, 6, 1, 2, 0xDC00, 1},
    {3, 6, 1, 2, 0xDFFF, 1},

    {3, 6, 2, 4, 0xD800, 2},
    {3, 6, 2, 4, 0xDBFF, 2},
    {3, 6, 2, 4, 0xDC00, 2},
    {3, 6, 2, 4, 0xDFFF, 2},

    // make the leading surrogate a trailing one
    {5, 10, 3, 6, 0xDC00, 3},
    {5, 10, 3, 6, 0xDFFF, 3},

    // make the trailing surrogate a leading one
    {5, 10, 3, 6, 0xD800, 4},
    {5, 10, 3, 6, 0xDBFF, 4},

    // make the trailing surrogate a BMP char
    {5, 10, 3, 6, u'z', 4},

    // don't replace anything in the test cases bellow, just show the surrogate
    // pair (fourth CP) fully or partially (just the first surrogate)
    {5, 10, 3, 6, u'b', 0},
    {5, 8, 3, 6, u'b', 0},
    {5, 9, 3, 6, u'b', 0},

    {4, 10, 3, 6, u'b', 0},
    {4, 8, 3, 6, u'b', 0},
    {4, 9, 3, 6, u'b', 0},
  };

  for (auto t : offsets)
    {
      char out[array_size (exp) - 2] = {};
      VERIFY (t.in_size <= array_size (in));
      VERIFY (t.out_size <= array_size (out));
      VERIFY (t.expected_in_next <= t.in_size);
      VERIFY (t.expected_out_next <= t.out_size);
      auto old_char = in[t.replace_pos];
      in[t.replace_pos] = t.replace_char;

      auto state = mbstate_t{};
      auto in_next = (const InternT *) nullptr;
      auto out_next = (char *) nullptr;
      auto res = codecvt_base::result ();

      res = cvt.out (state, in, in + t.in_size, in_next, out, out + t.out_size,
		     out_next);
      VERIFY (res == cvt.error);
      VERIFY (in_next == in + t.expected_in_next);
      VERIFY (out_next == out + t.expected_out_next);
      VERIFY (char_traits<char>::compare (out, exp, t.expected_out_next) == 0);
      if (t.expected_out_next < array_size (out))
	VERIFY (out[t.expected_out_next] == 0);

      in[t.replace_pos] = old_char;
    }
}

// Run the whole UTF-16 <-> UCS-2 test group on CVT: the three "in" tests
// (ok, partial, error) followed by the three "out" tests (ok, partial, error).
// ENDIANESS is forwarded unchanged to every test.
template <class InternT>
void
test_utf16_ucs2_cvt (const std::codecvt<InternT, char, mbstate_t> &cvt,
		     utf16_endianess endianess)
{
  utf16_to_ucs2_in_ok (cvt, endianess);
  utf16_to_ucs2_in_partial (cvt, endianess);
  utf16_to_ucs2_in_error (cvt, endianess);
  ucs2_to_utf16_out_ok (cvt, endianess);
  ucs2_to_utf16_out_partial (cvt, endianess);
  ucs2_to_utf16_out_error (cvt, endianess);
}