Cygwin: Add ISO C2X functions c8rtomb, mbrtoc8

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
Corinna Vinschen 2023-08-02 16:55:52 +02:00
parent 290b56a879
commit c49bc478b4
5 changed files with 184 additions and 7 deletions

View file

@ -482,12 +482,13 @@ details. */
346: (Belatedly) add posix_spawn_file_actions_addchdir_np, 346: (Belatedly) add posix_spawn_file_actions_addchdir_np,
posix_spawn_file_actions_addfchdir_np. posix_spawn_file_actions_addfchdir_np.
347: Add c16rtomb, c32rtomb, mbrtoc16, mbrtoc32. 347: Add c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
348: Add c8rtomb, mbrtoc8.
Note that we forgot to bump the api for ualarm, strtoll, strtoull, Note that we forgot to bump the api for ualarm, strtoll, strtoull,
sigaltstack, sethostname. */ sigaltstack, sethostname. */
#define CYGWIN_VERSION_API_MAJOR 0 #define CYGWIN_VERSION_API_MAJOR 0
#define CYGWIN_VERSION_API_MINOR 346 #define CYGWIN_VERSION_API_MINOR 348
/* There is also a compatibility version number associated with the shared memory /* There is also a compatibility version number associated with the shared memory
regions. It is incremented when incompatible changes are made to the shared regions. It is incremented when incompatible changes are made to the shared

View file

@ -4,8 +4,11 @@
#include <sys/cdefs.h> #include <sys/cdefs.h>
#include <wchar.h> #include <wchar.h>
typedef __uint16_t char16_t; /* Either C2x or if C++ doesn't already define char8_t */
typedef __uint32_t char32_t; #if __ISO_C_VISIBLE >= 2020 && !defined (__cpp_char8_t)
typedef unsigned char char8_t;
#endif
/* C++11 already defines those types. */ /* C++11 already defines those types. */
#if !defined (__cplusplus) || (__cplusplus - 0 < 201103L) #if !defined (__cplusplus) || (__cplusplus - 0 < 201103L)
typedef __uint_least16_t char16_t; typedef __uint_least16_t char16_t;
@ -14,6 +17,13 @@ typedef __uint_least32_t char32_t;
__BEGIN_DECLS __BEGIN_DECLS
/* Either C2x or if C++ defines char8_t */
#if __ISO_C_VISIBLE >= 2020 || defined (__cpp_char8_t)
size_t c8rtomb(char * __restrict, char8_t, mbstate_t * __restrict);
size_t mbrtoc8(char8_t * __restrict, const char * __restrict, size_t,
mbstate_t * __restrict);
#endif
size_t c16rtomb(char * __restrict, char16_t, mbstate_t * __restrict); size_t c16rtomb(char * __restrict, char16_t, mbstate_t * __restrict);
size_t mbrtoc16(char16_t * __restrict, const char * __restrict, size_t, size_t mbrtoc16(char16_t * __restrict, const char * __restrict, size_t,
mbstate_t * __restrict); mbstate_t * __restrict);

View file

@ -27,7 +27,7 @@ What's new:
- New API calls: posix_spawn_file_actions_addchdir_np, - New API calls: posix_spawn_file_actions_addchdir_np,
posix_spawn_file_actions_addfchdir_np. posix_spawn_file_actions_addfchdir_np.
- New API calls: c16rtomb, c32rtomb, mbrtoc16, mbrtoc32. - New API calls: c8rtomb, c16rtomb, c32rtomb, mbrtoc8, mbrtoc16, mbrtoc32.
What changed: What changed:
------------- -------------

View file

@ -155,6 +155,103 @@ c16rtomb (char *s, char16_t wc, mbstate_t *ps)
return wcrtomb (s, (wchar_t) wc, ps); return wcrtomb (s, (wchar_t) wc, ps);
} }
extern "C" size_t
c8rtomb (char *s, char8_t c8, mbstate_t *ps)
{
struct _reent *reent = _REENT;
char32_t wc;
if (ps == NULL)
{
_REENT_CHECK_MISC(reent);
ps = &(_REENT_MBRTOWC_STATE(reent));
}
if (s == NULL)
{
ps->__count = 0;
return 1;
}
if ((ps->__count & 0xff00) != 0xc800)
{
switch (c8)
{
case 0 ... 0x7f: /* single octet */
ps->__count = 0;
wc = c8;
break;
case 0xc2 ... 0xf4: /* valid lead byte */
ps->__count = 0xc801;
ps->__value.__wchb[0] = c8;
return 0;
default:
goto ilseq;
}
}
else
{
/* We already collected something... */
int idx = ps->__count & 0x3;
char8_t &c1 = ps->__value.__wchb[0];
char8_t &c2 = ps->__value.__wchb[1];
char8_t &c3 = ps->__value.__wchb[2];
switch (idx)
{
case 1:
/* Annoyingly complex check for validity for 2nd octet. */
if (c8 <= 0x7f || c8 >= 0xc0)
goto ilseq;
if (c1 == 0xe0 && c8 <= 0x9f)
goto ilseq;
if (c1 == 0xed && c8 >= 0xa0)
goto ilseq;
if (c1 == 0xf0 && c8 <= 0x8f)
goto ilseq;
if (c1 == 0xf4 && c8 >= 0x90)
goto ilseq;
if (c1 >= 0xe0)
{
ps->__count = 0xc802;
c2 = c8;
return 0;
}
wc = ((c1 & 0x1f) << 6)
| (c8 & 0x3f);
break;
case 2:
if (c8 <= 0x7f || c8 >= 0xc0)
goto ilseq;
if (c1 >= 0xf0)
{
ps->__count = 0xc803;
c3 = c8;
return 0;
}
wc = ((c1 & 0x0f) << 12)
| ((c2 & 0x3f) << 6)
| (c8 & 0x3f);
break;
case 3:
if (c8 <= 0x7f || c8 >= 0xc0)
goto ilseq;
wc = ((c1 & 0x07) << 18)
| ((c2 & 0x3f) << 12)
| ((c3 & 0x3f) << 6)
| (c8 & 0x3f);
break;
default: /* Shouldn't happen */
goto ilseq;
}
}
ps->__count = 0;
return c32rtomb (s, wc, ps);
ilseq:
ps->__count = 0;
_REENT_ERRNO(reent) = EILSEQ;
return (size_t)(-1);
}
extern "C" size_t extern "C" size_t
mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps) mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps)
{ {
@ -245,6 +342,75 @@ ilseq:
return (size_t)(-1); return (size_t)(-1);
} }
/* Convert the next multibyte character in S (at most N bytes) into UTF-8
   and hand it out one code unit per call via PC8 (which may be NULL).

   The multibyte character is decoded with mbrtoc32.  The first UTF-8
   octet is stored right away; the remaining octets are parked in
   ps->__value.__wchb (last octet at index 0, so the index counts down)
   and ps->__count is set to 0xc800 | <number of octets still pending>.
   Subsequent calls drain one pending octet each and return (size_t)-3
   without consuming any input.

   If PS is NULL, the per-reent mbrtowc state is used.
   If S is NULL, the state is reset and 0 is returned, as ISO C defines
   this call to be equivalent to mbrtoc8 (NULL, "", 1, ps).

   Returns the value returned by mbrtoc32 for a freshly decoded character
   (0 for the null character), (size_t)-3 when a pending octet was
   delivered, (size_t)-2 / (size_t)-1 as reported by mbrtoc32, or
   (size_t)-1 with errno set to EILSEQ if the decoded value is beyond
   U+10FFFF.  */
extern "C" size_t
mbrtoc8 (char8_t *pc8, const char *s, size_t n, mbstate_t *ps)
{
  struct _reent *reent = _REENT;
  size_t len;
  char32_t wc;

  if (ps == NULL)
    {
      _REENT_CHECK_MISC(reent);
      ps = &(_REENT_MBRTOWC_STATE(reent));
    }

  if (s == NULL)
    {
      /* Same as converting "": the null character, hence 0, not 1. */
      ps->__count = 0;
      return 0;
    }

  if ((ps->__count & 0xff00) == 0xc800)
    {
      /* Return next utf-8 octet in line. */
      int idx = ps->__count & 0x3;

      /* Guard against a tagged state with nothing pending, which would
         otherwise index __wchb[-1]. */
      if (idx > 0)
        --idx;
      if (pc8)
        *pc8 = ps->__value.__wchb[idx];
      /* Write the decremented counter back even if PC8 is NULL, otherwise
         sequences of 3 or 4 octets never leave the pending state. */
      ps->__count = idx ? (0xc800 | idx) : 0;
      return (size_t)(-3);
    }

  len = mbrtoc32 (&wc, s, n, ps);
  /* len is unsigned: filter the error values explicitly.  On these paths
     wc is not valid and, for (size_t)-2, PS holds a partial character
     which must not be overwritten. */
  if (len == (size_t)(-1) || len == (size_t)(-2))
    return len;

  /* octets stored back to front for easier indexing */
  switch (wc)
    {
    case 0 ... 0x7f:
      /* Includes the null character (len == 0), so *pc8 gets stored. */
      ps->__value.__wchb[0] = wc;
      ps->__count = 0;
      break;
    case 0x80 ... 0x7ff:
      ps->__value.__wchb[1] = 0xc0 | ((wc & 0x7c0) >> 6);
      ps->__value.__wchb[0] = 0x80 | (wc & 0x3f);
      ps->__count = 0xc800 | 1;
      break;
    case 0x800 ... 0xffff:
      ps->__value.__wchb[2] = 0xe0 | ((wc & 0xf000) >> 12);
      ps->__value.__wchb[1] = 0x80 | ((wc & 0xfc0) >> 6);
      ps->__value.__wchb[0] = 0x80 | (wc & 0x3f);
      ps->__count = 0xc800 | 2;
      break;
    case 0x10000 ... 0x10ffff:
      ps->__value.__wchb[3] = 0xf0 | ((wc & 0x1c0000) >> 18);
      ps->__value.__wchb[2] = 0x80 | ((wc & 0x3f000) >> 12);
      ps->__value.__wchb[1] = 0x80 | ((wc & 0xfc0) >> 6);
      ps->__value.__wchb[0] = 0x80 | (wc & 0x3f);
      ps->__count = 0xc800 | 3;
      break;
    default:
      ps->__count = 0;
      _REENT_ERRNO(reent) = EILSEQ;
      return (size_t)(-1);
    }

  /* The low bits of __count index the lead octet just stored. */
  if (pc8)
    *pc8 = ps->__value.__wchb[ps->__count & 0x3];
  return len;
}
extern "C" size_t extern "C" size_t
mbsnrtowci(wint_t *dst, const char **src, size_t nms, size_t len, mbstate_t *ps) mbsnrtowci(wint_t *dst, const char **src, size_t nms, size_t len, mbstate_t *ps)
{ {

View file

@ -46,12 +46,12 @@ Add support for GB18030 codeset.
</para></listitem> </para></listitem>
<listitem><para> <listitem><para>
- New API calls: posix_spawn_file_actions_addchdir_np, New API calls: posix_spawn_file_actions_addchdir_np,
posix_spawn_file_actions_addfchdir_np. posix_spawn_file_actions_addfchdir_np.
</para></listitem> </para></listitem>
<listitem><para> <listitem><para>
- New API calls: c16rtomb, c32rtomb, mbrtoc16, mbrtoc32. New API calls: c8rtomb, c16rtomb, c32rtomb, mbrtoc8, mbrtoc16, mbrtoc32.
</para></listitem> </para></listitem>
</itemizedlist> </itemizedlist>