Cygwin: Add ISO C11 functions c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.

Add uchar.h accordingly.

For the c32 functions, use the internal functions wirtomb and mbrtowi
as base, and convert wirtomb and mbrtowi to inline functions calling
the c32 functions.

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
Corinna Vinschen 2023-07-31 22:38:28 +02:00
parent 68bb3deabd
commit 4f258c55e8
7 changed files with 127 additions and 22 deletions

View file

@ -268,6 +268,8 @@ bindresvport_sa = cygwin_bindresvport_sa SIGFE
bsearch NOSIGFE bsearch NOSIGFE
btowc NOSIGFE btowc NOSIGFE
bzero NOSIGFE bzero NOSIGFE
c16rtomb NOSIGFE
c32rtomb NOSIGFE
cabs NOSIGFE cabs NOSIGFE
cabsf NOSIGFE cabsf NOSIGFE
cabsl NOSIGFE cabsl NOSIGFE
@ -936,6 +938,8 @@ malloc_trim SIGFE
malloc_usable_size SIGFE malloc_usable_size SIGFE
mallopt SIGFE mallopt SIGFE
mblen NOSIGFE mblen NOSIGFE
mbrtoc16 NOSIGFE
mbrtoc32 NOSIGFE
mbrlen NOSIGFE mbrlen NOSIGFE
mbrtowc NOSIGFE mbrtowc NOSIGFE
mbsinit NOSIGFE mbsinit NOSIGFE

View file

@ -481,6 +481,7 @@ details. */
345: Reinstantiate _alloca. 345: Reinstantiate _alloca.
346: (Belatedly) add posix_spawn_file_actions_addchdir_np, 346: (Belatedly) add posix_spawn_file_actions_addchdir_np,
posix_spawn_file_actions_addfchdir_np. posix_spawn_file_actions_addfchdir_np.
347: Add c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
Note that we forgot to bump the api for ualarm, strtoll, strtoull, Note that we forgot to bump the api for ualarm, strtoll, strtoull,
sigaltstack, sethostname. */ sigaltstack, sethostname. */

View file

@ -0,0 +1,22 @@
/* uchar.h: ISO C11 UTF-16 / UTF-32 character conversion interface. */
#ifndef _UCHAR_H
#define _UCHAR_H

#include <sys/cdefs.h>
#include <wchar.h>	/* size_t, mbstate_t */

/* C++11 and later provide char16_t and char32_t as built-in types, so a
   typedef of the same name is ill-formed there.  Only define them for C
   and for pre-C++11 dialects. */
#if !defined (__cplusplus) || (__cplusplus - 0 < 201103L)
typedef __uint16_t char16_t;
typedef __uint32_t char32_t;
#endif

__BEGIN_DECLS

/* Convert one UTF-16 code unit to its multibyte representation. */
size_t c16rtomb(char * __restrict, char16_t, mbstate_t * __restrict);

/* Convert the next multibyte character to UTF-16 code unit(s). */
size_t mbrtoc16(char16_t * __restrict, const char * __restrict, size_t,
		mbstate_t * __restrict);

/* Convert one UTF-32 character to its multibyte representation. */
size_t c32rtomb(char * __restrict, char32_t, mbstate_t * __restrict);

/* Convert the next multibyte character to a UTF-32 character. */
size_t mbrtoc32(char32_t * __restrict, const char * __restrict, size_t,
		mbstate_t * __restrict);

__END_DECLS

#endif /* _UCHAR_H */

View file

@ -13,6 +13,7 @@ details. */
/* Internal headers from newlib */ /* Internal headers from newlib */
#include "../locale/setlocale.h" #include "../locale/setlocale.h"
#include <uchar.h>
#define ENCODING_LEN 31 #define ENCODING_LEN 31
@ -46,11 +47,19 @@ void wcintowcs (wchar_t *, wint_t *, size_t);
/* replacement function for wcrtomb, converting a UTF-32 char to a /* replacement function for wcrtomb, converting a UTF-32 char to a
multibyte string. */ multibyte string. */
size_t wirtomb (char *, wint_t, mbstate_t *); static inline size_t
wirtomb (char *s, wint_t wc, mbstate_t *ps)
{
return c32rtomb (s,(char32_t) wc, ps);
}
/* replacement function for mbrtowc, returning a wint_t representing /* replacement function for mbrtowc, returning a wint_t representing
a UTF-32 value. Defined in strfuncs.cc */ a UTF-32 value. */
extern size_t mbrtowi (wint_t *, const char *, size_t, mbstate_t *); static inline size_t
mbrtowi (wint_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
return mbrtoc32 ((char32_t *) pwc, s, n, ps);
}
/* replacement function for mbsnrtowcs, returning a wint_t representing /* replacement function for mbsnrtowcs, returning a wint_t representing
a UTF-32 value. Defined in strfuncs.cc. a UTF-32 value. Defined in strfuncs.cc.

View file

@ -27,6 +27,8 @@ What's new:
- New API calls: posix_spawn_file_actions_addchdir_np, - New API calls: posix_spawn_file_actions_addchdir_np,
posix_spawn_file_actions_addfchdir_np. posix_spawn_file_actions_addfchdir_np.
- New API calls: c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
What changed: What changed:
------------- -------------

View file

@ -10,6 +10,7 @@ details. */
#include <stdlib.h> #include <stdlib.h>
#include <sys/param.h> #include <sys/param.h>
#include <wchar.h> #include <wchar.h>
#include <uchar.h>
#include <ntdll.h> #include <ntdll.h>
#include "path.h" #include "path.h"
#include "fhandler.h" #include "fhandler.h"
@ -132,26 +133,30 @@ wcintowcs (wchar_t *dest, wint_t *src, size_t len)
/* replacement function for wcrtomb, converting a UTF-32 char to a /* replacement function for wcrtomb, converting a UTF-32 char to a
multibyte string. */ multibyte string. */
extern "C" size_t extern "C" size_t
wirtomb (char *s, wint_t wi, mbstate_t *ps) c32rtomb (char *s, char32_t wc, mbstate_t *ps)
{ {
wchar_t wc[3] = { (wchar_t) wi, '\0', '\0' }; /* If s is NULL, behave as if s pointed to an internal buffer and wc
const wchar_t *wcp = wc; was a null wide character (L''). wcrtomb will do that for us*/
size_t nwc = 1; if (wc <= 0xffff || !s)
return wcrtomb (s, (wchar_t) wc, ps);
if (wi >= 0x10000) wchar_t wc_arr[2];
{ const wchar_t *wcp = wc_arr;
wi -= 0x10000;
wc[0] = (wi >> 10) + 0xd800; wc -= 0x10000;
wc[1] = (wi & 0x3ff) + 0xdc00; wc_arr[0] = (wc >> 10) + 0xd800;
nwc = 2; wc_arr[1] = (wc & 0x3ff) + 0xdc00;
} return wcsnrtombs (s, &wcp, 2, SIZE_MAX, ps);
return wcsnrtombs (s, &wcp, nwc, SIZE_MAX, ps);
} }
/* replacement function for mbrtowc, returning a wint_t representing
a UTF-32 value. */
extern "C" size_t extern "C" size_t
mbrtowi (wint_t *pwi, const char *s, size_t n, mbstate_t *ps) c16rtomb (char *s, char16_t wc, mbstate_t *ps)
{
return wcrtomb (s, (wchar_t) wc, ps);
}
extern "C" size_t
mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps)
{ {
size_t len, len2; size_t len, len2;
wchar_t w1, w2; wchar_t w1, w2;
@ -159,8 +164,8 @@ mbrtowi (wint_t *pwi, const char *s, size_t n, mbstate_t *ps)
len = mbrtowc (&w1, s, n, ps); len = mbrtowc (&w1, s, n, ps);
if (len == (size_t) -1 || len == (size_t) -2) if (len == (size_t) -1 || len == (size_t) -2)
return len; return len;
if (pwi) if (pwc && s)
*pwi = w1; *pwc = w1;
/* Convert surrogate pair to wint_t value */ /* Convert surrogate pair to wint_t value */
if (len > 0 && w1 >= 0xd800 && w1 <= 0xdbff) if (len > 0 && w1 >= 0xd800 && w1 <= 0xdbff)
{ {
@ -170,8 +175,8 @@ mbrtowi (wint_t *pwi, const char *s, size_t n, mbstate_t *ps)
if (len2 > 0 && w2 >= 0xdc00 && w2 <= 0xdfff) if (len2 > 0 && w2 >= 0xdc00 && w2 <= 0xdfff)
{ {
len += len2; len += len2;
if (pwi) if (pwc && s)
*pwi = (((w1 & 0x3ff) << 10) | (w2 & 0x3ff)) + 0x10000; *pwc = (((w1 & 0x3ff) << 10) | (w2 & 0x3ff)) + 0x10000;
} }
else else
{ {
@ -182,6 +187,64 @@ mbrtowi (wint_t *pwi, const char *s, size_t n, mbstate_t *ps)
return len; return len;
} }
/* Like mbrtowc, but we already defined how to return a surrogate, and
   the definition of mbrtoc16 differs from that.

   For a character outside the BMP, return the high surrogate with a
   return value representing the length of the entire multibyte sequence,
   and in the next call return the low surrogate with a return value
   of -3.

   pwc  receives the UTF-16 code unit; may be NULL.
   s    multibyte input; if NULL, the call is handled like
        mbrtoc16 (NULL, "", 1, ps) and nothing is stored in *pwc.
   n    maximum number of bytes to inspect at s.
   ps   conversion state; if NULL, the per-reent mbrtowc state is used.

   Returns the number of bytes consumed, (size_t) -3 when the stored low
   surrogate is delivered, or (size_t) -1 with errno set to EILSEQ on an
   invalid sequence.  Other negative results of the underlying conversion
   are passed through unchanged. */
extern "C" size_t
mbrtoc16 (char16_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
  int retval = 0;
  struct _reent *reent = _REENT;
  /* Initialized so it is never read indeterminate when the underlying
     conversion does not store a character (s == NULL, short input). */
  wchar_t wc = 0;

  if (ps == NULL)
    {
      _REENT_CHECK_MISC(reent);
      ps = &(_REENT_MBRTOWC_STATE(reent));
    }

  if (s == NULL)
    retval = __MBTOWC (reent, NULL, "", 1, ps);
  else if (ps->__count == 0xdc00)
    {
      /* Return stored second half of the surrogate. */
      if (pwc)
	*pwc = ps->__value.__wch;
      ps->__count = 0;
      return (size_t) -3;
    }
  else
    retval = __MBTOWC (reent, &wc, s, n, ps);

  if (retval == -1)
    goto ilseq;
  /* Any other negative result (presumably -2 for an incomplete sequence)
     is handed to the caller as is, without storing a character. */
  if (retval < 0)
    return (size_t) retval;

  /* With s == NULL pwc is ignored, same as in mbrtoc32. */
  if (pwc && s)
    *pwc = wc;

  /* Did we catch the first half of a surrogate? */
  if (wc >= 0xd800 && wc <= 0xdbff)
    {
      if (n <= (size_t) retval)
	goto ilseq;
      /* Only the bytes not yet consumed may be inspected. */
      int r2 = __MBTOWC (reent, &wc, s + retval, n - (size_t) retval, ps);
      if (r2 < 0)
	goto ilseq;
      /* Store second half of the surrogate in state, and return the
	 length of the entire multibyte sequence. */
      ps->__count = 0xdc00;
      ps->__value.__wch = wc;
      retval += r2;
    }
  return (size_t) retval;

ilseq:
  ps->__count = 0;
  _REENT_ERRNO(reent) = EILSEQ;
  return (size_t) -1;
}
extern "C" size_t extern "C" size_t
mbsnrtowci(wint_t *dst, const char **src, size_t nms, size_t len, mbstate_t *ps) mbsnrtowci(wint_t *dst, const char **src, size_t nms, size_t len, mbstate_t *ps)
{ {

View file

@ -50,6 +50,10 @@ Add support for GB18030 codeset.
posix_spawn_file_actions_addfchdir_np. posix_spawn_file_actions_addfchdir_np.
</para></listitem> </para></listitem>
<listitem><para>
New API calls: c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
</para></listitem>
</itemizedlist> </itemizedlist>
</sect2> </sect2>