Cygwin: Add ISO C11 functions c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
Add uchar.h accordingly. For the c32 functions, use the internal functions wirtomb and mbrtowi as base, and convert wirtomb and mbrtowi to inline functions calling the c32 functions. Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
parent
68bb3deabd
commit
4f258c55e8
7 changed files with 127 additions and 22 deletions
|
@ -268,6 +268,8 @@ bindresvport_sa = cygwin_bindresvport_sa SIGFE
|
|||
bsearch NOSIGFE
|
||||
btowc NOSIGFE
|
||||
bzero NOSIGFE
|
||||
c16rtomb NOSIGFE
|
||||
c32rtomb NOSIGFE
|
||||
cabs NOSIGFE
|
||||
cabsf NOSIGFE
|
||||
cabsl NOSIGFE
|
||||
|
@ -936,6 +938,8 @@ malloc_trim SIGFE
|
|||
malloc_usable_size SIGFE
|
||||
mallopt SIGFE
|
||||
mblen NOSIGFE
|
||||
mbrtoc16 NOSIGFE
|
||||
mbrtoc32 NOSIGFE
|
||||
mbrlen NOSIGFE
|
||||
mbrtowc NOSIGFE
|
||||
mbsinit NOSIGFE
|
||||
|
|
|
@ -481,6 +481,7 @@ details. */
|
|||
345: Reinstantiate _alloca.
|
||||
346: (Belatedly) add posix_spawn_file_actions_addchdir_np,
|
||||
posix_spawn_file_actions_addfchdir_np.
|
||||
347: Add c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
|
||||
|
||||
Note that we forgot to bump the api for ualarm, strtoll, strtoull,
|
||||
sigaltstack, sethostname. */
|
||||
|
|
22
winsup/cygwin/include/uchar.h
Normal file
22
winsup/cygwin/include/uchar.h
Normal file
|
@ -0,0 +1,22 @@
|
|||
/* uchar.h: ISO C11 conversion functions between multibyte characters
   and char16_t / char32_t values. */
#ifndef _UCHAR_H
#define _UCHAR_H

#include <sys/cdefs.h>
#include <wchar.h>

/* C++11 and later provide char16_t and char32_t as built-in types, so
   redeclaring those names with a typedef is ill-formed there.  Only
   define them for C and for C++ dialects older than C++11. */
#if !defined (__cplusplus) || (__cplusplus - 0 < 201103L)
typedef __uint16_t char16_t;
typedef __uint32_t char32_t;
#endif

__BEGIN_DECLS

/* Convert a single char16_t value to its multibyte representation. */
size_t c16rtomb(char * __restrict, char16_t, mbstate_t * __restrict);
/* Convert the next multibyte character of at most the given number of
   bytes to char16_t. */
size_t mbrtoc16(char16_t * __restrict, const char * __restrict, size_t,
		mbstate_t * __restrict);

/* Convert a single char32_t value to its multibyte representation. */
size_t c32rtomb(char * __restrict, char32_t, mbstate_t * __restrict);
/* Convert the next multibyte character of at most the given number of
   bytes to char32_t. */
size_t mbrtoc32(char32_t * __restrict, const char * __restrict, size_t,
		mbstate_t * __restrict);

__END_DECLS

#endif /* _UCHAR_H */
|
|
@ -13,6 +13,7 @@ details. */
|
|||
|
||||
/* Internal headers from newlib */
|
||||
#include "../locale/setlocale.h"
|
||||
#include <uchar.h>
|
||||
|
||||
#define ENCODING_LEN 31
|
||||
|
||||
|
@ -46,11 +47,19 @@ void wcintowcs (wchar_t *, wint_t *, size_t);
|
|||
|
||||
/* replacement function for wcrtomb, converting a UTF-32 char to a
|
||||
multibyte string. */
|
||||
size_t wirtomb (char *, wint_t, mbstate_t *);
|
||||
static inline size_t
wirtomb (char *s, wint_t wc, mbstate_t *ps)
{
  /* Thin wrapper: hand the value to c32rtomb as a char32_t. */
  const char32_t c32 = (char32_t) wc;

  return c32rtomb (s, c32, ps);
}
|
||||
|
||||
/* replacement function for mbrtowc, returning a wint_t representing
|
||||
a UTF-32 value. Defined in strfuncs.cc */
|
||||
extern size_t mbrtowi (wint_t *, const char *, size_t, mbstate_t *);
|
||||
a UTF-32 value. */
|
||||
static inline size_t
mbrtowi (wint_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
  /* Thin wrapper: mbrtoc32 stores its result through PWC viewed as a
     char32_t pointer. */
  char32_t *dst = (char32_t *) pwc;

  return mbrtoc32 (dst, s, n, ps);
}
|
||||
|
||||
/* replacement function for mbsnrtowcs, returning a wint_t representing
|
||||
a UTF-32 value. Defined in strfuncs.cc.
|
||||
|
|
|
@ -27,6 +27,8 @@ What's new:
|
|||
- New API calls: posix_spawn_file_actions_addchdir_np,
|
||||
posix_spawn_file_actions_addfchdir_np.
|
||||
|
||||
- New API calls: c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
|
||||
|
||||
What changed:
|
||||
-------------
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@ details. */
|
|||
#include <stdlib.h>
|
||||
#include <sys/param.h>
|
||||
#include <wchar.h>
|
||||
#include <uchar.h>
|
||||
#include <ntdll.h>
|
||||
#include "path.h"
|
||||
#include "fhandler.h"
|
||||
|
@ -132,26 +133,30 @@ wcintowcs (wchar_t *dest, wint_t *src, size_t len)
|
|||
/* replacement function for wcrtomb, converting a UTF-32 char to a
|
||||
multibyte string. */
|
||||
extern "C" size_t
|
||||
wirtomb (char *s, wint_t wi, mbstate_t *ps)
|
||||
c32rtomb (char *s, char32_t wc, mbstate_t *ps)
|
||||
{
|
||||
wchar_t wc[3] = { (wchar_t) wi, '\0', '\0' };
|
||||
const wchar_t *wcp = wc;
|
||||
size_t nwc = 1;
|
||||
/* If s is NULL, behave as if s pointed to an internal buffer and wc
|
||||
was a null wide character (L'\0').  wcrtomb will do that for us. */
|
||||
if (wc <= 0xffff || !s)
|
||||
return wcrtomb (s, (wchar_t) wc, ps);
|
||||
|
||||
if (wi >= 0x10000)
|
||||
{
|
||||
wi -= 0x10000;
|
||||
wc[0] = (wi >> 10) + 0xd800;
|
||||
wc[1] = (wi & 0x3ff) + 0xdc00;
|
||||
nwc = 2;
|
||||
}
|
||||
return wcsnrtombs (s, &wcp, nwc, SIZE_MAX, ps);
|
||||
wchar_t wc_arr[2];
|
||||
const wchar_t *wcp = wc_arr;
|
||||
|
||||
wc -= 0x10000;
|
||||
wc_arr[0] = (wc >> 10) + 0xd800;
|
||||
wc_arr[1] = (wc & 0x3ff) + 0xdc00;
|
||||
return wcsnrtombs (s, &wcp, 2, SIZE_MAX, ps);
|
||||
}
|
||||
|
||||
/* replacement function for mbrtowc, returning a wint_t representing
|
||||
a UTF-32 value. */
|
||||
extern "C" size_t
|
||||
mbrtowi (wint_t *pwi, const char *s, size_t n, mbstate_t *ps)
|
||||
c16rtomb (char *s, char16_t wc, mbstate_t *ps)
|
||||
{
|
||||
return wcrtomb (s, (wchar_t) wc, ps);
|
||||
}
|
||||
|
||||
extern "C" size_t
|
||||
mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps)
|
||||
{
|
||||
size_t len, len2;
|
||||
wchar_t w1, w2;
|
||||
|
@ -159,8 +164,8 @@ mbrtowi (wint_t *pwi, const char *s, size_t n, mbstate_t *ps)
|
|||
len = mbrtowc (&w1, s, n, ps);
|
||||
if (len == (size_t) -1 || len == (size_t) -2)
|
||||
return len;
|
||||
if (pwi)
|
||||
*pwi = w1;
|
||||
if (pwc && s)
|
||||
*pwc = w1;
|
||||
/* Convert surrogate pair to a single UTF-32 value */
|
||||
if (len > 0 && w1 >= 0xd800 && w1 <= 0xdbff)
|
||||
{
|
||||
|
@ -170,8 +175,8 @@ mbrtowi (wint_t *pwi, const char *s, size_t n, mbstate_t *ps)
|
|||
if (len2 > 0 && w2 >= 0xdc00 && w2 <= 0xdfff)
|
||||
{
|
||||
len += len2;
|
||||
if (pwi)
|
||||
*pwi = (((w1 & 0x3ff) << 10) | (w2 & 0x3ff)) + 0x10000;
|
||||
if (pwc && s)
|
||||
*pwc = (((w1 & 0x3ff) << 10) | (w2 & 0x3ff)) + 0x10000;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -182,6 +187,64 @@ mbrtowi (wint_t *pwi, const char *s, size_t n, mbstate_t *ps)
|
|||
return len;
|
||||
}
|
||||
|
||||
/* Like mbrtowc, but we already defined how to return a surrogate, and
|
||||
the definition of mbrtoc16 differes from that.
|
||||
Return the high surrogate with a return value representing the length
|
||||
of the entire multibyte sequence, and in the next call return the low
|
||||
surrogate with a return value of -3. */
|
||||
extern "C" size_t
|
||||
mbrtoc16 (char16_t *pwc, const char *s, size_t n, mbstate_t *ps)
|
||||
{
|
||||
int retval = 0;
|
||||
struct _reent *reent = _REENT;
|
||||
wchar_t wc;
|
||||
|
||||
if (ps == NULL)
|
||||
{
|
||||
_REENT_CHECK_MISC(reent);
|
||||
ps = &(_REENT_MBRTOWC_STATE(reent));
|
||||
}
|
||||
|
||||
if (s == NULL)
|
||||
retval = __MBTOWC (reent, NULL, "", 1, ps);
|
||||
else if (ps->__count == 0xdc00)
|
||||
{
|
||||
/* Return stored second half of the surrogate. */
|
||||
if (pwc)
|
||||
*pwc = ps->__value.__wch;
|
||||
ps->__count = 0;
|
||||
return -3;
|
||||
}
|
||||
else
|
||||
retval = __MBTOWC (reent, &wc, s, n, ps);
|
||||
|
||||
if (retval == -1)
|
||||
goto ilseq;
|
||||
|
||||
if (pwc)
|
||||
*pwc = wc;
|
||||
/* Did we catch the first half of a surrogate? */
|
||||
if (wc >= 0xd800 && wc <= 0xdbff)
|
||||
{
|
||||
if (n <= (size_t) retval)
|
||||
goto ilseq;
|
||||
int r2 = __MBTOWC (reent, &wc, s + retval, n, ps);
|
||||
if (r2 == -1)
|
||||
goto ilseq;
|
||||
/* Store second half of the surrogate in state, and return the
|
||||
length of the entire multibyte sequence. */
|
||||
ps->__count = 0xdc00;
|
||||
ps->__value.__wch = wc;
|
||||
retval += r2;
|
||||
}
|
||||
return (size_t)retval;
|
||||
|
||||
ilseq:
|
||||
ps->__count = 0;
|
||||
_REENT_ERRNO(reent) = EILSEQ;
|
||||
return (size_t)(-1);
|
||||
}
|
||||
|
||||
extern "C" size_t
|
||||
mbsnrtowci(wint_t *dst, const char **src, size_t nms, size_t len, mbstate_t *ps)
|
||||
{
|
||||
|
|
|
@ -50,6 +50,10 @@ Add support for GB18030 codeset.
|
|||
posix_spawn_file_actions_addfchdir_np.
|
||||
</para></listitem>
|
||||
|
||||
<listitem><para>
|
||||
New API calls: c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
|
||||
</para></listitem>
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
</sect2>
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue