encodings.pl: Added `ASCII' alias.
* scripts/encodings.pl: Added `ASCII' alias. * Makefile.in: Rebuilt. * Makefile.am (convert_source_files): Added new files. * gnu/gcj/convert/Input_ASCII.java: New file. * gnu/gcj/convert/Output_ASCII.java: New file. * gnu/gcj/convert/Output_8859_1.java (write): Use `?' to represent out-of-range characters. * gnu/gcj/convert/natIconv.cc (iconv_init): New method. (read): Swap bytes if required. Treat `count' as character count, not byte count. (write): Likewise. Also, handle case where iconv fails on a given character. (init): Put encoding into exception. * gnu/gcj/convert/IOConverter.java (iconv_byte_swap): New global. (static): Call iconv_init. Rebuilt alias list. (iconv_init): New private method. From-SVN: r37190
This commit is contained in:
parent
347b93640a
commit
f92351d76e
10 changed files with 246 additions and 21 deletions
|
@ -1,3 +1,22 @@
|
||||||
|
2000-11-01 Tom Tromey <tromey@cygnus.com>
|
||||||
|
|
||||||
|
* scripts/encodings.pl: Added `ASCII' alias.
|
||||||
|
* Makefile.in: Rebuilt.
|
||||||
|
* Makefile.am (convert_source_files): Added new files.
|
||||||
|
* gnu/gcj/convert/Input_ASCII.java: New file.
|
||||||
|
* gnu/gcj/convert/Output_ASCII.java: New file.
|
||||||
|
* gnu/gcj/convert/Output_8859_1.java (write): Use `?' to represent
|
||||||
|
out-of-range characters.
|
||||||
|
* gnu/gcj/convert/natIconv.cc (iconv_init): New method.
|
||||||
|
(read): Swap bytes if required. Treat `count' as character count,
|
||||||
|
not byte count.
|
||||||
|
(write): Likewise. Also, handle case where iconv fails on a given
|
||||||
|
character.
|
||||||
|
(init): Put encoding into exception.
|
||||||
|
* gnu/gcj/convert/IOConverter.java (iconv_byte_swap): New global.
|
||||||
|
(static): Call iconv_init. Rebuilt alias list.
|
||||||
|
(iconv_init): New private method.
|
||||||
|
|
||||||
2000-11-01 Tom Tromey <tromey@cygnus.com>
|
2000-11-01 Tom Tromey <tromey@cygnus.com>
|
||||||
|
|
||||||
* Makefile.in: Rebuilt.
|
* Makefile.in: Rebuilt.
|
||||||
|
|
|
@ -506,6 +506,7 @@ convert_source_files = \
|
||||||
gnu/gcj/convert/BytesToUnicode.java \
|
gnu/gcj/convert/BytesToUnicode.java \
|
||||||
gnu/gcj/convert/Convert.java \
|
gnu/gcj/convert/Convert.java \
|
||||||
gnu/gcj/convert/Input_8859_1.java \
|
gnu/gcj/convert/Input_8859_1.java \
|
||||||
|
gnu/gcj/convert/Input_ASCII.java \
|
||||||
gnu/gcj/convert/Input_EUCJIS.java \
|
gnu/gcj/convert/Input_EUCJIS.java \
|
||||||
gnu/gcj/convert/Input_JavaSrc.java \
|
gnu/gcj/convert/Input_JavaSrc.java \
|
||||||
gnu/gcj/convert/Input_SJIS.java \
|
gnu/gcj/convert/Input_SJIS.java \
|
||||||
|
@ -513,6 +514,7 @@ gnu/gcj/convert/Input_UTF8.java \
|
||||||
gnu/gcj/convert/Input_iconv.java \
|
gnu/gcj/convert/Input_iconv.java \
|
||||||
gnu/gcj/convert/IOConverter.java \
|
gnu/gcj/convert/IOConverter.java \
|
||||||
gnu/gcj/convert/Output_8859_1.java \
|
gnu/gcj/convert/Output_8859_1.java \
|
||||||
|
gnu/gcj/convert/Output_ASCII.java \
|
||||||
gnu/gcj/convert/Output_EUCJIS.java \
|
gnu/gcj/convert/Output_EUCJIS.java \
|
||||||
gnu/gcj/convert/Output_JavaSrc.java \
|
gnu/gcj/convert/Output_JavaSrc.java \
|
||||||
gnu/gcj/convert/Output_SJIS.java \
|
gnu/gcj/convert/Output_SJIS.java \
|
||||||
|
|
|
@ -280,6 +280,7 @@ convert_source_files = \
|
||||||
gnu/gcj/convert/BytesToUnicode.java \
|
gnu/gcj/convert/BytesToUnicode.java \
|
||||||
gnu/gcj/convert/Convert.java \
|
gnu/gcj/convert/Convert.java \
|
||||||
gnu/gcj/convert/Input_8859_1.java \
|
gnu/gcj/convert/Input_8859_1.java \
|
||||||
|
gnu/gcj/convert/Input_ASCII.java \
|
||||||
gnu/gcj/convert/Input_EUCJIS.java \
|
gnu/gcj/convert/Input_EUCJIS.java \
|
||||||
gnu/gcj/convert/Input_JavaSrc.java \
|
gnu/gcj/convert/Input_JavaSrc.java \
|
||||||
gnu/gcj/convert/Input_SJIS.java \
|
gnu/gcj/convert/Input_SJIS.java \
|
||||||
|
@ -287,6 +288,7 @@ gnu/gcj/convert/Input_UTF8.java \
|
||||||
gnu/gcj/convert/Input_iconv.java \
|
gnu/gcj/convert/Input_iconv.java \
|
||||||
gnu/gcj/convert/IOConverter.java \
|
gnu/gcj/convert/IOConverter.java \
|
||||||
gnu/gcj/convert/Output_8859_1.java \
|
gnu/gcj/convert/Output_8859_1.java \
|
||||||
|
gnu/gcj/convert/Output_ASCII.java \
|
||||||
gnu/gcj/convert/Output_EUCJIS.java \
|
gnu/gcj/convert/Output_EUCJIS.java \
|
||||||
gnu/gcj/convert/Output_JavaSrc.java \
|
gnu/gcj/convert/Output_JavaSrc.java \
|
||||||
gnu/gcj/convert/Output_SJIS.java \
|
gnu/gcj/convert/Output_SJIS.java \
|
||||||
|
@ -1197,6 +1199,7 @@ DEP_FILES = .deps/$(srcdir)/$(CONVERT_DIR)/gen-from-JIS.P \
|
||||||
.deps/gnu/gcj/convert/BytesToUnicode.P .deps/gnu/gcj/convert/Convert.P \
|
.deps/gnu/gcj/convert/BytesToUnicode.P .deps/gnu/gcj/convert/Convert.P \
|
||||||
.deps/gnu/gcj/convert/IOConverter.P \
|
.deps/gnu/gcj/convert/IOConverter.P \
|
||||||
.deps/gnu/gcj/convert/Input_8859_1.P \
|
.deps/gnu/gcj/convert/Input_8859_1.P \
|
||||||
|
.deps/gnu/gcj/convert/Input_ASCII.P \
|
||||||
.deps/gnu/gcj/convert/Input_EUCJIS.P \
|
.deps/gnu/gcj/convert/Input_EUCJIS.P \
|
||||||
.deps/gnu/gcj/convert/Input_JavaSrc.P \
|
.deps/gnu/gcj/convert/Input_JavaSrc.P \
|
||||||
.deps/gnu/gcj/convert/Input_SJIS.P .deps/gnu/gcj/convert/Input_UTF8.P \
|
.deps/gnu/gcj/convert/Input_SJIS.P .deps/gnu/gcj/convert/Input_UTF8.P \
|
||||||
|
@ -1204,6 +1207,7 @@ DEP_FILES = .deps/$(srcdir)/$(CONVERT_DIR)/gen-from-JIS.P \
|
||||||
.deps/gnu/gcj/convert/JIS0208_to_Unicode.P \
|
.deps/gnu/gcj/convert/JIS0208_to_Unicode.P \
|
||||||
.deps/gnu/gcj/convert/JIS0212_to_Unicode.P \
|
.deps/gnu/gcj/convert/JIS0212_to_Unicode.P \
|
||||||
.deps/gnu/gcj/convert/Output_8859_1.P \
|
.deps/gnu/gcj/convert/Output_8859_1.P \
|
||||||
|
.deps/gnu/gcj/convert/Output_ASCII.P \
|
||||||
.deps/gnu/gcj/convert/Output_EUCJIS.P \
|
.deps/gnu/gcj/convert/Output_EUCJIS.P \
|
||||||
.deps/gnu/gcj/convert/Output_JavaSrc.P \
|
.deps/gnu/gcj/convert/Output_JavaSrc.P \
|
||||||
.deps/gnu/gcj/convert/Output_SJIS.P .deps/gnu/gcj/convert/Output_UTF8.P \
|
.deps/gnu/gcj/convert/Output_SJIS.P .deps/gnu/gcj/convert/Output_UTF8.P \
|
||||||
|
|
|
@ -18,6 +18,10 @@ public abstract class IOConverter
|
||||||
// Map encoding aliases to our canonical form.
|
// Map encoding aliases to our canonical form.
|
||||||
static private Hashtable hash = new Hashtable ();
|
static private Hashtable hash = new Hashtable ();
|
||||||
|
|
||||||
|
// True if we have to do byte-order conversions on iconv()
|
||||||
|
// arguments.
|
||||||
|
static protected boolean iconv_byte_swap;
|
||||||
|
|
||||||
static
|
static
|
||||||
{
|
{
|
||||||
// Manually maintained aliases. Note that the value must be our
|
// Manually maintained aliases. Note that the value must be our
|
||||||
|
@ -25,6 +29,17 @@ public abstract class IOConverter
|
||||||
hash.put ("ISO-Latin-1", "8859_1");
|
hash.put ("ISO-Latin-1", "8859_1");
|
||||||
// All aliases after this point are automatically generated by the
|
// All aliases after this point are automatically generated by the
|
||||||
// `encodings.pl' script. Run it to make any corrections.
|
// `encodings.pl' script. Run it to make any corrections.
|
||||||
|
hash.put ("ANSI_X3.4-1968", "ASCII");
|
||||||
|
hash.put ("iso-ir-6", "ASCII");
|
||||||
|
hash.put ("ANSI_X3.4-1986", "ASCII");
|
||||||
|
hash.put ("ISO_646.irv:1991", "ASCII");
|
||||||
|
hash.put ("ASCII", "ASCII");
|
||||||
|
hash.put ("ISO646-US", "ASCII");
|
||||||
|
hash.put ("US-ASCII", "ASCII");
|
||||||
|
hash.put ("us", "ASCII");
|
||||||
|
hash.put ("IBM367", "ASCII");
|
||||||
|
hash.put ("cp367", "ASCII");
|
||||||
|
hash.put ("csASCII", "ASCII");
|
||||||
hash.put ("ISO_8859-1:1987", "8859_1");
|
hash.put ("ISO_8859-1:1987", "8859_1");
|
||||||
hash.put ("iso-ir-100", "8859_1");
|
hash.put ("iso-ir-100", "8859_1");
|
||||||
hash.put ("ISO_8859-1", "8859_1");
|
hash.put ("ISO_8859-1", "8859_1");
|
||||||
|
@ -41,8 +56,12 @@ public abstract class IOConverter
|
||||||
hash.put ("Extended_UNIX_Code_Packed_Format_for_Japanese", "EUCJIS");
|
hash.put ("Extended_UNIX_Code_Packed_Format_for_Japanese", "EUCJIS");
|
||||||
hash.put ("csEUCPkdFmtJapanese", "EUCJIS");
|
hash.put ("csEUCPkdFmtJapanese", "EUCJIS");
|
||||||
hash.put ("EUC-JP", "EUCJIS");
|
hash.put ("EUC-JP", "EUCJIS");
|
||||||
|
|
||||||
|
iconv_byte_swap = iconv_init ();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static native boolean iconv_init ();
|
||||||
|
|
||||||
// Turn an alias into the canonical form.
|
// Turn an alias into the canonical form.
|
||||||
protected static final String canonicalize (String name)
|
protected static final String canonicalize (String name)
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* Copyright (C) 1999 Free Software Foundation
|
/* Copyright (C) 1999, 2000 Free Software Foundation
|
||||||
|
|
||||||
This file is part of libgcj.
|
This file is part of libgcj.
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@ details. */
|
||||||
package gnu.gcj.convert;
|
package gnu.gcj.convert;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert ISO-Latin-1 (8851-1) text to Unicode.
|
* Convert ISO-Latin-1 (8859-1) text to Unicode.
|
||||||
* @author Per Bothner <bothner@cygnus.com>
|
* @author Per Bothner <bothner@cygnus.com>
|
||||||
* @date March 1999.
|
* @date March 1999.
|
||||||
*/
|
*/
|
||||||
|
|
37
libjava/gnu/gcj/convert/Input_ASCII.java
Normal file
37
libjava/gnu/gcj/convert/Input_ASCII.java
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
/* Copyright (C) 2000 Free Software Foundation
|
||||||
|
|
||||||
|
This file is part of libgcj.
|
||||||
|
|
||||||
|
This software is copyrighted work licensed under the terms of the
|
||||||
|
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||||
|
details. */
|
||||||
|
|
||||||
|
package gnu.gcj.convert;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert ASCII text to Unicode.
|
||||||
|
* @date October 2000
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class Input_ASCII extends BytesToUnicode
|
||||||
|
{
|
||||||
|
public String getName() { return "ASCII"; }
|
||||||
|
|
||||||
|
public int read (char[] outbuffer, int outpos, int count)
|
||||||
|
{
|
||||||
|
int origpos = outpos;
|
||||||
|
// Make sure fields of this are in registers.
|
||||||
|
int inpos = this.inpos;
|
||||||
|
byte[] inbuffer = this.inbuffer;
|
||||||
|
int inavail = this.inlength - inpos;
|
||||||
|
int outavail = count;
|
||||||
|
if (outavail > inavail)
|
||||||
|
outavail = inavail;
|
||||||
|
while (--outavail >= 0)
|
||||||
|
{
|
||||||
|
outbuffer[outpos++] = (char) (inbuffer[inpos++] & 0x7f);
|
||||||
|
}
|
||||||
|
this.inpos = inpos;
|
||||||
|
return outpos - origpos;
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,4 +1,4 @@
|
||||||
/* Copyright (C) 1999 Free Software Foundation
|
/* Copyright (C) 1999, 2000 Free Software Foundation
|
||||||
|
|
||||||
This file is part of libgcj.
|
This file is part of libgcj.
|
||||||
|
|
||||||
|
@ -10,9 +10,9 @@ package gnu.gcj.convert;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert Unicode to ISO-Latin-1 (8859-1) text.
|
* Convert Unicode to ISO-Latin-1 (8859-1) text.
|
||||||
* The high-order byte of each character is truncated.
|
* Unrecognized characters are printed as `?'.
|
||||||
* @author Per Bothner <bothner@cygnus.com>
|
* @author Per Bothner <bothner@cygnus.com>
|
||||||
* @date Match 1999.
|
* @date March 1999.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class Output_8859_1 extends UnicodeToBytes
|
public class Output_8859_1 extends UnicodeToBytes
|
||||||
|
@ -30,7 +30,8 @@ public class Output_8859_1 extends UnicodeToBytes
|
||||||
inlength = avail;
|
inlength = avail;
|
||||||
for (int i = inlength; --i >= 0; )
|
for (int i = inlength; --i >= 0; )
|
||||||
{
|
{
|
||||||
buf[count++] = (byte) inbuffer[inpos++];
|
char c = inbuffer[inpos++];
|
||||||
|
buf[count++] = (byte) ((c > 0xff) ? '?' : c);
|
||||||
}
|
}
|
||||||
this.count = count;
|
this.count = count;
|
||||||
return inlength;
|
return inlength;
|
||||||
|
@ -45,7 +46,8 @@ public class Output_8859_1 extends UnicodeToBytes
|
||||||
inlength = avail;
|
inlength = avail;
|
||||||
for (int i = inlength; --i >= 0; )
|
for (int i = inlength; --i >= 0; )
|
||||||
{
|
{
|
||||||
buf[count++] = (byte) str.charAt(inpos++);
|
char c = str.charAt(inpos++);
|
||||||
|
buf[count++] = (byte) ((c > 0xff) ? '?' : c);
|
||||||
}
|
}
|
||||||
this.count = count;
|
this.count = count;
|
||||||
return inlength;
|
return inlength;
|
||||||
|
|
54
libjava/gnu/gcj/convert/Output_ASCII.java
Normal file
54
libjava/gnu/gcj/convert/Output_ASCII.java
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
/* Copyright (C) 2000 Free Software Foundation
|
||||||
|
|
||||||
|
This file is part of libgcj.
|
||||||
|
|
||||||
|
This software is copyrighted work licensed under the terms of the
|
||||||
|
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||||
|
details. */
|
||||||
|
|
||||||
|
package gnu.gcj.convert;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert Unicode to ASCII.
|
||||||
|
* Unrecognized characters are printed as `?'.
|
||||||
|
* @date October 2000
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class Output_ASCII extends UnicodeToBytes
|
||||||
|
{
|
||||||
|
public String getName() { return "ASCII"; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return number of chars converted. */
|
||||||
|
public int write (char[] inbuffer, int inpos, int inlength)
|
||||||
|
{
|
||||||
|
int count = this.count;
|
||||||
|
byte[] buf = this.buf;
|
||||||
|
int avail = buf.length - count;
|
||||||
|
if (inlength > avail)
|
||||||
|
inlength = avail;
|
||||||
|
for (int i = inlength; --i >= 0; )
|
||||||
|
{
|
||||||
|
char c = inbuffer[inpos++];
|
||||||
|
buf[count++] = (byte) ((c > 0x7f) ? '?' : c);
|
||||||
|
}
|
||||||
|
this.count = count;
|
||||||
|
return inlength;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int write (String str, int inpos, int inlength, char[] work)
|
||||||
|
{
|
||||||
|
int count = this.count;
|
||||||
|
byte[] buf = this.buf;
|
||||||
|
int avail = buf.length - count;
|
||||||
|
if (inlength > avail)
|
||||||
|
inlength = avail;
|
||||||
|
for (int i = inlength; --i >= 0; )
|
||||||
|
{
|
||||||
|
char c = str.charAt(inpos++);
|
||||||
|
buf[count++] = (byte) ((c > 0x7f) ? '?' : c);
|
||||||
|
}
|
||||||
|
this.count = count;
|
||||||
|
return inlength;
|
||||||
|
}
|
||||||
|
}
|
|
@ -44,13 +44,13 @@ gnu::gcj::convert::Input_iconv::init (jstring encoding)
|
||||||
|
|
||||||
iconv_t h = iconv_open ("UCS-2", buffer);
|
iconv_t h = iconv_open ("UCS-2", buffer);
|
||||||
if (h == (iconv_t) -1)
|
if (h == (iconv_t) -1)
|
||||||
JvThrow (new java::io::UnsupportedEncodingException);
|
throw new java::io::UnsupportedEncodingException (encoding);
|
||||||
|
|
||||||
JvAssert (h != NULL);
|
JvAssert (h != NULL);
|
||||||
handle = reinterpret_cast<gnu::gcj::RawData *> (h);
|
handle = reinterpret_cast<gnu::gcj::RawData *> (h);
|
||||||
#else /* HAVE_ICONV */
|
#else /* HAVE_ICONV */
|
||||||
// If no iconv, just throw an exception.
|
// If no iconv, just throw an exception.
|
||||||
JvThrow (new java::io::UnsupportedEncodingException);
|
throw new java::io::UnsupportedEncodingException (encoding);
|
||||||
#endif /* HAVE_ICONV */
|
#endif /* HAVE_ICONV */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -75,7 +75,7 @@ gnu::gcj::convert::Input_iconv::read (jcharArray outbuffer,
|
||||||
jchar *out = elements (outbuffer);
|
jchar *out = elements (outbuffer);
|
||||||
size_t inavail = inlength - inpos;
|
size_t inavail = inlength - inpos;
|
||||||
size_t old_in = inavail;
|
size_t old_in = inavail;
|
||||||
size_t outavail = count;
|
size_t outavail = count * sizeof (jchar);
|
||||||
size_t old_out = outavail;
|
size_t old_out = outavail;
|
||||||
|
|
||||||
char *inbuf = (char *) &bytes[inpos];
|
char *inbuf = (char *) &bytes[inpos];
|
||||||
|
@ -86,8 +86,20 @@ gnu::gcj::convert::Input_iconv::read (jcharArray outbuffer,
|
||||||
&outbuf, &outavail);
|
&outbuf, &outavail);
|
||||||
// FIXME: what if R==-1?
|
// FIXME: what if R==-1?
|
||||||
|
|
||||||
|
if (iconv_byte_swap)
|
||||||
|
{
|
||||||
|
size_t max = (old_out - outavail) / sizeof (jchar);
|
||||||
|
for (size_t i = 0; i < max; ++i)
|
||||||
|
{
|
||||||
|
// Byte swap.
|
||||||
|
jchar c = (((out[outpos + i] & 0xff) << 8)
|
||||||
|
| ((out[outpos + i] >> 8) & 0xff));
|
||||||
|
outbuf[i] = c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
inpos += old_in - inavail;
|
inpos += old_in - inavail;
|
||||||
return old_out - outavail;
|
return (old_out - outavail) / sizeof (jchar);
|
||||||
#else /* HAVE_ICONV */
|
#else /* HAVE_ICONV */
|
||||||
return -1;
|
return -1;
|
||||||
#endif /* HAVE_ICONV */
|
#endif /* HAVE_ICONV */
|
||||||
|
@ -104,13 +116,13 @@ gnu::gcj::convert::Output_iconv::init (jstring encoding)
|
||||||
|
|
||||||
iconv_t h = iconv_open (buffer, "UCS-2");
|
iconv_t h = iconv_open (buffer, "UCS-2");
|
||||||
if (h == (iconv_t) -1)
|
if (h == (iconv_t) -1)
|
||||||
JvThrow (new java::io::UnsupportedEncodingException);
|
throw new java::io::UnsupportedEncodingException (encoding);
|
||||||
|
|
||||||
JvAssert (h != NULL);
|
JvAssert (h != NULL);
|
||||||
handle = reinterpret_cast<gnu::gcj::RawData *> (h);
|
handle = reinterpret_cast<gnu::gcj::RawData *> (h);
|
||||||
#else /* HAVE_ICONV */
|
#else /* HAVE_ICONV */
|
||||||
// If no iconv, just throw an exception.
|
// If no iconv, just throw an exception.
|
||||||
JvThrow (new java::io::UnsupportedEncodingException);
|
throw new java::io::UnsupportedEncodingException (encoding);
|
||||||
#endif /* HAVE_ICONV */
|
#endif /* HAVE_ICONV */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -128,14 +140,15 @@ gnu::gcj::convert::Output_iconv::finalize (void)
|
||||||
|
|
||||||
jint
|
jint
|
||||||
gnu::gcj::convert::Output_iconv::write (jcharArray inbuffer,
|
gnu::gcj::convert::Output_iconv::write (jcharArray inbuffer,
|
||||||
jint inpos, jint count)
|
jint inpos, jint inlength)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_ICONV
|
#ifdef HAVE_ICONV
|
||||||
jchar *chars = elements (inbuffer);
|
jchar *chars = elements (inbuffer);
|
||||||
jbyte *out = elements (buf);
|
jbyte *out = elements (buf);
|
||||||
|
jchar *temp_buffer = NULL;
|
||||||
|
|
||||||
size_t inavail = count;
|
size_t inavail = inlength * sizeof (jchar);
|
||||||
size_t old_in = count;
|
size_t old_in = inavail;
|
||||||
|
|
||||||
size_t outavail = buf->length - count;
|
size_t outavail = buf->length - count;
|
||||||
size_t old_out = outavail;
|
size_t old_out = outavail;
|
||||||
|
@ -143,14 +156,88 @@ gnu::gcj::convert::Output_iconv::write (jcharArray inbuffer,
|
||||||
char *inbuf = (char *) &chars[inpos];
|
char *inbuf = (char *) &chars[inpos];
|
||||||
char *outbuf = (char *) &out[count];
|
char *outbuf = (char *) &out[count];
|
||||||
|
|
||||||
|
if (iconv_byte_swap)
|
||||||
|
{
|
||||||
|
// Ugly performance penalty -- don't use losing systems!
|
||||||
|
temp_buffer = (jchar *) _Jv_Malloc (inlength * sizeof (jchar));
|
||||||
|
for (int i = 0; i < inlength; ++i)
|
||||||
|
{
|
||||||
|
// Byte swap.
|
||||||
|
jchar c = (((chars[inpos + i] & 0xff) << 8)
|
||||||
|
| ((chars[inpos + i] >> 8) & 0xff));
|
||||||
|
temp_buffer[i] = c;
|
||||||
|
}
|
||||||
|
inbuf = (char *) temp_buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the conversion fails on the very first character, then we
|
||||||
|
// assume that the character can't be represented in the output
|
||||||
|
// encoding. There's nothing useful we can do here, so we simply
|
||||||
|
// omit that character. Note that we can't check `errno' because
|
||||||
|
// glibc 2.1.3 doesn't set it correctly. We could check it if we
|
||||||
|
// really needed to, but we'd have to disable support for 2.1.3.
|
||||||
|
size_t loop_old_in = old_in;
|
||||||
|
while (1)
|
||||||
|
{
|
||||||
size_t r = iconv_adapter (iconv, (iconv_t) handle,
|
size_t r = iconv_adapter (iconv, (iconv_t) handle,
|
||||||
&inbuf, &inavail,
|
&inbuf, &inavail,
|
||||||
&outbuf, &outavail);
|
&outbuf, &outavail);
|
||||||
// FIXME: what if R==-1?
|
if (r == -1 && inavail == loop_old_in)
|
||||||
|
{
|
||||||
|
inavail -= 2;
|
||||||
|
if (inavail == 0)
|
||||||
|
break;
|
||||||
|
loop_old_in -= 2;
|
||||||
|
inbuf += 2;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (temp_buffer != NULL)
|
||||||
|
_Jv_Free (temp_buffer);
|
||||||
|
|
||||||
count += old_out - outavail;
|
count += old_out - outavail;
|
||||||
return old_in - inavail;
|
return (old_in - inavail) / sizeof (jchar);
|
||||||
#else /* HAVE_ICONV */
|
#else /* HAVE_ICONV */
|
||||||
return -1;
|
return -1;
|
||||||
#endif /* HAVE_ICONV */
|
#endif /* HAVE_ICONV */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
jboolean
|
||||||
|
gnu::gcj::convert::IOConverter::iconv_init (void)
|
||||||
|
{
|
||||||
|
// Some versions of iconv() always return their UCS-2 results in
|
||||||
|
// big-endian order, and they also require UCS-2 inputs to be in
|
||||||
|
// big-endian order. For instance, glibc 2.1.3 does this. If the
|
||||||
|
// UTF-8=>UCS-2 iconv converter has this feature, then we assume
|
||||||
|
// that all UCS-2 converters do. (This might not be the best
|
||||||
|
// heuristic, but it is all we've got.)
|
||||||
|
jboolean result = false;
|
||||||
|
#ifdef HAVE_ICONV
|
||||||
|
iconv_t handle = iconv_open ("UCS-2", "UTF-8");
|
||||||
|
if (handle != (iconv_t) -1)
|
||||||
|
{
|
||||||
|
jchar c;
|
||||||
|
unsigned char in[3];
|
||||||
|
char *inp, *outp;
|
||||||
|
size_t inc, outc, r;
|
||||||
|
|
||||||
|
// This is the UTF-8 encoding of \ufeff.
|
||||||
|
in[0] = 0xef;
|
||||||
|
in[1] = 0xbb;
|
||||||
|
in[2] = 0xbf;
|
||||||
|
|
||||||
|
inp = (char *) in;
|
||||||
|
inc = 3;
|
||||||
|
outp = (char *) &c;
|
||||||
|
outc = 2;
|
||||||
|
|
||||||
|
r = iconv_adapter (iconv, handle, &inp, &inc, &outp, &outc);
|
||||||
|
// Conversion must be complete for us to use the result.
|
||||||
|
if (r != (size_t) -1 && inc == 0 && outc == 0)
|
||||||
|
result = (c != 0xfeff);
|
||||||
|
}
|
||||||
|
#endif /* HAVE_ICONV */
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
# Map IANA canonical names onto our canonical names.
|
# Map IANA canonical names onto our canonical names.
|
||||||
%map = (
|
%map = (
|
||||||
|
'ANSI_X3.4-1968' => 'ASCII',
|
||||||
'ISO_8859-1:1987' => '8859_1',
|
'ISO_8859-1:1987' => '8859_1',
|
||||||
'UTF-8' => 'UTF8',
|
'UTF-8' => 'UTF8',
|
||||||
'Shift_JIS' => 'SJIS',
|
'Shift_JIS' => 'SJIS',
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue