Initial revision

From-SVN: r26263
1999-04-07 14:42:40 +00:00 · 1999-04-07 14:42:40 +00:00 · ee9dd3721b
commit ee9dd3721b
parent 140fa895c6
370 changed files with 173494 additions and 0 deletions
--- a/libjava/gnu/gcj/convert/BytesToUnicode.java
+++ b/libjava/gnu/gcj/convert/BytesToUnicode.java
@ -0,0 +1,105 @@
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.convert;
+
+public abstract class BytesToUnicode
+{
+  /** Byte buffer that supplies the input.
+   * Unread input occupies inbuffer[inpos] ... inbuffer[inlength-1]. */
+  public byte[] inbuffer;
+  /** Index in inbuffer of the next byte to consume. */
+  public int inpos;
+  /** Index one past the last valid byte in inbuffer. */
+  public int inlength;
+
+  /** Decoder class selected by "file.encoding"; null until looked up. */
+  static Class defaultDecodingClass;
+
+  /** Look up the decoder class named by the "file.encoding" system
+   * property and remember it in defaultDecodingClass.
+   * @exception NoClassDefFoundError if that decoder class cannot be found
+   */
+  static synchronized void getDefaultDecodingClass()
+  {
+    // Callers test the field without holding the lock, so another thread
+    // may already have finished the lookup; test again here.
+    if (defaultDecodingClass != null)
+      return;
+
+    String name = System.getProperty("file.encoding");
+    String decoderName = "gnu.gcj.convert.Input_" + name;
+    try
+      {
+        defaultDecodingClass = Class.forName(decoderName);
+      }
+    catch (ClassNotFoundException notFound)
+      {
+        String message = "missing default encoding " + name
+          + " (class " + decoderName + " not found)";
+        throw new NoClassDefFoundError(message);
+      }
+  }
+
+  /** Return the name of the encoding this converter handles. */
+  public abstract String getName();
+
+  /** Create a decoder for the default ("file.encoding") encoding.
+   * If the default decoder cannot be located or instantiated for any
+   * reason, an 8859_1 decoder is returned instead.
+   */
+  public static BytesToUnicode getDefaultDecoder()
+  {
+    BytesToUnicode decoder;
+    try
+      {
+        if (defaultDecodingClass == null)
+          getDefaultDecodingClass();
+        decoder = (BytesToUnicode) defaultDecodingClass.newInstance();
+      }
+    catch (Throwable problem)
+      {
+        decoder = new Input_8859_1();
+      }
+    return decoder;
+  }
+
+  /** Create a byte-stream->char-stream converter for a named encoding.
+   * The converter class is gnu.gcj.convert.Input_ followed by the name.
+   * @param encoding name of the encoding
+   * @exception java.io.UnsupportedEncodingException if the converter
+   *   class cannot be loaded or instantiated
+   */
+  public static BytesToUnicode getDecoder (String encoding)
+    throws java.io.UnsupportedEncodingException
+  {
+    try
+      {
+        Class found = Class.forName("gnu.gcj.convert.Input_" + encoding);
+        return (BytesToUnicode) found.newInstance();
+      }
+    catch (Throwable cause)
+      {
+        String detail = encoding + " (" + cause + ')';
+        throw new java.io.UnsupportedEncodingException(detail);
+      }
+  }
+
+  /** Supply the bytes that subsequent read calls will convert.
+   * @param buffer source of input bytes
+   * @param pos index of first available byte
+   * @param length one more than index of last available byte
+   */
+  public final void setInput(byte[] buffer, int pos, int length)
+  {
+    this.inbuffer = buffer;
+    this.inpos = pos;
+    this.inlength = length;
+  }
+
+  /** Convert bytes to chars.
+   * Input bytes are taken from this.inbuffer.  The available input
+   * bytes start at inbuffer[inpos], and end at inbuffer[inlength-1].
+   * @param outbuffer buffer for the converted characters
+   * @param outpos position in buffer to start putting converted characters
+   * @param outlength the maximum number of characters to read
+   * @return number of chars placed in outbuffer.
+   * Also, this.inpos is incremented by the number of bytes consumed.
+   *
+   * (Note the asymmetry: the input upper bound is inbuffer[inlength-1],
+   * while the output upper bound is outbuffer[outpos+outlength-1].  The
+   * justification is that inlength is like the count field of a
+   * BufferedInputStream, while the outlength parameter is like the
+   * length parameter of a read request.)
+   */
+  public abstract int read (char[] outbuffer, int outpos, int outlength);
+}
--- a/libjava/gnu/gcj/convert/Convert.java
+++ b/libjava/gnu/gcj/convert/Convert.java
@ -0,0 +1,151 @@
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.convert;
+import java.io.*;
+
+public class Convert
+{
+  static void error (String message)
+  {
+    System.err.print("convert: ");
+    System.err.println(message);
+    System.err.println("Usage:  convert [--from srcEncoding] [--to dstEncoding]");
+    System.err.println("  [inputfile [outputfile]]");
+    System.exit(-1);
+  }
+
+  static void missing (String arg)
+  {
+    error("missing arg after `" + arg + "' option");
+  }
+
+  public static void main (String[] args)
+  {
+    String inName = "-";
+    String outName = "-";
+    String inEncodingName = null;
+    String outEncodingName = "JavaSrc";
+    int seenNames = 0;
+    boolean reverse = false;
+
+    for (int i = 0;  i < args.length;  i++)
+      {
+	String arg = args[i];
+	if (arg.length() == 0)
+	  error("zero-length argument");
+	if (arg.charAt(0) == '-')
+	  {
+	    if (arg.equals("-encoding") || arg.equals("--encoding")
+		|| args.equals("-from") || arg.equals("--from"))
+	      {
+		if (++i == args.length) missing(arg);
+		inEncodingName = args[i];
+	      }
+	    else if (arg.equals("-to") || arg.equals("--to"))
+	      {
+		if (++i == args.length) missing(arg);
+		outEncodingName = args[i];
+	      }
+	    else if (arg.equals("-i"))
+	      {
+		if (++i == args.length) missing(arg);
+		inName = args[i];
+	      }
+	    else if (arg.equals("-o"))
+	      {
+		if (++i == args.length) missing(arg);
+		outName = args[i];
+	      }
+	    else if (arg.equals("-reverse") || arg.equals("--reverse"))
+	      {
+		reverse = true;
+	      }
+	    else if (arg.equals("-"))
+	      {
+		switch (seenNames)
+		  {
+		  case 0:
+		    inName = "-";
+		    seenNames++;
+		    break;
+		  case 1:
+		    outName = "-";
+		    seenNames++;
+		    break;
+		  default:
+		    error("too many `-' arguments");
+		  }
+	      }
+	    else
+	      error("unrecognized argument `" + arg + "'");
+	  }
+	else
+	  {
+	    switch (seenNames)
+	      {
+	      case 0:
+		inName = arg;
+		seenNames++;
+		break;
+	      case 1:
+		outName = arg;
+		seenNames++;
+		break;
+	      default:
+		error("too many filename arguments");
+	      }
+	  }
+      }
+
+    if (reverse)
+      {
+	String tmp = inEncodingName;
+	inEncodingName = outEncodingName;
+	outEncodingName = tmp;
+      }
+
+    try
+      {
+	BytesToUnicode inDecoder
+	  = inEncodingName == null ? BytesToUnicode.getDefaultDecoder()
+	  : BytesToUnicode.getDecoder(inEncodingName);
+	UnicodeToBytes outEncoder
+	  = outEncodingName == null ? UnicodeToBytes.getDefaultEncoder()
+	  : UnicodeToBytes.getEncoder(outEncodingName);
+	InputStream inStream = inName == "-" ? System.in
+	  : new FileInputStream(inName);
+	OutputStream outStream;
+	if (outName == "-")
+	  outStream = System.out;
+	else
+	  outStream = new FileOutputStream(outName);
+	InputStreamReader in
+	  = new InputStreamReader(inStream, inEncodingName);
+	OutputStreamWriter out
+	  = new OutputStreamWriter(outStream, outEncodingName);
+	char[] buffer = new char[2048];
+	for (;;)
+	  {
+	    int count = in.read(buffer);
+	    if (count < 0)
+	      break;
+	    out.write(buffer, 0, count);
+	  }
+
+	in.close();
+	out.close();
+      }
+    catch (java.io.IOException ex)
+      {
+	System.err.print("convert exception: ");
+	System.err.println(ex);
+	System.exit(-1);
+      }
+  }
+}
--- a/libjava/gnu/gcj/convert/Input_8859_1.java
+++ b/libjava/gnu/gcj/convert/Input_8859_1.java
@ -0,0 +1,32 @@
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.convert;
+
+/** Decoder for ISO 8859-1 (Latin-1): each input byte maps directly to
+ * the Unicode character with the same value. */
+public class Input_8859_1 extends BytesToUnicode
+{
+  public String getName() { return "8859_1"; }
+
+  /** Convert up to outlength bytes from inbuffer into outbuffer.
+   * @param outbuffer buffer for the converted characters
+   * @param outpos index in outbuffer of the first character to store
+   * @param outlength maximum number of characters to store
+   * @return number of chars placed in outbuffer; this.inpos is advanced
+   *   by the same amount
+   */
+  public int read (char[] outbuffer, int outpos, int outlength)
+  {
+    int origpos = outpos;
+    // Make sure fields of this are in registers.
+    int inpos = this.inpos;
+    byte[] inbuffer = this.inbuffer;
+    int inavail = this.inlength - inpos;
+    // outlength is a count (the last usable slot is
+    // outbuffer[outpos+outlength-1]), not an end index, so it must not
+    // be reduced by outpos.
+    int outavail = outlength;
+    if (outavail > inavail)
+      outavail = inavail;
+    while (--outavail >= 0)
+      {
+        // Mask so that bytes 0x80..0xFF are not sign-extended.
+        outbuffer[outpos++] = (char) (inbuffer[inpos++] & 0xFF);
+      }
+    this.inpos = inpos;
+    return outpos - origpos;
+  }
+}
--- a/libjava/gnu/gcj/convert/Input_EUCJIS.java
+++ b/libjava/gnu/gcj/convert/Input_EUCJIS.java
@ -0,0 +1,19 @@
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.convert;
+
+/** Decoder for EUC-JIS input; the conversion itself is implemented
+ * natively (see natInput_EUCJIS.cc). */
+public class Input_EUCJIS extends BytesToUnicode
+{
+  public String getName() { return "EUCJIS"; }
+
+  // Decoder state kept between input bytes and between calls to read.
+  // As used by the native read: 0 = expecting ASCII/JIS-Roman or a lead
+  // byte, 1 = expecting the second byte of a JIS X 0208 character,
+  // 2 = expecting a half-width katakana byte (after SS2, 0x8E),
+  // 3 and 4 = expecting the second and third bytes of a JIS X 0212
+  // sequence (after SS3, 0x8F).
+  int codeset = 0;
+  // Lead byte of a two-byte character, saved until the byte that
+  // completes the character has been read.
+  int first_byte;
+
+  /** Convert bytes to chars; see BytesToUnicode.read for the contract. */
+  public native int read (char[] outbuffer, int outpos, int outlength);
+}
--- a/libjava/gnu/gcj/convert/Input_UTF8.java
+++ b/libjava/gnu/gcj/convert/Input_UTF8.java
@ -0,0 +1,107 @@
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.convert;
+
+/** Decoder for UTF-8 input.  Characters above U+FFFF are delivered as
+ * a surrogate pair.  Decoding state is kept in fields, so a multi-byte
+ * sequence may be split across calls to read. */
+public class Input_UTF8 extends BytesToUnicode
+{
+  public String getName() { return "UTF8"; }
+
+  /** Bits accumulated so far for the multi-byte sequence in progress. */
+  int partial = 0;
+  /** Number of continuation bytes still needed to finish the sequence. */
+  int partial_bytes_expected = 0;
+  //int surrogate_second = -1;
+
+  /** Convert UTF-8 bytes from inbuffer into chars.
+   * @param outbuffer buffer for the converted characters
+   * @param outpos index in outbuffer of the first character to store
+   * @param outlength maximum number of characters to store
+   * @return number of chars placed in outbuffer; this.inpos is advanced
+   *   by the number of bytes consumed
+   */
+  public int read (char[] outbuffer, int outpos, int outlength)
+  {
+    int origpos = outpos;
+    // outlength is a count, so the output region ends just before
+    // outbuffer[outpos+outlength].
+    int outlimit = outpos + outlength;
+    for (;;)
+      {
+        if (outpos >= outlimit)
+          break;
+        if (inpos >= inlength)
+          break;
+        int b = inbuffer[inpos++];
+        if (b >= 0)
+          outbuffer[outpos++] = (char) b;
+        else
+          {
+            if ((b & 0xC0) == 0x80) // Continuation byte
+              {
+                partial = (partial << 6) | (b & 0x3F);
+                --partial_bytes_expected;
+                if (partial_bytes_expected == 1)
+                  {
+                    if (partial > (0xFFFF>>6))
+                      {
+                        // The next continuation byte will cause the result
+                        // to exceed 0xFFFF, so we must use a surrogate pair.
+                        // The "Unicode scalar value" (see D28 in section 3.7
+                        // of the Unicode Standard 2.0) is defined as:
+                        // value == (hi-0xD800)*0x400+(lo-0xDC00)+0x10000,
+                        // where (hi, lo) is the Unicode surrogate pair.
+                        // After reading the first three bytes, we have:
+                        // partial == (value >> 6).
+                        // Substituting and simplifying, we get:
+                        // partial == (hi-0xD800)*0x10+((lo-0xDC00)>>6)+0x400.
+                        // The definition lo>=0xDC00 && lo<=0xDFFF implies
+                        // that (lo-0xDC00)>>6 is in the range 0..15.
+                        // Hence (partial-0x400)>>4 == (hi-0xD800), and we
+                        // can emit the high-surrogate without waiting
+                        // for the final byte:
+                        outbuffer[outpos++]
+                          = (char) (0xD800 + ((partial - 0x400) >> 4));
+
+                        // Now we want to set it up so that when we read
+                        // the final byte on the next iteration, we will
+                        // get the low-surrogate without special handling.
+                        // I.e. we want:
+                        // lo == (next_partial << 6) | (next & 0x3F)
+                        // where next is the next input byte and next_partial
+                        // is the value of partial at the end of this
+                        // iteration.  This implies:  next_partial == lo >> 6.
+                        // Since lo == 0xDC00 + (value & 0x3FF), we have
+                        // lo >> 6 == 0x370 + ((value & 0x3FF) >> 6)
+                        //         == 0x370 + (partial & 0xF).  Hence:
+                        partial = (partial & 0xF) + 0x370;
+                      }
+                  }
+                else if (partial_bytes_expected == 0)
+                  {
+                    outbuffer[outpos++] = (char) partial;
+                    partial = 0;
+                  }
+              }
+            else // prefix byte
+              {
+                // b is a sign-extended byte; the masks below look only
+                // at its low eight bits.
+                if ((b & 0xE0) == 0xC0) // 110xxxxx: two-byte sequence
+                  {
+                    partial = b & 0x1F;
+                    partial_bytes_expected = 1;
+                  }
+                else if ((b & 0xF0) == 0xE0) // 1110xxxx: three-byte sequence
+                  {
+                    partial = b & 0xF;
+                    partial_bytes_expected = 2;
+                  }
+                else // treated as 11110xxx: four-byte sequence
+                  {
+                    partial = b & 7;
+                    partial_bytes_expected = 3;
+                  }
+              }
+          }
+      }
+    return outpos - origpos;
+  }
+}
--- a/libjava/gnu/gcj/convert/JIS0208.h
+++ b/libjava/gnu/gcj/convert/JIS0208.h
--- a/libjava/gnu/gcj/convert/JIS0208_to_Unicode.cc
+++ b/libjava/gnu/gcj/convert/JIS0208_to_Unicode.cc
--- a/libjava/gnu/gcj/convert/JIS0212.h
+++ b/libjava/gnu/gcj/convert/JIS0212.h
--- a/libjava/gnu/gcj/convert/JIS0212_to_Unicode.cc
+++ b/libjava/gnu/gcj/convert/JIS0212_to_Unicode.cc
--- a/libjava/gnu/gcj/convert/Output_8859_1.java
+++ b/libjava/gnu/gcj/convert/Output_8859_1.java
@ -0,0 +1,31 @@
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.convert; 
+ 
+/** Encoder for ISO 8859-1 (Latin-1): characters U+0000..U+00FF map to
+ * the byte with the same value. */
+public class Output_8859_1 extends UnicodeToBytes
+{
+  public String getName() { return "8859_1"; }
+
+  /**
+   * Convert chars to bytes, appending them to buf at count.
+   * At most as many chars are converted as there is room for in buf.
+   * @param inbuffer source of characters to convert
+   * @param inpos index of the first character to convert
+   * @param inlength number of characters to convert
+   * @return number of chars converted. */
+  public int write (char[] inbuffer, int inpos, int inlength)
+  {
+    int count = this.count;
+    byte[] buf = this.buf;
+    int avail = buf.length - count;
+    if (inlength > avail)
+      inlength = avail;
+    for (int i = inlength;  --i >= 0;  )
+      {
+        char c = inbuffer[inpos++];
+        // Characters above U+00FF have no Latin-1 representation.  A
+        // plain (byte) cast would keep only the low eight bits and turn
+        // them into unrelated characters, so substitute '?' instead.
+        buf[count++] = (byte) (c > 0xFF ? '?' : c);
+      }
+    this.count = count;
+    return inlength;
+  }
+}
--- a/libjava/gnu/gcj/convert/Output_JavaSrc.java
+++ b/libjava/gnu/gcj/convert/Output_JavaSrc.java
@ -0,0 +1,82 @@
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.convert; 
+ 
+/** Convert Unicode to Ascii with \ u XXXX-escapes. */
+
+public class Output_JavaSrc extends UnicodeToBytes
+{
+  public String getName() { return "JavaSrc"; }
+
+  // Number of bytes that still have to be written for pending_char
+  // (5 = the 'u' and four hex digits, 1..4 = that many hex digits, or
+  // 1 = the second backslash when pending_char is a backslash).
+  int todo;
+  // Character whose escape sequence is partly written.
+  int pending_char;
+
+  /** Convert chars to bytes, appending them to buf at count.
+   * Characters below 127 other than backslash are copied unchanged, a
+   * backslash is written as two backslashes, and every other character
+   * is written as a backslash, 'u' and four hex digits.  An escape that
+   * does not fit in buf is completed by a later call.
+   * @return number of chars consumed from inbuffer
+   */
+  public int write (char[] inbuffer, int inpos, int inlength)
+  {
+    final int first = inpos;
+    int room = buf.length - count;
+    while (room != 0)
+      {
+        // First finish whatever is left of a pending escape sequence.
+        if (todo == 5)
+          {
+            buf[count++] = (byte) 'u';
+            room--;
+            todo = 4;
+            continue;
+          }
+        if (todo >= 1 && todo <= 4)
+          {
+            if (todo == 1 && pending_char == '\\')
+              {
+                buf[count++] = (byte) '\\';
+                todo = 0;
+              }
+            else
+              {
+                todo--;
+                int nibble = (pending_char >> (todo * 4)) & 0xF;
+                buf[count++] = (byte) Character.forDigit(nibble, 16);
+              }
+            room--;
+            continue;
+          }
+
+        // Nothing pending: take the next input character, if any.
+        if (inlength == 0)
+          break;
+        char c = inbuffer[inpos++];
+        inlength--;
+        if (c != '\\' && c < 127)
+          buf[count++] = (byte) c;
+        else
+          {
+            // Both escapes begin with a backslash; the rest is written
+            // by the pending-escape code above.
+            buf[count++] = (byte) '\\';
+            pending_char = c;
+            todo = (c == '\\') ? 1 : 5;
+          }
+        room--;
+      }
+    return inpos - first;
+  }
+}
--- a/libjava/gnu/gcj/convert/Output_UTF8.java
+++ b/libjava/gnu/gcj/convert/Output_UTF8.java
@ -0,0 +1,108 @@
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.convert;
+
+/** Encoder producing UTF-8.  Encoding state is kept in fields, so a
+ * multi-byte sequence that does not fit in buf is completed by a later
+ * call to write. */
+public class Output_UTF8 extends UnicodeToBytes
+{
+  public String getName() { return "UTF8"; }
+
+  /** True if a surrogate pair should be emitted as a single UTF8 sequence.
+   * Otherwise, a surrogate pair is treated as two separate characters.
+   * Also, '\0' is emitted as {0} if true, and as {0xC0,0x80} if false. */
+  public boolean standardUTF8;
+
+  // Saves the previous char if it was a high-surrogate.
+  char hi_part;
+  // Value of incomplete character.
+  int value;
+  // Number of continuation bytes still to emit.
+  int bytes_todo;
+
+  /** Convert chars to UTF-8 bytes, appending them to buf at count.
+   * @param inbuffer source of characters to convert
+   * @param inpos index of the first character to convert
+   * @param inlength number of characters to convert
+   * @return number of chars consumed from inbuffer
+   */
+  public int write (char[] inbuffer, int inpos, int inlength)
+  {
+    int start_pos = inpos;
+    int avail = buf.length - count;
+    for (;;)
+      {
+        // Stop when the output is full, or when there is neither input
+        // nor a pending continuation byte left.  Pending bytes must be
+        // flushed even when the input is exhausted; otherwise the tail
+        // of the last character would be held back.
+        if (avail == 0 || (inlength == 0 && bytes_todo == 0))
+          break;
+        // The algorithm is made more complicated because we want to write
+        // at least one byte in the output buffer, if there is room for
+        // that byte, and at least one input character is available.
+        // This makes the code more robust, since client code will
+        // always "make progress", even in the complicated cases,
+        // where the output buffer only has room for only *part* of a
+        // multi-byte sequence, or the input char buffer only has half
+        // of a surrogate pair (when standardUTF8 is set), or both.
+
+        // Handle continuation characters we did not have room for before.
+        if (bytes_todo > 0)
+          {
+            do
+              {
+                bytes_todo--;
+                // A continuation byte is 10xxxxxx: six payload bits.
+                buf[count++] = (byte)
+                  (((value >> (bytes_todo * 6)) & 0x3F) | 0x80);
+                avail--;
+              }
+            while (bytes_todo > 0 && avail > 0);
+            continue;
+          }
+        char ch = inbuffer[inpos++];
+        inlength--;
+        if (ch < 128 && (ch != 0 || standardUTF8))
+          {
+            avail--;
+            buf[count++] = (byte) ch;
+          }
+        else if (ch <= 0x07FF)
+          {
+            buf[count++] = (byte) (0xC0 | (ch >> 6));
+            if (--avail > 0)
+              {
+                buf[count++] = (byte) ((ch & 0x3F) | 0x80);
+                avail--;
+              }
+            else
+              {
+                value = ch;
+                bytes_todo = 1;
+                break;
+              }
+          }
+        else if (ch >= 0xD800 && ch <= 0xDFFF && standardUTF8)
+          {
+            if (ch <= 0xDBFF)  // High surrogates
+              {
+                // The first byte is (0xF0 | value>>18), where value is the
+                // Unicode scalar value of the combined character - which
+                // we may not know yet.  But from substituting:
+                // value == (hi-0xD800)*0x400+(lo-0xDC00)+0x10000,
+                // and hi==ch, we get
+                // value>>18 == ((ch-0xD800)+0x40)>>8 == (ch-0xD7C0)>>8
+                // (the low surrogate contributes only bits below bit 10):
+                buf[count++] = (byte) (0xF0 | ((ch - 0xD7C0) >> 8));
+                avail--;
+                hi_part = ch;
+              }
+            else // Low surrogates
+              {
+                value = (hi_part - 0xD800) * 0x400 + (ch - 0xDC00) + 0x10000;
+                bytes_todo = 3;
+              }
+          }
+        else
+          {
+            buf[count++] = (byte) (0xE0 | (ch >> 12));
+            value = ch;
+            avail--;
+            bytes_todo = 2;
+          }
+      }
+    return inpos - start_pos;
+  }
+}
--- a/libjava/gnu/gcj/convert/UnicodeToBytes.java
+++ b/libjava/gnu/gcj/convert/UnicodeToBytes.java
@ -0,0 +1,90 @@
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.convert; 
+ 
+public abstract class UnicodeToBytes
+{
+  /** Byte buffer that receives the output.
+   * The locations buf[count] ... buf[buf.length-1] are available. */
+  public byte[] buf;
+  /** Index in buf at which the next converted byte is stored. */
+  public int count;
+
+  /** Encoder class selected by "file.encoding"; null until looked up. */
+  static Class defaultEncodingClass;
+
+  /** Look up the encoder class named by the "file.encoding" system
+   * property and remember it in defaultEncodingClass.
+   * @exception NoClassDefFoundError if that encoder class cannot be found
+   */
+  static synchronized void getDefaultEncodingClass()
+  {
+    // Callers test the field without holding the lock, so another thread
+    // may already have finished the lookup; test again here.
+    if (defaultEncodingClass != null)
+      return;
+
+    String name = System.getProperty("file.encoding");
+    String encoderName = "gnu.gcj.convert.Output_" + name;
+    try
+      {
+        defaultEncodingClass = Class.forName(encoderName);
+      }
+    catch (ClassNotFoundException notFound)
+      {
+        String message = "missing default encoding " + name
+          + " (class " + encoderName + " not found)";
+        throw new NoClassDefFoundError(message);
+      }
+  }
+
+  /** Return the name of the encoding this converter handles. */
+  public abstract String getName();
+
+  /** Create an encoder for the default ("file.encoding") encoding.
+   * If the default encoder cannot be located or instantiated for any
+   * reason, an 8859_1 encoder is returned instead.
+   */
+  public static UnicodeToBytes getDefaultEncoder()
+  {
+    UnicodeToBytes encoder;
+    try
+      {
+        if (defaultEncodingClass == null)
+          getDefaultEncodingClass();
+        encoder = (UnicodeToBytes) defaultEncodingClass.newInstance();
+      }
+    catch (Throwable problem)
+      {
+        encoder = new Output_8859_1();
+      }
+    return encoder;
+  }
+
+  /** Create a char-stream->byte-stream converter for a named encoding.
+   * The converter class is gnu.gcj.convert.Output_ followed by the name.
+   * @param encoding name of the encoding
+   * @exception java.io.UnsupportedEncodingException if the converter
+   *   class cannot be loaded or instantiated
+   */
+  public static UnicodeToBytes getEncoder (String encoding)
+    throws java.io.UnsupportedEncodingException
+  {
+    try
+      {
+        Class found = Class.forName("gnu.gcj.convert.Output_" + encoding);
+        return (UnicodeToBytes) found.newInstance();
+      }
+    catch (Throwable cause)
+      {
+        String detail = encoding + " (" + cause + ')';
+        throw new java.io.UnsupportedEncodingException(detail);
+      }
+  }
+
+  /** Set the buffer that subsequent write calls will fill.
+   * @param buffer destination for converted bytes
+   * @param count index in buffer of the first free location
+   */
+  public final void setOutput(byte[] buffer, int count)
+  {
+    buf = buffer;
+    this.count = count;
+  }
+
+  /** Convert chars to bytes.
+    * Converted bytes are written to buf, starting at count.
+    * @param inbuffer source of characters to convert
+    * @param inpos index of initial character in inbuffer to convert
+    * @param inlength number of characters to convert
+    * @return number of chars converted
+    * Also, this.count is incremented by the number of bytes converted.
+    */
+  public abstract int write (char[] inbuffer, int inpos, int inlength);
+}
--- a/libjava/gnu/gcj/convert/gen-from-JIS.c
+++ b/libjava/gnu/gcj/convert/gen-from-JIS.c
@ -0,0 +1,154 @@
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+/* One entry of a JIS mapping table: a two-byte JIS code and the
+   Unicode character it corresponds to. */
+struct chval
+{
+  unsigned char b1;            /* 1st byte */
+  unsigned char b2;            /* 2nd byte */
+  unsigned short uc;  /* unicode value */
+};
+
+/* Each MAP(B1, B2, C) line in the included headers expands to one
+   struct chval initializer. */
+#define MAP(B1, B2, C) { B1, B2, C },
+
+/* JIS X 0208 entries, terminated by an entry whose b1 is 255. */
+struct chval chtab_0208[] = {
+#include "JIS0208.h"
+  { 255, 255, 0}
+};
+
+/* JIS X 0212 entries, terminated by an entry whose b1 is 255. */
+struct chval chtab_0212[] = {
+#include "JIS0212.h"
+  { 255, 255, 0}
+};
+#undef MAP
+
+/* Both tables combined, with no terminating entry; the JIS X 0212
+   entries are marked by setting the 0x80 bit of b1.  main sorts this
+   array by Unicode value for the "toJIS" listing. */
+struct chval sorted[] = {
+#define MAP(B1, B2, C) { B1, B2, C },
+#include "JIS0208.h"
+#undef MAP
+#define MAP(B1, B2, C) { 0x80|B1, B2, C },
+#include "JIS0212.h"
+#undef MAP
+};
+
+/* The table selected by the command-line argument. */
+struct chval *chtab;
+
+/* qsort comparison callback: order two struct chval entries by
+   ascending Unicode value.  The parameters are const void * so that
+   the function has exactly the type qsort expects. */
+int
+compare (const void *p1, const void *p2)
+{
+  const struct chval *c1 = (const struct chval *) p1;
+  const struct chval *c2 = (const struct chval *) p2;
+  /* Both values fit in 16 bits, so the difference cannot overflow. */
+  return (int) c1->uc - (int) c2->uc;
+}
+
+/* Generate a mapping table on standard output.
+   argv[1] selects the operation:
+     "JIS0208" or "JIS0212" - emit the C source of a two-dimensional
+       <name>_to_Unicode array indexed by first and second byte, and
+       print statistics on standard error;
+     "toJIS" - list all entries of both tables sorted by Unicode value.
+   Exits with status -1 on a missing or unknown argument, or if the
+   selected table is not sorted by (first byte, second byte). */
+int
+main(int argc, char** argv)
+{
+  FILE *out = stdout;
+  unsigned min1 = 256, max1 = 0, min2 = 256, max2 = 0, count = 0;
+  unsigned short low1_uc = 0xFFFF, high1_uc = 0;
+  unsigned short low2_uc = 0xFFFF, high2_uc = 0;
+  int i;  int row, col;
+  /* argv[1] is NULL when no argument was given; do not pass it to strcmp. */
+  if (argc < 2)
+    {
+      fprintf (stderr, "bad argument!");
+      exit (-1);
+    }
+  if (strcmp (argv[1], "JIS0208") == 0)
+    chtab = chtab_0208;
+  else if (strcmp (argv[1], "JIS0212") == 0)
+    chtab = chtab_0212;
+  else if (strcmp (argv[1], "toJIS") == 0)
+    {
+      size_t k;
+      size_t n_sorted = sizeof(sorted)/sizeof(struct chval);
+      qsort (sorted, n_sorted, sizeof(struct chval),
+             compare);
+      for (k = 0;  k < n_sorted;  k++)
+        {
+          fprintf (out, "  0x%04x -> 0x%02x, 0x%02x\n",
+                   sorted[k].uc, sorted[k].b1, sorted[k].b2);
+        }
+      exit(0);
+    }
+  else
+    {
+      fprintf (stderr, "bad argument!");
+      exit (-1);
+    }
+  /* First pass: find the range of each byte and count the entries. */
+  for (i = 0;  chtab[i].b1 != 255; i++)
+    {
+      if (chtab[i].b1 < min1) min1 = chtab[i].b1;
+      if (chtab[i].b2 < min2) min2 = chtab[i].b2;
+      if (chtab[i].b1 > max1) max1 = chtab[i].b1;
+      if (chtab[i].b2 > max2) max2 = chtab[i].b2;
+      count++;
+    }
+  fprintf(stderr, "1st byte ranges from %u to %u.\n", min1, max1);
+  fprintf(stderr, "2nd byte ranges from %u to %u.\n", min2, max2);
+
+  fprintf(out,"/* This file is automatically generated from %s.TXT. */\n",
+          argv[1]);
+  fprintf(out, "unsigned short %s_to_Unicode[%u][%u] = {\n",
+          argv[1], max1 - min1 + 1,  max2 - min2 + 1);
+  /* Second pass: walk the grid of (row, col) positions in order; i is
+     the index of the next table entry not yet emitted. */
+  i = 0;
+  for (row = min1;  row <= max1;  row++)
+    {
+      fprintf(out, "/* 1st byte: %d */ { ", row);
+      if (row < chtab[i].b1)
+        {
+          fprintf(out, "0 }, /* unused row */\n");
+        }
+      else if (row > chtab[i].b1)
+        {
+          fprintf (stderr, "error - char table out of order!\n");
+          exit (-1);
+        }
+      else
+        {
+          fprintf(out, "\n");
+          for (col = min2;  col <= max2;  col++)
+            {
+              if (row == chtab[i].b1 && col == chtab[i].b2)
+                {
+                  unsigned uc = chtab[i].uc;
+                  /* Track the Unicode range separately for values
+                     below and from 0x2000 upward. */
+                  if (uc < 0x2000)
+                    {
+                      if (uc > high1_uc)
+                        high1_uc = uc;
+                      if (uc < low1_uc)
+                        low1_uc = uc;
+                    }
+                  else
+                    {
+                      if (uc > high2_uc)
+                        high2_uc = uc;
+                      if (uc < low2_uc)
+                        low2_uc = uc;
+                    }
+                  fprintf (out, "  /* 2nd byte: %d */ 0x%04x",
+                           chtab[i].b2, uc);
+                  i++;
+                }
+              else if (row < chtab[i].b1
+                  || (row == chtab[i].b1 && col < chtab[i].b2))
+                {
+                  /* No entry for this position. */
+                  fprintf (out, "  0");
+                }
+              else
+                {
+                  fprintf (stderr, "error - char table our of order!\n");
+                  exit (-1);
+                }
+              if (col != max2)
+                fprintf (out, ",\n");
+            }
+          fprintf(out, row == max1 ? "}\n" : "},\n");
+        }
+    }
+  fprintf(out, "};\n");
+  fprintf(stderr, "total number of characters is %u.\n", count);
+  fprintf(stderr, "Range is 0x%04x-0x%04x and 0x%04x-0x%04x.\n",
+          low1_uc, high1_uc, low2_uc, high2_uc);
+  return 0;
+}
--- a/libjava/gnu/gcj/convert/natInput_EUCJIS.cc
+++ b/libjava/gnu/gcj/convert/natInput_EUCJIS.cc
@ -0,0 +1,101 @@
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+#include <config.h>
+#include <cni.h>
+#include <gnu/gcj/convert/Input_EUCJIS.h>
+
+#define ERROR_CHAR 0xFFFD
+
+extern unsigned short JIS0208_to_Unicode[84][94];
+extern unsigned short JIS0212_to_Unicode[76][94];
+
+// Native implementation of Input_EUCJIS.read: convert EUC-JIS bytes
+// from inbuffer[inpos..inlength-1] into at most OUTLENGTH chars stored
+// in OUTBUFFER starting at OUTPOS.  Returns the number of chars stored;
+// inpos is advanced past the bytes consumed.  The codeset and
+// first_byte fields carry the state of a partly read character from
+// one byte (and one call) to the next.  Bytes that do not map to a
+// character produce ERROR_CHAR.
+jint
+gnu::gcj::convert::Input_EUCJIS::read(jcharArray outbuffer, jint outpos,
+                                      jint outlength)
+{
+  jint start_outpos = outpos;
+  // outlength is a count, so the output region ends just before
+  // outbuffer[outpos+outlength].
+  jint out_limit = outpos + outlength;
+  for (;;)
+    {
+      if (outpos >= out_limit)
+        break;
+      if (inpos >= inlength)
+        break;
+      int b = ((unsigned char*) elements(inbuffer))[inpos++];
+      if (codeset == 0)  // ASCII or JIS-Roman
+        {
+          if (b < 128)
+            {
+#if 0
+              // Technically, we should translate 0x5c to Yen symbol;
+              // in practice, it is not clear.
+              if (b == 0x5c)
+                b = 0x00A5;  // Yen sign.
+#endif
+              elements(outbuffer)[outpos++] = (char) b;
+            }
+          else
+            {
+              if (b == 0x8E) // SS2
+                codeset = 2;
+              else if (b == 0x8F) // SS3
+                codeset = 3;
+              else
+                {
+                  codeset = 1;
+                  first_byte = b;
+                }
+            }
+        }
+      else if (codeset == 1) // JIS X 0208:1997
+        {
+          // Turn both bytes into zero-based table indexes.
+          first_byte -= 0x80 + 33;
+          b -= 0x80 + 33;
+          if ((unsigned) first_byte >= 84 || (unsigned) b >= 94)
+            b = ERROR_CHAR;
+          else
+            {
+              b = JIS0208_to_Unicode[first_byte][b];
+              if (b == 0)
+                b = ERROR_CHAR;
+            }
+          elements(outbuffer)[outpos++] = b;
+          codeset = 0;
+        }
+      else if (codeset == 2) // Half-width katakana
+        {
+          if (b >= 0xA1 && b <= 0xDF)
+            b += 0xFF61 - 0xA1;
+          else
+            b = ERROR_CHAR;
+          elements(outbuffer)[outpos++] = b;
+          codeset = 0;
+        }
+      else if (codeset == 3) // second byte of JIS X 0212-1990
+        {
+          first_byte = b;
+          codeset = 4;
+        }
+      else // codeset == 4 // third byte of JIS X 0212-1990
+        {
+          first_byte -= 0x80 + 34;
+          b -= 0x80 + 33;
+          if ((unsigned) first_byte >= 76 || (unsigned) b >= 94)
+            b = ERROR_CHAR;
+          else
+            {
+              // This is a JIS X 0212 character, so it must be looked up
+              // in the JIS X 0212 table (76 rows), not the 0208 one.
+              b = JIS0212_to_Unicode[first_byte][b];
+              if (b == 0)
+                b = ERROR_CHAR;
+            }
+          elements(outbuffer)[outpos++] = b;
+          codeset = 0;
+        }
+    }
+  return outpos - start_outpos;
+}
--- a/libjava/gnu/gcj/protocol/http/Connection.java
+++ b/libjava/gnu/gcj/protocol/http/Connection.java
@ -0,0 +1,285 @@
+// Connection.java - Implementation of HttpURLConnection for http protocol.
+
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.protocol.http;
+
+import java.net.*;
+import java.io.*;
+import java.util.Vector;
+import java.util.Hashtable;
+import java.util.Enumeration;
+
+/**
+ * @author Warren Levy <warrenl@cygnus.com>
+ * @date March 29, 1999.
+ */
+
+/**
+ * Written using on-line Java Platform 1.2 API Specification, as well
+ * as "The Java Class Libraries", 2nd edition (Addison-Wesley, 1998).
+ * Status:  Minimal subset of functionality.  Proxies and Redirects
+ *	not yet handled.  FileNameMap handling needs to be considered.
+ *	useCaches, ifModifiedSince, and allowUserInteraction need
+ *	consideration as well as doInput and doOutput.
+ */
+
+class Connection extends HttpURLConnection
+{
+  /** Socket to the server; null while not connected. */
+  protected Socket sock = null;
+  /** Request properties copied into every new Connection. */
+  private static Hashtable defRequestProperties = new Hashtable();
+  /** Request properties sent with this connection's request. */
+  private Hashtable requestProperties;
+  /** Response header values, keyed by lower-cased header name. */
+  private Hashtable hdrHash = new Hashtable();
+  /** Raw response header lines, in the order received. */
+  private Vector hdrVec = new Vector();
+  /** True once the response headers have been read. */
+  private boolean gotHeaders = false;
+  /** Buffered view of the socket's input, shared by the header reader
+   * and getInputStream so that no response bytes are lost. */
+  private BufferedInputStream bufferedIn;
+
+  public Connection(URL url)
+  {
+    super(url);
+    requestProperties = (Hashtable) defRequestProperties.clone();
+  }
+
+  // Hides the static method of the same name in URLConnection.
+  public static void setDefaultRequestProperty(String key, String value)
+  {
+    defRequestProperties.put(key, value);
+  }
+
+  // Hides the static method of the same name in URLConnection.
+  public static String getDefaultRequestProperty(String key)
+  {
+    return (String) defRequestProperties.get(key);
+  }
+
+  // Override method in URLConnection.
+  public void setRequestProperty(String key, String value)
+  {
+    if (connected)
+      throw new IllegalAccessError("Connection already established.");
+
+    requestProperties.put(key, value);
+  }
+
+  // Override method in URLConnection.
+  public String getRequestProperty(String key)
+  {
+    if (connected)
+      throw new IllegalAccessError("Connection already established.");
+
+    return (String) requestProperties.get(key);
+  }
+
+  /** Open a socket to the URL's host and send the request line, the
+   * Host header and all request properties.  Does nothing if already
+   * connected.
+   * @exception IOException if the host cannot be resolved or reached
+   */
+  public void connect() throws IOException
+  {
+    // Call is ignored if already connected.
+    if (connected)
+      return;
+
+    // Get address and port number.
+    int port;
+    InetAddress destAddr = InetAddress.getByName(url.getHost());
+    if ((port = url.getPort()) == -1)
+      port = 80;
+
+    // Open socket and output stream.
+    sock = new Socket(destAddr, port);
+    PrintWriter out = new PrintWriter(sock.getOutputStream());
+
+    // A URL without a path must still request a resource.
+    String file = url.getFile();
+    if (file == null || file.length() == 0)
+      file = "/";
+
+    // Send request including any request properties that were set.
+    // HTTP requires each line to be terminated by CR LF.
+    out.print(getRequestMethod() + " " + file + " HTTP/1.1\r\n");
+    out.print("Host: " + url.getHost() + ":" + port + "\r\n");
+    // Fetch each value through its key rather than walking a second
+    // enumeration in parallel.
+    Enumeration reqKeys = requestProperties.keys();
+    while (reqKeys.hasMoreElements())
+      {
+        Object key = reqKeys.nextElement();
+        out.print(key + ": " + requestProperties.get(key) + "\r\n");
+      }
+    out.print("\r\n");
+    out.flush();
+    connected = true;
+  }
+
+  /** Close the socket, if any, and mark the connection as closed. */
+  public void disconnect()
+  {
+    if (sock != null)
+      {
+        try
+          {
+            sock.close();
+          }
+        catch (IOException ex)
+          {
+            ; // Ignore errors in closing socket.
+          }
+        sock = null;
+      }
+    // The buffered stream wraps the closed socket; drop it so that a
+    // later connect gets a stream for the new socket.
+    bufferedIn = null;
+    connected = false;
+  }
+
+  /** Proxies are not supported; connect always opens a socket directly
+   * to the URL's host, so this always returns false. */
+  public boolean usingProxy()
+  {
+    return false;
+  }
+
+  // Override default method in URLConnection.
+  public InputStream getInputStream() throws IOException
+  {
+    if (!connected)
+      connect();
+
+    if (bufferedIn == null)
+      bufferedIn = new BufferedInputStream(sock.getInputStream());
+    return bufferedIn;
+  }
+
+  // Override default method in URLConnection.
+  public OutputStream getOutputStream() throws IOException
+  {
+    if (!connected)
+      connect();
+
+    return sock.getOutputStream();
+  }
+
+  /** Return the value of the named response header (the name is
+   * matched case-insensitively), or null if there is no such header or
+   * the headers could not be read. */
+  public String getHeaderField(String name)
+  {
+    try
+      {
+        getHttpHeaders();
+      }
+    catch (IOException x)
+      {
+        return null;
+      }
+    return (String) hdrHash.get(name.toLowerCase());
+  }
+
+  /** Return the value part of the Nth response line (0 is the first
+   * line received), or null if N is out of range or the headers could
+   * not be read. */
+  public String getHeaderField(int n)
+  {
+    try
+      {
+        getHttpHeaders();
+      }
+    catch (IOException x)
+      {
+        return null;
+      }
+    if (n >= 0 && n < hdrVec.size())
+      return getField((String) hdrVec.elementAt(n));
+
+    return null;
+  }
+
+  /** Return the name part of the Nth response line, or null if N is
+   * out of range, the line has no name, or the headers could not be
+   * read. */
+  public String getHeaderFieldKey(int n)
+  {
+    try
+      {
+        getHttpHeaders();
+      }
+    catch (IOException x)
+      {
+        return null;
+      }
+    if (n >= 0 && n < hdrVec.size())
+      return getKey((String) hdrVec.elementAt(n));
+
+    return null;
+  }
+
+  /** Return the part of STR before the first ':', or null if STR is
+   * null or contains no ':'. */
+  private String getKey(String str)
+  {
+    if (str == null)
+      return null;
+    int index = str.indexOf(':');
+    if (index >= 0)
+      return str.substring(0, index);
+    else
+      return null;
+  }
+
+  /** Return the trimmed part of STR after the first ':', or STR itself
+   * if it contains no ':'. */
+  private String getField(String str)
+  {
+    if (str == null)
+      return null;
+    int index = str.indexOf(':');
+    if (index >= 0)
+      return str.substring(index + 1).trim();
+    else
+      return str;
+  }
+
+  /** Remember one response line, and index it by lower-cased name if
+   * it has one. */
+  private void recordHeader(String line)
+  {
+    hdrVec.addElement(line);
+    String key = getKey(line);
+    if (key != null)
+      hdrHash.put(key.toLowerCase(), getField(line));
+  }
+
+  /** Read the response lines up to and including the empty line that
+   * ends the headers, leaving bufferedIn positioned at the content.
+   * The work is done only once per connection.
+   * @exception IOException if connecting or reading fails
+   */
+  private void getHttpHeaders() throws IOException
+  {
+    if (gotHeaders)
+      return;
+
+    // Mark the headers as read only after connecting has succeeded, so
+    // that a failed connection attempt can be retried later.
+    connect();
+    gotHeaders = true;
+
+    // The headers are read one byte at a time from the same buffered
+    // stream that getInputStream returns; a separate reader would
+    // buffer past the end of the headers and lose the start of the
+    // content.
+    if (bufferedIn == null)
+      bufferedIn = new BufferedInputStream(sock.getInputStream());
+
+    StringBuffer line = new StringBuffer();
+    for (;;)
+      {
+        int b = bufferedIn.read();
+        if (b == '\r')
+          {
+            int next = bufferedIn.read();
+            if (next == '\n')
+              {
+                // A '\r' '\n' combo indicates the end of the header
+                // entry; a zero length entry signals the end of the
+                // headers.
+                if (line.length() == 0)
+                  return;
+                recordHeader(line.toString());
+                line.setLength(0);
+                continue;
+              }
+            // A '\r' that is not followed by '\n' is ordinary data, and
+            // so is the byte after it.
+            line.append('\r');
+            b = next;
+          }
+        if (b < 0)
+          {
+            // End of stream before the end of the headers: keep what
+            // was received and stop instead of reading forever.
+            if (line.length() > 0)
+              recordHeader(line.toString());
+            return;
+          }
+        // Header bytes are taken as 8-bit characters, independent of
+        // the platform's default encoding.
+        line.append((char) b);
+      }
+  }
+}
--- a/libjava/gnu/gcj/protocol/http/Handler.java
+++ b/libjava/gnu/gcj/protocol/http/Handler.java
@ -0,0 +1,35 @@
+// Handler.java - URLStreamHandler for http protocol.
+
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.protocol.http;
+
+import java.net.URL;
+import java.net.URLConnection;
+import java.net.URLStreamHandler;
+import java.io.IOException;
+
+/**
+ * @author Warren Levy <warrenl@cygnus.com>
+ * @date March 26, 1999.
+ */
+
+/**
+ * Written using on-line Java Platform 1.2 API Specification, as well
+ * as "The Java Class Libraries", 2nd edition (Addison-Wesley, 1998).
+ * Status:  Minimal functionality.
+ */
+
+public class Handler extends URLStreamHandler
+{
+  /**
+   * Create the http Connection object for the given URL.
+   *
+   * @param url the URL to connect to.
+   * @return a new Connection for that URL.
+   */
+  protected URLConnection openConnection(URL url) throws IOException
+  {
+    URLConnection httpConnection = new Connection(url);
+    return httpConnection;
+  }
+}
--- a/libjava/gnu/gcj/text/BaseBreakIterator.java
+++ b/libjava/gnu/gcj/text/BaseBreakIterator.java
@ -0,0 +1,82 @@
+// Base class for default BreakIterators.
+
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.text;
+
+import java.text.BreakIterator;
+import java.text.CharacterIterator;
+
+/**
+ * @author Tom Tromey <tromey@cygnus.com>
+ * @date March 22, 1999
+ */
+
+/**
+ * Shared plumbing for the default BreakIterators.  The break position is
+ * simply the index of the wrapped CharacterIterator; subclasses supply
+ * next() and previous().
+ */
+public abstract class BaseBreakIterator extends BreakIterator
+{
+  /** Return the current position, i.e. the index of the text iterator.  */
+  public int current ()
+  {
+    return iter.getIndex();
+  }
+
+  /** Move to the beginning of the text and return that index.  */
+  public int first ()
+  {
+    iter.first();
+    return iter.getBeginIndex();
+  }
+
+  /**
+   * Return the boundary that next() finds when started from pos.
+   * The iterator's own position is restored afterwards.
+   * NOTE(review): the BreakIterator API documentation describes
+   * following() as leaving the iterator at the returned boundary;
+   * confirm whether restoring the old position is intended.
+   */
+  public int following (int pos)
+  {
+    int save = iter.getIndex();
+    iter.setIndex(pos);
+    int r = next ();
+    iter.setIndex(save);
+    return r;
+  }
+
+  /** Return the text iterator being scanned (not a copy).  */
+  public CharacterIterator getText ()
+  {
+    return iter;
+  }
+
+  /**
+   * Move past the last character of the text and return that index.
+   * CharacterIterator.last() stops on the final character, one short of
+   * the end index, so the position is set explicitly; otherwise
+   * current() would disagree with the value returned here.
+   */
+  public int last ()
+  {
+    int end = iter.getEndIndex();
+    iter.setIndex(end);
+    return end;
+  }
+
+  /**
+   * Step n boundaries forward (n positive) or backward (n negative),
+   * stopping early if DONE is reached.  With n == 0 the current
+   * position is returned.
+   */
+  public int next (int n)
+  {
+    int r = iter.getIndex ();
+    if (n > 0)
+      {
+        while (n > 0 && r != DONE)
+          {
+            r = next ();
+            --n;
+          }
+      }
+    else if (n < 0)
+      {
+        while (n < 0 && r != DONE)
+          {
+            r = previous ();
+            ++n;
+          }
+      }
+    return r;
+  }
+
+  /** Use newText (not a copy) as the text to scan.  */
+  public void setText (CharacterIterator newText)
+  {
+    iter = newText;
+  }
+
+  /** The text being scanned; null until setText is called.  */
+  protected CharacterIterator iter;
+}
--- a/libjava/gnu/gcj/text/CharacterBreakIterator.java
+++ b/libjava/gnu/gcj/text/CharacterBreakIterator.java
@ -0,0 +1,188 @@
+// Default character BreakIterator.
+
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.text;
+
+import java.text.BreakIterator;
+import java.text.CharacterIterator;
+
+/**
+ * @author Tom Tromey <tromey@cygnus.com>
+ * @date March 19, 1999
+ * Written using The Unicode Standard, Version 2.0.
+ */
+
+/**
+ * Default BreakIterator for character boundaries.  Non-spacing marks,
+ * surrogate pairs and Hangul Jamo sequences are kept together with the
+ * character they belong to.
+ */
+public class CharacterBreakIterator extends BaseBreakIterator
+{
+  // Hangul Jamo constants from Unicode book.
+  private static final int LBase = 0x1100;
+  private static final int VBase = 0x1161;
+  private static final int TBase = 0x11a7;
+  private static final int LCount = 19;
+  private static final int VCount = 21;
+  private static final int TCount = 28;
+
+  // Information about surrogates.
+  private static final int highSurrogateStart = 0xD800;
+  private static final int highSurrogateEnd = 0xDBFF;
+  private static final int lowSurrogateStart = 0xDC00;
+  private static final int lowSurrogateEnd = 0xDFFF;
+
+  /** Return a copy of this iterator with its own copy of the text.  */
+  public Object clone ()
+  {
+    return new CharacterBreakIterator (this);
+  }
+
+  /** Create an iterator with no text; call setText before using it.  */
+  public CharacterBreakIterator ()
+  {
+    iter = null;		// FIXME?
+  }
+
+  private CharacterBreakIterator (CharacterBreakIterator other)
+  {
+    // An iterator that has no text yet must still be cloneable.
+    iter = (other.iter == null
+            ? null
+            : (CharacterIterator) other.iter.clone());
+  }
+
+  // Some methods to tell us different properties of characters.
+  private final boolean isL (char c)
+  {
+    return c >= LBase && c <= LBase + LCount;
+  }
+  private final boolean isV (char c)
+  {
+    return c >= VBase && c <= VBase + VCount;
+  }
+  private final boolean isT (char c)
+  {
+    return c >= TBase && c <= TBase + TCount;
+  }
+  private final boolean isLVT (char c)
+  {
+    return isL (c) || isV (c) || isT (c);
+  }
+  private final boolean isHighSurrogate (char c)
+  {
+    return c >= highSurrogateStart && c <= highSurrogateEnd;
+  }
+  private final boolean isLowSurrogate (char c)
+  {
+    return c >= lowSurrogateStart && c <= lowSurrogateEnd;
+  }
+
+  /**
+   * Advance to the next character boundary.
+   *
+   * @return the new position, or DONE if already at the end of the text.
+   */
+  public int next ()
+  {
+    int end = iter.getEndIndex();
+    if (iter.getIndex() == end)
+      return DONE;
+
+    char c;
+    for (char prev = CharacterIterator.DONE; iter.getIndex() < end; prev = c)
+      {
+        // NOTE(review): CharacterIterator.next() advances before it
+        // returns, so `c' is the character after the starting position
+        // rather than the one at it -- confirm this is intended.
+        c = iter.next();
+        if (c == CharacterIterator.DONE)
+          break;
+        int type = Character.getType(c);
+
+        // Break after paragraph separators.
+        if (type == Character.PARAGRAPH_SEPARATOR)
+          break;
+
+        // Now we need some lookahead.
+        char ahead = iter.next();
+        iter.previous();
+        if (ahead == CharacterIterator.DONE)
+          break;
+        int aheadType = Character.getType(ahead);
+
+        // A character that cannot attach to its predecessor starts a
+        // new unit.
+        if (aheadType != Character.NON_SPACING_MARK
+            && ! isLowSurrogate (ahead)
+            && ! isLVT (ahead))
+          break;
+        // Hangul Jamo sequencing rules.
+        if (! isLVT (c) && isLVT (ahead))
+          break;
+        if (isL (c) && ! isLVT (ahead)
+            && aheadType != Character.NON_SPACING_MARK)
+          break;
+        if (isV (c) && ! isV (ahead) && !isT (ahead)
+            && aheadType != Character.NON_SPACING_MARK)
+          break;
+        if (isT (c) && ! isT (ahead)
+            && aheadType != Character.NON_SPACING_MARK)
+          break;
+
+        // Unpaired surrogates stand alone.
+        if (! isHighSurrogate (c) && isLowSurrogate (ahead))
+          break;
+        if (isHighSurrogate (c) && ! isLowSurrogate (ahead))
+          break;
+        if (! isHighSurrogate (prev) && isLowSurrogate (c))
+          break;
+      }
+
+    return iter.getIndex();
+  }
+
+  /**
+   * Move back to the previous character boundary.
+   *
+   * @return the new position, or DONE if already at the start of the text.
+   */
+  public int previous ()
+  {
+    int start = iter.getBeginIndex();
+    if (iter.getIndex() == start)
+      return DONE;
+
+    while (iter.getIndex() >= start)
+      {
+        char c = iter.previous();
+        if (c == CharacterIterator.DONE)
+          break;
+        int type = Character.getType(c);
+
+        if (type != Character.NON_SPACING_MARK
+            && ! isLowSurrogate (c)
+            && ! isLVT (c))
+          break;
+
+        // Now we need some lookahead.
+        char ahead = iter.previous();
+        if (ahead == CharacterIterator.DONE)
+          {
+            iter.next();
+            break;
+          }
+        // Peek one further back, then return to where `c' is.
+        char ahead2 = iter.previous();
+        iter.next();
+        iter.next();
+        if (ahead2 == CharacterIterator.DONE)
+          break;
+        int aheadType = Character.getType(ahead);
+
+        if (aheadType == Character.PARAGRAPH_SEPARATOR)
+          break;
+
+        // Hangul Jamo sequencing rules.
+        if (isLVT (c) && ! isLVT (ahead))
+          break;
+        if (! isLVT (c) && type != Character.NON_SPACING_MARK
+            && isL (ahead))
+          break;
+        if (! isV (c) && ! isT (c) && type != Character.NON_SPACING_MARK
+            && isV (ahead))
+          break;
+        if (! isT (c) && type != Character.NON_SPACING_MARK
+            && isT (ahead))
+          break;
+
+        // Unpaired surrogates stand alone.
+        if (isLowSurrogate (c) && ! isHighSurrogate (ahead))
+          break;
+        if (! isLowSurrogate (c) && isHighSurrogate (ahead))
+          break;
+        if (isLowSurrogate (ahead) && ! isHighSurrogate (ahead2))
+          break;
+      }
+
+    return iter.getIndex();
+  }
+}
--- a/libjava/gnu/gcj/text/LineBreakIterator.java
+++ b/libjava/gnu/gcj/text/LineBreakIterator.java
@ -0,0 +1,168 @@
+// Default line BreakIterator.
+
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.text;
+
+import java.text.BreakIterator;
+import java.text.CharacterIterator;
+
+/**
+ * @author Tom Tromey <tromey@cygnus.com>
+ * @date March 22, 1999
+ * Written using The Unicode Standard, Version 2.0.
+ */
+
+/**
+ * Default BreakIterator for line-break opportunities: after separators
+ * and runs of spaces, and around ideographs, with non-spacing marks kept
+ * attached to the preceding character.
+ */
+public class LineBreakIterator extends BaseBreakIterator
+{
+  /** Return a copy of this iterator with its own copy of the text.  */
+  public Object clone ()
+  {
+    return new LineBreakIterator (this);
+  }
+
+  /** Create an iterator with no text; call setText before using it.  */
+  public LineBreakIterator ()
+  {
+    iter = null;
+  }
+
+  private LineBreakIterator (LineBreakIterator other)
+  {
+    // An iterator that has no text yet must still be cloneable.
+    iter = (other.iter == null
+            ? null
+            : (CharacterIterator) other.iter.clone());
+  }
+
+  // Some methods to tell us different properties of characters.
+  private final boolean isNb (char c)
+  {
+    return (c == 0x00a0		// NO-BREAK SPACE
+            || c == 0x2011	// NON-BREAKING HYPHEN
+            || c == 0xfeff);	// ZERO WIDTH NO-BREAK SPACE
+  }
+  private final boolean isClose (int type)
+  {
+    return (type == Character.END_PUNCTUATION
+            // Unicode book says "comma, period, ...", which I take to
+            // mean "Po" class.
+            || type == Character.OTHER_PUNCTUATION);
+  }
+  private final boolean isIdeo (char c)
+  {
+    return (c >= 0x3040 && c <= 0x309f	       // Hiragana
+            || c >= 0x30a0 && c <= 0x30ff      // Katakana
+            || c >= 0x4e00 && c <= 0x9fff      // Han
+            || c >= 0x3100 && c <= 0x312f);    // Bopomofo
+  }
+
+  /**
+   * Advance to the next line-break opportunity.
+   *
+   * @return the new position, or DONE if already at the end of the text.
+   */
+  public int next ()
+  {
+    int end = iter.getEndIndex();
+    if (iter.getIndex() == end)
+      return DONE;
+
+    while (iter.getIndex() < end)
+      {
+        char c = iter.current();
+        int type = Character.getType(c);
+
+        char n = iter.next();
+
+        if (n == CharacterIterator.DONE
+            || type == Character.PARAGRAPH_SEPARATOR
+            || type == Character.LINE_SEPARATOR)
+          break;
+
+        // Handle two cases where we must scan for non-spacing marks.
+        int start = iter.getIndex();
+        if (type == Character.SPACE_SEPARATOR
+            || type == Character.START_PUNCTUATION
+            || isIdeo (c))
+          {
+            while (n != CharacterIterator.DONE
+                   && Character.getType(n) == Character.NON_SPACING_MARK)
+              n = iter.next();
+            if (n == CharacterIterator.DONE)
+              break;
+
+            if (type == Character.SPACE_SEPARATOR)
+              {
+                int nt = Character.getType(n);
+                if (nt != Character.NON_SPACING_MARK
+                    && nt != Character.SPACE_SEPARATOR
+                    && ! isNb (n))
+                  break;
+              }
+            else if (type == Character.START_PUNCTUATION)
+              {
+                if (isIdeo (n))
+                  {
+                    // Open punctuation followed by non spacing marks
+                    // and then ideograph does not have a break in
+                    // it.  So skip all this.
+                    start = iter.getIndex();
+                  }
+              }
+            else
+              {
+                // Ideograph preceded this character.
+                if (isClose (Character.getType(n)))
+                  break;
+              }
+          }
+        // No break found here; resume scanning from the saved position.
+        iter.setIndex(start);
+      }
+
+    return iter.getIndex();
+  }
+
+  /**
+   * Move back to the previous line-break opportunity.
+   *
+   * @return the new position, or DONE if already at the start of the text.
+   */
+  public int previous ()
+  {
+    int start = iter.getBeginIndex();
+    if (iter.getIndex() == start)
+      return DONE;
+
+    while (iter.getIndex() >= start)
+      {
+        char c = iter.previous();
+        if (c == CharacterIterator.DONE)
+          break;
+        int type = Character.getType(c);
+
+        // Peek at the character before `c', then step back onto `c'.
+        char n = iter.previous();
+        if (n == CharacterIterator.DONE)
+          break;
+        iter.next();
+
+        int nt = Character.getType(n);
+        // Break after paragraph separators.
+        if (nt == Character.PARAGRAPH_SEPARATOR
+            || nt == Character.LINE_SEPARATOR)
+          break;
+
+        // Skip non-spacing marks.
+        int init = iter.getIndex();
+        while (n != CharacterIterator.DONE && nt == Character.NON_SPACING_MARK)
+          {
+            n = iter.previous();
+            nt = Character.getType(n);
+          }
+
+        if (nt == Character.SPACE_SEPARATOR
+            && type != Character.SPACE_SEPARATOR
+            && type != Character.NON_SPACING_MARK
+            && ! isNb (c))
+          break;
+        if (! isClose (type) && isIdeo (n))
+          break;
+        if (isIdeo (c) && nt != Character.START_PUNCTUATION)
+          break;
+        // No break found here; resume scanning from the saved position.
+        iter.setIndex(init);
+      }
+
+    return iter.getIndex();
+  }
+}
--- a/libjava/gnu/gcj/text/LocaleData_en.java
+++ b/libjava/gnu/gcj/text/LocaleData_en.java
@ -0,0 +1,75 @@
+// Generic English locale data for java.text.
+
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.text;
+
+import java.util.ListResourceBundle;
+
+/**
+ * @author Tom Tromey <tromey@cygnus.com>
+ * @date March 4, 1999
+ */
+
+/**
+ * Resource bundle holding the generic English data consumed by
+ * java.text: decimal symbols, number patterns and date symbols.
+ */
+public final class LocaleData_en extends ListResourceBundle
+{
+  // Date symbol tables (used for DateFormatSymbols).
+
+  /** Half-day markers.  */
+  static final String[] ampmsDefault = { "AM", "PM" };
+
+  /** Era names.  */
+  static final String[] erasDefault = { "BC", "AD" };
+
+  /** Localized date pattern characters.  */
+  static final String localPatternCharsDefault = "GyMdkHmsSEDFwWahKz";
+
+  /** Full month names; the final entry is empty.  */
+  static final String[] monthsDefault =
+  {
+    "January", "February", "March", "April",
+    "May", "June", "July", "August",
+    "September", "October", "November", "December",
+    ""
+  };
+
+  /** Abbreviated month names; the final entry is empty.  */
+  static final String[] shortMonthsDefault =
+  {
+    "Jan", "Feb", "Mar", "Apr",
+    "May", "Jun", "Jul", "Aug",
+    "Sep", "Oct", "Nov", "Dec",
+    ""
+  };
+
+  /** Abbreviated weekday names; the first entry is empty.  */
+  static final String[] shortWeekdaysDefault =
+  {
+    "",
+    "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
+  };
+
+  /** Full weekday names; the first entry is empty.  */
+  static final String[] weekdaysDefault =
+  {
+    "",
+    "Sunday", "Monday", "Tuesday", "Wednesday",
+    "Thursday", "Friday", "Saturday"
+  };
+
+  /** Key/value table handed to ListResourceBundle.  */
+  private static final Object[][] contents =
+  {
+    // Symbols for DecimalFormatSymbols.
+    { "decimalSeparator", "." },
+    { "digit", "#" },
+    { "exponential", "E" },
+    { "groupingSeparator", "," },
+    { "infinity", "\u221e" },
+    { "minusSign", "-" },
+    { "NaN", "\ufffd" },
+    { "patternSeparator", ";" },
+    { "percent", "%" },
+    { "perMill", "\u2030" },
+    { "zeroDigit", "0" },
+
+    // Patterns for NumberFormat.
+    { "numberFormat", "#,##0.###" },
+    { "percentFormat", "#,##0%" },
+
+    // Symbols for DateFormatSymbols.
+    { "ampm", ampmsDefault },
+    { "eras", erasDefault },
+    { "datePatternChars", localPatternCharsDefault },
+    { "months", monthsDefault },
+    { "shortMonths", shortMonthsDefault },
+    { "shortWeekdays", shortWeekdaysDefault },
+    { "weekdays", weekdaysDefault }
+  };
+
+  /** Return the shared key/value table (not a copy).  */
+  protected Object[][] getContents ()
+  {
+    return contents;
+  }
+}
--- a/libjava/gnu/gcj/text/LocaleData_en_US.java
+++ b/libjava/gnu/gcj/text/LocaleData_en_US.java
@ -0,0 +1,71 @@
+// US English locale data for java.text.
+
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.text;
+
+import java.util.ListResourceBundle;
+
+/**
+ * @author Tom Tromey <tromey@cygnus.com>
+ * @date March 4, 1999
+ */
+
+/**
+ * Resource bundle holding the US English data consumed by java.text:
+ * currency symbols and pattern, time zone names, and the date and time
+ * patterns.
+ */
+public final class LocaleData_en_US extends ListResourceBundle
+{
+  // These are for DateFormatSymbols.
+  // Each row holds six strings: an ID, a long and a short name, a second
+  // long and short name, and a city.
+  static String[][] zoneStringsDefault = {
+    { "PST", "Pacific Standard Time", "PST",
+      /**/   "Pacific Daylight Time", "PDT", "San Francisco" },
+    { "MST", "Mountain Standard Time", "MST",
+      /**/   "Mountain Daylight Time", "MDT", "Denver" },
+    { "PNT", "Mountain Standard Time", "MST",
+      /**/   "Mountain Standard Time", "MST", "Phoenix" },
+    { "CST", "Central Standard Time", "CST",
+      /**/   "Central Daylight Time", "CDT", "Chicago" },
+    { "EST", "Eastern Standard Time", "EST",
+      /**/   "Eastern Daylight Time", "EDT", "Boston" },
+    { "IET", "Eastern Standard Time", "EST",
+      /**/   "Eastern Standard Time", "EST", "Indianapolis" },
+    { "PRT", "Atlantic Standard Time", "AST",
+      /**/   "Atlantic Daylight Time", "ADT", "Halifax" },
+    { "HST", "Hawaii Standard Time", "HST",
+      /**/   "Hawaii Daylight Time", "HDT", "Honolulu" },
+    { "AST", "Alaska Standard Time", "AST",
+      /**/   "Alaska Daylight Time", "ADT", "Anchorage" }
+  };
+
+  private static final Object[][] contents =
+  {
+    // These are for DecimalFormatSymbols.
+    { "currency", "$" },
+    // The international symbol is the ISO 4217 currency code, not the
+    // local currency sign.
+    { "intlCurrencySymbol", "USD" },
+
+    // These are for NumberFormat.
+    { "currencyFormat", "$#,##0.00;($#,##0.00)" },
+
+    // These are for DateFormatSymbols.
+    { "zoneStrings", zoneStringsDefault },
+
+    // These are for DateFormat.
+    { "shortDateFormat", "M/d/yy" },	      // Java's Y2K bug.
+    { "mediumDateFormat", "d-MMM-yy" },
+    { "longDateFormat", "MMMM d, yyyy" },
+    { "fullDateFormat", "EEEE MMMM d, yyyy G" },
+    { "shortTimeFormat", "h:mm a" },
+    { "mediumTimeFormat", "h:mm:ss a" },
+    { "longTimeFormat", "h:mm:ss a z" },
+    { "fullTimeFormat", "h:mm:ss;S 'o''clock' a z" }
+  };
+
+  /** Return the shared key/value table (not a copy).  */
+  protected Object[][] getContents ()
+    {
+      return contents;
+    }
+}
--- a/libjava/gnu/gcj/text/SentenceBreakIterator.java
+++ b/libjava/gnu/gcj/text/SentenceBreakIterator.java
@ -0,0 +1,226 @@
+// Default sentence BreakIterator.
+
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.text;
+
+import java.text.BreakIterator;
+import java.text.CharacterIterator;
+
+/**
+ * @author Tom Tromey <tromey@cygnus.com>
+ * @date March 23, 1999
+ * Written using The Unicode Standard, Version 2.0.
+ */
+
+/**
+ * Default BreakIterator for sentence boundaries.  A sentence ends after
+ * a paragraph separator, after `!' or `?', or after a `.' that is
+ * followed by at least one space and then something that is not lower
+ * case.
+ */
+public class SentenceBreakIterator extends BaseBreakIterator
+{
+  /** Return a copy of this iterator with its own copy of the text.  */
+  public Object clone ()
+  {
+    return new SentenceBreakIterator (this);
+  }
+
+  /** Create an iterator with no text; call setText before using it.  */
+  public SentenceBreakIterator ()
+  {
+    iter = null;
+  }
+
+  private SentenceBreakIterator (SentenceBreakIterator other)
+  {
+    // An iterator that has no text yet must still be cloneable.
+    iter = (other.iter == null
+            ? null
+            : (CharacterIterator) other.iter.clone());
+  }
+
+  /**
+   * Advance to the next sentence boundary.
+   *
+   * @return the new position, or DONE if already at the end of the text.
+   */
+  public int next ()
+  {
+    int end = iter.getEndIndex();
+    if (iter.getIndex() == end)
+      return DONE;
+
+    while (iter.getIndex() < end)
+      {
+        char c = iter.current();
+        if (c == CharacterIterator.DONE)
+          break;
+        int type = Character.getType(c);
+
+        char n = iter.next();
+        if (n == CharacterIterator.DONE)
+          break;
+
+        // Always break after paragraph separator.
+        if (type == Character.PARAGRAPH_SEPARATOR)
+          break;
+
+        if (c == '!' || c == '?')
+          {
+            // Skip close punctuation.
+            while (n != CharacterIterator.DONE
+                   && Character.getType(n) == Character.END_PUNCTUATION)
+              n = iter.next();
+            // Skip spaces.
+            while (n != CharacterIterator.DONE
+                   && Character.getType(n) == Character.SPACE_SEPARATOR)
+              n = iter.next();
+            // Skip optional paragraph separator.
+            if (n != CharacterIterator.DONE
+                && Character.getType(n) == Character.PARAGRAPH_SEPARATOR)
+              n = iter.next();
+
+            // There's always a break somewhere after `!' or `?'.
+            break;
+          }
+
+        if (c == '.')
+          {
+            int save = iter.getIndex();
+            // Skip close punctuation.
+            while (n != CharacterIterator.DONE
+                   && Character.getType(n) == Character.END_PUNCTUATION)
+              n = iter.next();
+            // Skip spaces.  We keep count because we need at least
+            // one for this period to represent a terminator.
+            int spcount = 0;
+            while (n != CharacterIterator.DONE
+                   && Character.getType(n) == Character.SPACE_SEPARATOR)
+              {
+                n = iter.next();
+                ++spcount;
+              }
+            if (spcount > 0)
+              {
+                int save2 = iter.getIndex();
+                // Skip over open punctuation.
+                while (n != CharacterIterator.DONE
+                       && Character.getType(n) == Character.START_PUNCTUATION)
+                  n = iter.next();
+                // Next character must not be lower case.
+                if (n == CharacterIterator.DONE
+                    || ! Character.isLowerCase(n))
+                  {
+                    // The break goes before any open punctuation.
+                    iter.setIndex(save2);
+                    break;
+                  }
+              }
+            // This period did not end a sentence; keep scanning.
+            iter.setIndex(save);
+          }
+      }
+
+    return iter.getIndex();
+  }
+
+  /**
+   * Scan backwards for one sentence end.  Besides moving the iterator,
+   * this records in `period' the index at which the scan for the
+   * terminator itself stopped, so that previous() can restart from there.
+   *
+   * @return the new position, or DONE if already at the start of the text.
+   */
+  private final int previous_internal ()
+  {
+    int start = iter.getBeginIndex();
+    if (iter.getIndex() == start)
+      return DONE;
+
+    while (iter.getIndex() >= start)
+      {
+        char c = iter.previous();
+        if (c == CharacterIterator.DONE)
+          break;
+
+        // Peek at the character before `c', then step back onto `c'.
+        char n = iter.previous();
+        if (n == CharacterIterator.DONE)
+          break;
+        iter.next();
+        int nt = Character.getType(n);
+
+        if (! Character.isLowerCase(c)
+            && (nt == Character.START_PUNCTUATION
+                || nt == Character.SPACE_SEPARATOR))
+          {
+            int save = iter.getIndex();
+            int save_nt = nt;
+            char save_n = n;
+            // Skip open punctuation.
+            while (n != CharacterIterator.DONE
+                   && Character.getType(n) == Character.START_PUNCTUATION)
+              n = iter.previous();
+            if (n == CharacterIterator.DONE)
+              break;
+            if (Character.getType(n) == Character.SPACE_SEPARATOR)
+              {
+                // Must have at least one space after the `.'.
+                int save2 = iter.getIndex();
+                while (n != CharacterIterator.DONE
+                       && Character.getType(n) == Character.SPACE_SEPARATOR)
+                  n = iter.previous();
+                // Skip close punctuation.
+                while (n != CharacterIterator.DONE
+                       && Character.getType(n) == Character.END_PUNCTUATION)
+                  n = iter.previous();
+                if (n == CharacterIterator.DONE || n == '.')
+                  {
+                    // Communicate location of actual end.
+                    period = iter.getIndex();
+                    iter.setIndex(save2);
+                    break;
+                  }
+              }
+            // Not a `.' terminator; undo the lookbehind.
+            iter.setIndex(save);
+            nt = save_nt;
+            n = save_n;
+          }
+
+        if (nt == Character.PARAGRAPH_SEPARATOR)
+          {
+            // Communicate location of actual end.
+            period = iter.getIndex();
+            break;
+          }
+        else if (nt == Character.SPACE_SEPARATOR
+                 || nt == Character.END_PUNCTUATION)
+          {
+            int save = iter.getIndex();
+            // Skip spaces.
+            while (n != CharacterIterator.DONE
+                   && Character.getType(n) == Character.SPACE_SEPARATOR)
+              n = iter.previous();
+            // Skip close punctuation.
+            while (n != CharacterIterator.DONE
+                   && Character.getType(n) == Character.END_PUNCTUATION)
+              n = iter.previous();
+            int here = iter.getIndex();
+            iter.setIndex(save);
+            if (n == CharacterIterator.DONE || n == '!' || n == '?')
+              {
+                // Communicate location of actual end.
+                period = here;
+                break;
+              }
+          }
+        else if (n == '!' || n == '?')
+          {
+            // Communicate location of actual end.
+            period = iter.getIndex();
+            break;
+          }
+      }
+
+    return iter.getIndex();
+  }
+
+  /**
+   * Move back to the previous sentence boundary.
+   *
+   * @return the new position, or DONE if already at the start of the text.
+   */
+  public int previous ()
+  {
+    // We want to skip over the first sentence end to the second one.
+    // However, at the end of the string we want the first end.
+    int here = iter.getIndex();
+    period = here;
+    int first = previous_internal ();
+    if (here == iter.getEndIndex() || first == DONE)
+      return first;
+    iter.setIndex(period);
+    return previous_internal ();
+  }
+
+  // This is used for communication between previous and
+  // previous_internal.
+  private int period;
+}
--- a/libjava/gnu/gcj/text/WordBreakIterator.java
+++ b/libjava/gnu/gcj/text/WordBreakIterator.java
@ -0,0 +1,224 @@
+// Default word BreakIterator.
+
+/* Copyright (C) 1999  Cygnus Solutions
+
+   This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
+details.  */
+
+package gnu.gcj.text;
+
+import java.text.BreakIterator;
+import java.text.CharacterIterator;
+
+/**
+ * @author Tom Tromey <tromey@cygnus.com>
+ * @date March 22, 1999
+ * Written using The Unicode Standard, Version 2.0.
+ */
+
+/**
+ * Default BreakIterator for word boundaries.  Breaks fall between
+ * letters and non-letters, after punctuation and symbols, and between
+ * runs of Japanese script; the apostrophe is treated as part of a word.
+ */
+public class WordBreakIterator extends BaseBreakIterator
+{
+  /** Return a copy of this iterator with its own copy of the text.  */
+  public Object clone ()
+  {
+    return new WordBreakIterator (this);
+  }
+
+  /** Create an iterator with no text; call setText before using it.  */
+  public WordBreakIterator ()
+  {
+    iter = null;
+  }
+
+  private WordBreakIterator (WordBreakIterator other)
+  {
+    // An iterator that has no text yet must still be cloneable.
+    iter = (other.iter == null
+            ? null
+            : (CharacterIterator) other.iter.clone());
+  }
+
+  // Some methods to tell us different properties of characters.
+  private final boolean isHira (char c)
+  {
+    return c >= 0x3040 && c <= 0x309f;
+  }
+  private final boolean isKata (char c)
+  {
+    return c >= 0x30a0 && c <= 0x30ff;
+  }
+  private final boolean isHan (char c)
+  {
+    return c >= 0x4e00 && c <= 0x9fff;
+  }
+
+  /**
+   * Advance to the next word boundary.
+   *
+   * @return the new position, or DONE if already at the end of the text.
+   */
+  public int next ()
+  {
+    int end = iter.getEndIndex();
+    if (iter.getIndex() == end)
+      return DONE;
+
+    while (iter.getIndex() < end)
+      {
+        char c = iter.current();
+        if (c == CharacterIterator.DONE)
+          break;
+        int type = Character.getType(c);
+
+        char n = iter.next();
+        if (n == CharacterIterator.DONE)
+          break;
+
+        // Break after paragraph separators.
+        if (type == Character.PARAGRAPH_SEPARATOR
+            || type == Character.LINE_SEPARATOR)
+          break;
+
+        // Break between letters and non-letters.
+        // FIXME: we treat apostrophe as part of a word.  This
+        // is an English-ism.
+        boolean is_letter = Character.isLetter(c);
+        if (c != '\'' && ! is_letter && type != Character.NON_SPACING_MARK
+            && Character.isLetter(n))
+          break;
+
+        // Always break after certain symbols, such as punctuation.
+        // This heuristic is derived from hints in the JCL book and is
+        // not part of Unicode.  It seems to be right, however.
+        // FIXME: we treat apostrophe as part of a word.  This
+        // is an English-ism.
+        if (c != '\''
+            && (type == Character.DASH_PUNCTUATION
+                || type == Character.START_PUNCTUATION
+                || type == Character.END_PUNCTUATION
+                || type == Character.CONNECTOR_PUNCTUATION
+                || type == Character.OTHER_PUNCTUATION
+                || type == Character.MATH_SYMBOL
+                || type == Character.CURRENCY_SYMBOL
+                || type == Character.MODIFIER_SYMBOL
+                || type == Character.OTHER_SYMBOL
+                || type == Character.FORMAT
+                || type == Character.CONTROL))
+          break;
+
+        boolean is_hira = isHira (c);
+        boolean is_kata = isKata (c);
+        boolean is_han = isHan (c);
+
+        // Special case Japanese.
+        if (! is_hira && ! is_kata && ! is_han
+            && type != Character.NON_SPACING_MARK
+            && (isHira (n) || isKata (n) || isHan (n)))
+          break;
+
+        if (is_hira || is_kata || is_han || is_letter)
+          {
+            // Now we need to do some lookahead.  We might need to do
+            // quite a bit of lookahead, so we save our position and
+            // restore it later.
+            int save = iter.getIndex();
+            // Skip string of non spacing marks.
+            while (n != CharacterIterator.DONE
+                   && Character.getType(n) == Character.NON_SPACING_MARK)
+              n = iter.next();
+            if (n == CharacterIterator.DONE)
+              break;
+            if ((is_hira && ! isHira (n))
+                || (is_kata && ! isHira (n) && ! isKata (n))
+                || (is_han && ! isHira (n) && ! isHan (n))
+                // FIXME: we treat apostrophe as part of a word.  This
+                // is an English-ism.
+                || (is_letter && ! Character.isLetter(n) && n != '\''))
+              break;
+            iter.setIndex(save);
+          }
+      }
+
+    return iter.getIndex();
+  }
+
+  /**
+   * Move back to the previous word boundary.
+   *
+   * @return the new position, or DONE if already at the start of the text.
+   */
+  public int previous ()
+  {
+    int start = iter.getBeginIndex();
+    if (iter.getIndex() == start)
+      return DONE;
+
+    while (iter.getIndex() >= start)
+      {
+        char c = iter.previous();
+        if (c == CharacterIterator.DONE)
+          break;
+
+        boolean is_hira = isHira (c);
+        boolean is_kata = isKata (c);
+        boolean is_han = isHan (c);
+        boolean is_letter = Character.isLetter(c);
+
+        // Peek at the character before `c', then step back onto `c'.
+        char n = iter.previous();
+        if (n == CharacterIterator.DONE)
+          break;
+        iter.next();
+        int type = Character.getType(n);
+        // Break after paragraph separators.
+        if (type == Character.PARAGRAPH_SEPARATOR
+            || type == Character.LINE_SEPARATOR)
+          break;
+
+        // Break between letters and non-letters.
+        // FIXME: we treat apostrophe as part of a word.  This
+        // is an English-ism.
+        if (n != '\'' && ! Character.isLetter(n)
+            && type != Character.NON_SPACING_MARK
+            && is_letter)
+          break;
+
+        // Always break after certain symbols, such as punctuation.
+        // This heuristic is derived from hints in the JCL book and is
+        // not part of Unicode.  It seems to be right, however.
+        // FIXME: we treat apostrophe as part of a word.  This
+        // is an English-ism.
+        if (n != '\''
+            && (type == Character.DASH_PUNCTUATION
+                || type == Character.START_PUNCTUATION
+                || type == Character.END_PUNCTUATION
+                || type == Character.CONNECTOR_PUNCTUATION
+                || type == Character.OTHER_PUNCTUATION
+                || type == Character.MATH_SYMBOL
+                || type == Character.CURRENCY_SYMBOL
+                || type == Character.MODIFIER_SYMBOL
+                || type == Character.OTHER_SYMBOL
+                || type == Character.FORMAT
+                || type == Character.CONTROL))
+          break;
+
+        // Special case Japanese.
+        if ((is_hira || is_kata || is_han)
+            && ! isHira (n) && ! isKata (n) && ! isHan (n)
+            && type != Character.NON_SPACING_MARK)
+          break;
+
+        // We might have to skip over non spacing marks to see what's
+        // on the other side.
+        if (! is_hira || (! is_letter && c != '\''))
+          {
+            int save = iter.getIndex();
+            while (n != CharacterIterator.DONE
+                   && Character.getType(n) == Character.NON_SPACING_MARK)
+              n = iter.previous();
+            iter.setIndex(save);
+            // This is a strange case: a bunch of non-spacing marks at
+            // the beginning.  We treat the current location as a word
+            // break.
+            if (n == CharacterIterator.DONE)
+              break;
+            if ((isHira (n) && ! is_hira)
+                || (isKata (n) && ! is_hira && ! is_kata)
+                || (isHan (n) && ! is_hira && ! is_han)
+                // FIXME: we treat apostrophe as part of a word.  This
+                // is an English-ism.
+                || (! is_letter && c != '\'' && Character.isLetter(n)))
+              break;
+          }
+      }
+
+    return iter.getIndex();
+  }
+}