Initial revision
From-SVN: r26263
This commit is contained in:
parent
140fa895c6
commit
ee9dd3721b
370 changed files with 173494 additions and 0 deletions
105
libjava/gnu/gcj/convert/BytesToUnicode.java
Normal file
105
libjava/gnu/gcj/convert/BytesToUnicode.java
Normal file
|
@ -0,0 +1,105 @@
|
|||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.convert;
|
||||
|
||||
public abstract class BytesToUnicode
|
||||
{
|
||||
/** Buffer to read bytes from.
|
||||
* The characters inbuffer[inpos] ... inbuffer[inlength-1] are available. */
|
||||
public byte[] inbuffer;
|
||||
/** Starting index in buffer to read bytes from. */
|
||||
public int inpos;
|
||||
/** End of valid bytes in buffer. */
|
||||
public int inlength;
|
||||
|
||||
static Class defaultDecodingClass;
|
||||
|
||||
static synchronized void getDefaultDecodingClass()
|
||||
{
|
||||
// Test (defaultDecodingClass == null) again in case of race condition.
|
||||
if (defaultDecodingClass == null)
|
||||
{
|
||||
String encoding = System.getProperty("file.encoding");
|
||||
String className = "gnu.gcj.convert.Input_"+encoding;
|
||||
try
|
||||
{
|
||||
defaultDecodingClass = Class.forName(className);
|
||||
}
|
||||
catch (ClassNotFoundException ex)
|
||||
{
|
||||
throw new NoClassDefFoundError("missing default encoding "
|
||||
+ encoding + " (class "
|
||||
+ className + " not found)");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public abstract String getName();
|
||||
|
||||
public static BytesToUnicode getDefaultDecoder()
|
||||
{
|
||||
try
|
||||
{
|
||||
if (defaultDecodingClass == null)
|
||||
getDefaultDecodingClass();
|
||||
return (BytesToUnicode) defaultDecodingClass.newInstance();
|
||||
}
|
||||
catch (Throwable ex)
|
||||
{
|
||||
return new Input_8859_1();
|
||||
}
|
||||
}
|
||||
|
||||
/** Get a byte-stream->char-stream converter given an encoding name. */
|
||||
public static BytesToUnicode getDecoder (String encoding)
|
||||
throws java.io.UnsupportedEncodingException
|
||||
{
|
||||
String className = "gnu.gcj.convert.Input_"+encoding;
|
||||
Class decodingClass;
|
||||
try
|
||||
{
|
||||
decodingClass = Class.forName(className);
|
||||
return (BytesToUnicode) decodingClass.newInstance();
|
||||
}
|
||||
catch (Throwable ex)
|
||||
{
|
||||
throw new java.io.UnsupportedEncodingException(encoding
|
||||
+ " (" + ex + ')');
|
||||
}
|
||||
}
|
||||
|
||||
/** Make input bytes available to the conversion.
|
||||
* @param buffer source of input bytes
|
||||
* @param pos index of first available byte
|
||||
* @param length one more than index of last available byte
|
||||
*/
|
||||
public final void setInput(byte[] buffer, int pos, int length)
|
||||
{
|
||||
inbuffer = buffer;
|
||||
inpos = pos;
|
||||
inlength = length;
|
||||
}
|
||||
|
||||
/** Convert bytes to chars.
|
||||
* Input bytes are taken from this.inbuffer. The available input
|
||||
* bytes start at inbuffer[inpos], and end at inbuffer[inlength-1].
|
||||
* @param outbuffer buffer for the converted character
|
||||
* @param outpos position in buffer to start putting converted characters
|
||||
* @param outlength the maximum number of characters to read
|
||||
* @return number of chars placed in outbuffer.
|
||||
* Also, this.inpos is incremented by the number of bytes consumed.
|
||||
*
|
||||
* (Note the asymmetry in that the input upper bound is inbuffer[inlength-1],
|
||||
* while the output upper bound is outbuffer[outpos+outlength-1]. The
|
||||
* justification is that inlength is like the count field of a
|
||||
* BufferedInputStream, while the outlength parameter is like the
|
||||
* length parameter of a read request.)
|
||||
*/
|
||||
public abstract int read (char[] outbuffer, int outpos, int outlength);
|
||||
}
|
151
libjava/gnu/gcj/convert/Convert.java
Normal file
151
libjava/gnu/gcj/convert/Convert.java
Normal file
|
@ -0,0 +1,151 @@
|
|||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.convert;
|
||||
import java.io.*;
|
||||
|
||||
/**
 * Command-line tool that copies a file (or standard input) to another
 * file (or standard output) while converting between character
 * encodings.
 *
 * Usage: convert [--from srcEncoding] [--to dstEncoding]
 *                [inputfile [outputfile]]
 *
 * The output encoding defaults to "JavaSrc"; the input encoding
 * defaults to the platform default.  A file name of "-" means
 * standard input or standard output.
 */
public class Convert
{
  /** Print MESSAGE and a usage summary on stderr, then exit with -1. */
  static void error (String message)
  {
    System.err.print("convert: ");
    System.err.println(message);
    System.err.println("Usage: convert [--from srcEncoding] [--to dstEncoding]");
    System.err.println("  [inputfile [outputfile]]");
    System.exit(-1);
  }

  /** Report that option ARG is missing its required value, then exit. */
  static void missing (String arg)
  {
    error("missing arg after `" + arg + "' option");
  }

  /**
   * Parse the command line, open the streams and copy the input to
   * the output through a Reader/Writer pair using the requested
   * encodings.  Exits with status -1 on a usage or I/O error.
   */
  public static void main (String[] args)
  {
    String inName = "-";
    String outName = "-";
    String inEncodingName = null;
    String outEncodingName = "JavaSrc";
    int seenNames = 0;
    boolean reverse = false;

    for (int i = 0;  i < args.length;  i++)
      {
        String arg = args[i];
        if (arg.length() == 0)
          error("zero-length argument");
        if (arg.charAt(0) == '-')
          {
            // Compare the current argument (arg), not the whole array.
            if (arg.equals("-encoding") || arg.equals("--encoding")
                || arg.equals("-from") || arg.equals("--from"))
              {
                if (++i == args.length) missing(arg);
                inEncodingName = args[i];
              }
            else if (arg.equals("-to") || arg.equals("--to"))
              {
                if (++i == args.length) missing(arg);
                outEncodingName = args[i];
              }
            else if (arg.equals("-i"))
              {
                if (++i == args.length) missing(arg);
                inName = args[i];
              }
            else if (arg.equals("-o"))
              {
                if (++i == args.length) missing(arg);
                outName = args[i];
              }
            else if (arg.equals("-reverse") || arg.equals("--reverse"))
              {
                reverse = true;
              }
            else if (arg.equals("-"))
              {
                switch (seenNames)
                  {
                  case 0:
                    inName = "-";
                    seenNames++;
                    break;
                  case 1:
                    outName = "-";
                    seenNames++;
                    break;
                  default:
                    error("too many `-' arguments");
                  }
              }
            else
              error("unrecognized argument `" + arg + "'");
          }
        else
          {
            switch (seenNames)
              {
              case 0:
                inName = arg;
                seenNames++;
                break;
              case 1:
                outName = arg;
                seenNames++;
                break;
              default:
                error("too many filename arguments");
              }
          }
      }

    if (reverse)
      {
        String tmp = inEncodingName;
        inEncodingName = outEncodingName;
        outEncodingName = tmp;
      }

    try
      {
        // File names come from the command line, so they must be
        // compared by value, not by reference.
        InputStream inStream = inName.equals("-") ? System.in
          : new FileInputStream(inName);
        OutputStream outStream;
        if (outName.equals("-"))
          outStream = System.out;
        else
          outStream = new FileOutputStream(outName);

        // A null encoding name means "platform default"; the
        // two-argument constructors reject null, so pick the
        // one-argument form in that case.
        InputStreamReader in
          = (inEncodingName == null
             ? new InputStreamReader(inStream)
             : new InputStreamReader(inStream, inEncodingName));
        OutputStreamWriter out
          = (outEncodingName == null
             ? new OutputStreamWriter(outStream)
             : new OutputStreamWriter(outStream, outEncodingName));

        char[] buffer = new char[2048];
        for (;;)
          {
            int count = in.read(buffer);
            if (count < 0)
              break;
            out.write(buffer, 0, count);
          }

        in.close();
        out.close();
      }
    catch (java.io.IOException ex)
      {
        System.err.print("convert exception: ");
        System.err.println(ex);
        System.exit(-1);
      }
  }
}
|
32
libjava/gnu/gcj/convert/Input_8859_1.java
Normal file
32
libjava/gnu/gcj/convert/Input_8859_1.java
Normal file
|
@ -0,0 +1,32 @@
|
|||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.convert;
|
||||
|
||||
public class Input_8859_1 extends BytesToUnicode
|
||||
{
|
||||
public String getName() { return "8859_1"; }
|
||||
|
||||
public int read (char[] outbuffer, int outpos, int outlength)
|
||||
{
|
||||
int origpos = outpos;
|
||||
// Make sure fields of this are in registers.
|
||||
int inpos = this.inpos;
|
||||
byte[] inbuffer = this.inbuffer;
|
||||
int inavail = this.inlength - inpos;
|
||||
int outavail = outlength - outpos;
|
||||
if (outavail > inavail)
|
||||
outavail = inavail;
|
||||
while (--outavail >= 0)
|
||||
{
|
||||
outbuffer[outpos++] = (char) (inbuffer[inpos++] & 0xFF);
|
||||
}
|
||||
this.inpos = inpos;
|
||||
return outpos - origpos;
|
||||
}
|
||||
}
|
19
libjava/gnu/gcj/convert/Input_EUCJIS.java
Normal file
19
libjava/gnu/gcj/convert/Input_EUCJIS.java
Normal file
|
@ -0,0 +1,19 @@
|
|||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.convert;
|
||||
|
||||
public class Input_EUCJIS extends BytesToUnicode
|
||||
{
|
||||
public String getName() { return "EUCJIS"; }
|
||||
|
||||
int codeset = 0;
|
||||
int first_byte;
|
||||
|
||||
public native int read (char[] outbuffer, int outpos, int outlength);
|
||||
}
|
107
libjava/gnu/gcj/convert/Input_UTF8.java
Normal file
107
libjava/gnu/gcj/convert/Input_UTF8.java
Normal file
|
@ -0,0 +1,107 @@
|
|||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.convert;
|
||||
|
||||
public class Input_UTF8 extends BytesToUnicode
|
||||
{
|
||||
public String getName() { return "UTF8"; }
|
||||
|
||||
int partial = 0;
|
||||
int partial_bytes_expected = 0;
|
||||
//int suggogate_second = -1;
|
||||
|
||||
public int read (char[] outbuffer, int outpos, int outlength)
|
||||
{
|
||||
int origpos = outpos;
|
||||
for (;;)
|
||||
{
|
||||
if (outpos >= outlength)
|
||||
break;
|
||||
if (inpos >= inlength)
|
||||
break;
|
||||
int b = inbuffer[inpos++];
|
||||
if (b >= 0)
|
||||
outbuffer[outpos++] = (char) b;
|
||||
else
|
||||
{
|
||||
if ((b & 0xC0) == 0x80) // Continuation byte
|
||||
{
|
||||
partial = (partial << 6) | (b & 0x3F);
|
||||
--partial_bytes_expected;
|
||||
if (partial_bytes_expected == 1)
|
||||
{
|
||||
if (partial > (0xFFFF>>6))
|
||||
{
|
||||
// The next continuation byte will cause the result
|
||||
// to exceed 0xFFFF, so we must use a surrogate pair.
|
||||
// The "Unicode scalar value" (see D28 in section 3.7
|
||||
// of the Unicode Standard 2.0) is defined as:
|
||||
// value == (hi-0xD800)*0x400+(lo-0xDC00)+0x10000,
|
||||
// where (hi, lo) is the Unicode surrogate pair.
|
||||
// After reading the first three bytes, we have:
|
||||
// partial == (value >> 6).
|
||||
// Substituting and simplifying, we get:
|
||||
// partial == (hi-0xD800)*0x10+((lo-0xDC00)>>6)+0x400.
|
||||
// The definition lo>=0xDC00 && lo<=0xDFFF implies
|
||||
// that (lo-0xDC00)>>6 is in the range 0..15.
|
||||
// Hence we can infer (partial-0x400)>>4 == (hi-0xDB00)
|
||||
// and we can emit the high-surrogate without waiting
|
||||
// for the final byte:
|
||||
outbuffer[outpos++] = (char) (0xDA00+(partial>>4));
|
||||
|
||||
// Now we want to set it up so that when we read
|
||||
// the final byte on the next iteration, we will
|
||||
// get the low-surrogate without special handling.
|
||||
// I.e. we want:
|
||||
// lo == (next_partial << 6) | (next & 0x3F)
|
||||
// where next is the next input byte and next_partial
|
||||
// is the value of partial at the end of this
|
||||
// iteration. This implies: next_partial == lo >> 6.
|
||||
// We can simplify the previous:
|
||||
// partial == (hi-0xD800)*0x10+((lo-0xDC00)>>6)+0x400,
|
||||
// to: partial == (hi-0xD800)*0x10+(lo>>6)+0x90.
|
||||
// Inserting the values of hi and next_partial,
|
||||
// and simplifying, we get: partial ==
|
||||
// ( (partial-0x400)&~0xF) + next_partial + 0x90.
|
||||
// Solving for next_partial, we get:
|
||||
// next_partial = partial+0x400-0x90-(partial&~0xF):
|
||||
// or: next_partial = (partial&0xF) + 0x370. Hence:
|
||||
partial = (partial & 0xF) + 0x370;
|
||||
}
|
||||
}
|
||||
else if (partial_bytes_expected == 0)
|
||||
{
|
||||
outbuffer[outpos++] = (char) partial;
|
||||
partial = 0;
|
||||
partial_bytes_expected = 0;
|
||||
}
|
||||
}
|
||||
else // prefix byte
|
||||
{
|
||||
if ((b & 0xE) == 0xC0)
|
||||
{
|
||||
partial = b & 0x1F;
|
||||
partial_bytes_expected = 1;
|
||||
}
|
||||
else if ((b & 0xF) == 0xF0)
|
||||
{
|
||||
partial = b & 0xF;
|
||||
partial_bytes_expected = 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
partial = b & 7;
|
||||
partial_bytes_expected = 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return outpos - origpos;
|
||||
}
|
||||
}
|
6880
libjava/gnu/gcj/convert/JIS0208.h
Normal file
6880
libjava/gnu/gcj/convert/JIS0208.h
Normal file
File diff suppressed because it is too large
Load diff
7325
libjava/gnu/gcj/convert/JIS0208_to_Unicode.cc
Normal file
7325
libjava/gnu/gcj/convert/JIS0208_to_Unicode.cc
Normal file
File diff suppressed because it is too large
Load diff
6068
libjava/gnu/gcj/convert/JIS0212.h
Normal file
6068
libjava/gnu/gcj/convert/JIS0212.h
Normal file
File diff suppressed because it is too large
Load diff
6471
libjava/gnu/gcj/convert/JIS0212_to_Unicode.cc
Normal file
6471
libjava/gnu/gcj/convert/JIS0212_to_Unicode.cc
Normal file
File diff suppressed because it is too large
Load diff
31
libjava/gnu/gcj/convert/Output_8859_1.java
Normal file
31
libjava/gnu/gcj/convert/Output_8859_1.java
Normal file
|
@ -0,0 +1,31 @@
|
|||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.convert;
|
||||
|
||||
public class Output_8859_1 extends UnicodeToBytes
|
||||
{
|
||||
public String getName() { return "8859_1"; }
|
||||
|
||||
/**
|
||||
* @return number of chars converted. */
|
||||
public int write (char[] inbuffer, int inpos, int inlength)
|
||||
{
|
||||
int count = this.count;
|
||||
byte[] buf = this.buf;
|
||||
int avail = buf.length - count;
|
||||
if (inlength > avail)
|
||||
inlength = avail;
|
||||
for (int i = inlength; --i >= 0; )
|
||||
{
|
||||
buf[count++] = (byte) inbuffer[inpos++];
|
||||
}
|
||||
this.count = count;
|
||||
return inlength;
|
||||
}
|
||||
}
|
82
libjava/gnu/gcj/convert/Output_JavaSrc.java
Normal file
82
libjava/gnu/gcj/convert/Output_JavaSrc.java
Normal file
|
@ -0,0 +1,82 @@
|
|||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.convert;
|
||||
|
||||
/** Convert Unicode to Ascii with \ u XXXX-escapes. */
|
||||
|
||||
public class Output_JavaSrc extends UnicodeToBytes
|
||||
{
|
||||
public String getName() { return "JavaSrc"; }
|
||||
|
||||
// Number of bytes remaining before pending_char has been written.
|
||||
int todo;
|
||||
int pending_char;
|
||||
|
||||
public int write (char[] inbuffer, int inpos, int inlength)
|
||||
{
|
||||
int start_pos = inpos;
|
||||
int avail = buf.length - count;
|
||||
for (;;)
|
||||
{
|
||||
if (avail == 0)
|
||||
break;
|
||||
switch (todo)
|
||||
{
|
||||
case 1:
|
||||
if (pending_char == '\\')
|
||||
{
|
||||
buf[count++] = (byte) '\\';
|
||||
avail--;
|
||||
todo = 0;
|
||||
continue;
|
||||
}
|
||||
/* ... else fall through ... */
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
todo--;
|
||||
int digit = ((int) pending_char >> (todo * 4)) & 0xF;
|
||||
buf[count++] = (byte) Character.forDigit(digit, 16);
|
||||
avail--;
|
||||
continue;
|
||||
case 5:
|
||||
buf[count++] = (byte) 'u';
|
||||
avail--;
|
||||
todo = 4;
|
||||
continue;
|
||||
default:
|
||||
;
|
||||
}
|
||||
if (inlength == 0)
|
||||
break;
|
||||
char ch = inbuffer[inpos++];
|
||||
inlength--;
|
||||
if (ch == '\\')
|
||||
{
|
||||
buf[count++] = (byte) '\\';
|
||||
pending_char = ch;
|
||||
todo = 1;
|
||||
avail--;
|
||||
}
|
||||
else if (ch < 127)
|
||||
{
|
||||
buf[count++] = (byte) ch;
|
||||
avail--;
|
||||
}
|
||||
else
|
||||
{
|
||||
buf[count++] = (byte) '\\';
|
||||
pending_char = ch;
|
||||
todo = 5;
|
||||
avail--;
|
||||
}
|
||||
}
|
||||
return inpos - start_pos;
|
||||
}
|
||||
}
|
108
libjava/gnu/gcj/convert/Output_UTF8.java
Normal file
108
libjava/gnu/gcj/convert/Output_UTF8.java
Normal file
|
@ -0,0 +1,108 @@
|
|||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.convert;
|
||||
|
||||
public class Output_UTF8 extends UnicodeToBytes
|
||||
{
|
||||
public String getName() { return "UTF8"; }
|
||||
|
||||
/** True if a surrogate pair should be emitted as a single UTF8 sequence.
|
||||
* Otherwise, a surrogate pair is treated as two separate characters.
|
||||
* Also, '\0' is emitted as {0} if true, and as {0xC0,0x80} if false. */
|
||||
public boolean standardUTF8;
|
||||
|
||||
// Saves the previous char if it was a high-surrogate.
|
||||
char hi_part;
|
||||
// Value of imcomplete character.
|
||||
int value;
|
||||
// Number of continuation bytes still to emit.
|
||||
int bytes_todo;
|
||||
|
||||
public int write (char[] inbuffer, int inpos, int inlength)
|
||||
{
|
||||
int start_pos = inpos;
|
||||
int avail = buf.length - count;
|
||||
for (;;)
|
||||
{
|
||||
if (inlength == 0 || avail == 0)
|
||||
break;
|
||||
// The algororith is made more complicated because we want to write
|
||||
// at least one byte in the output buffer, if there is room for
|
||||
// that byte, and at least one input character is available.
|
||||
// This makes the code more robust, since client code will
|
||||
// always "make progress", even in the complicated cases,
|
||||
// where the output buffer only has room for only *part* of a
|
||||
// multi-byte sequence, or the input char buffer only has half
|
||||
// of a surrogate pair (when standardUTF8 is set), or both.
|
||||
|
||||
// Handle continuation characters we did not have room for before.
|
||||
if (bytes_todo > 0)
|
||||
{
|
||||
do
|
||||
{
|
||||
bytes_todo--;
|
||||
buf[count++] = (byte)
|
||||
(((value >> (bytes_todo * 6)) & 0xC0) | 0x80);
|
||||
avail--;
|
||||
}
|
||||
while (bytes_todo > 0 && avail > 0);
|
||||
continue;
|
||||
}
|
||||
char ch = inbuffer[inpos++];
|
||||
inlength--;
|
||||
if (ch < 128 && (ch != 0 || standardUTF8))
|
||||
{
|
||||
avail--;
|
||||
buf[count++] = (byte) ch;
|
||||
}
|
||||
else if (ch <= 0x07FF)
|
||||
{
|
||||
buf[count++] = (byte) (0xC0 | (ch >> 6));
|
||||
if (--avail > 0)
|
||||
{
|
||||
buf[count++] = (byte) ((ch & 0x3F) | 0x80);
|
||||
avail--;
|
||||
}
|
||||
else
|
||||
{
|
||||
value = ch;
|
||||
bytes_todo = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (ch >= 0xD800 && ch <= 0xDFFF && standardUTF8)
|
||||
{
|
||||
if (ch <= 0xDBFF) // High surrogates
|
||||
{
|
||||
// The first byte is (0xF0 | value>>18), where value is the
|
||||
// Unicode scalar value of the combine character - which
|
||||
// we may not know yet. But from substituting:
|
||||
// value == (hi-0xD800)*0x400+(lo-0xDC00)+0x10000,
|
||||
// hi==ch, and cancelling we get:
|
||||
buf[count++] = (byte) (0xF0 | ((ch-0xD800) >> 8));
|
||||
avail--;
|
||||
hi_part = ch;
|
||||
}
|
||||
else // Low surrogates
|
||||
{
|
||||
value = (hi_part - 0xD800) * 0x400 + (ch - 0xDC00) + 0x10000;
|
||||
bytes_todo = 3;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
buf[count++] = (byte) (0xE0 | (ch >> 12));
|
||||
value = ch;
|
||||
avail--;
|
||||
bytes_todo = 2;
|
||||
}
|
||||
}
|
||||
return inpos - start_pos;
|
||||
}
|
||||
}
|
90
libjava/gnu/gcj/convert/UnicodeToBytes.java
Normal file
90
libjava/gnu/gcj/convert/UnicodeToBytes.java
Normal file
|
@ -0,0 +1,90 @@
|
|||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.convert;
|
||||
|
||||
public abstract class UnicodeToBytes
|
||||
{
|
||||
/** Buffer to emit bytes to.
|
||||
* The locations buf[count] ... buf[buf.length-1] are available. */
|
||||
public byte[] buf;
|
||||
public int count;
|
||||
|
||||
static Class defaultEncodingClass;
|
||||
|
||||
static synchronized void getDefaultEncodingClass()
|
||||
{
|
||||
// Test (defaultEncodingClass == null) again in case of race condition.
|
||||
if (defaultEncodingClass == null)
|
||||
{
|
||||
String encoding = System.getProperty("file.encoding");
|
||||
String className = "gnu.gcj.convert.Output_"+encoding;
|
||||
try
|
||||
{
|
||||
defaultEncodingClass = Class.forName(className);
|
||||
}
|
||||
catch (ClassNotFoundException ex)
|
||||
{
|
||||
throw new NoClassDefFoundError("missing default encoding "
|
||||
+ encoding + " (class "
|
||||
+ className + " not found)");
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public abstract String getName();
|
||||
|
||||
public static UnicodeToBytes getDefaultEncoder()
|
||||
{
|
||||
try
|
||||
{
|
||||
if (defaultEncodingClass == null)
|
||||
getDefaultEncodingClass();
|
||||
return (UnicodeToBytes) defaultEncodingClass.newInstance();
|
||||
}
|
||||
catch (Throwable ex)
|
||||
{
|
||||
return new Output_8859_1();
|
||||
}
|
||||
}
|
||||
|
||||
/** Get a char-stream->byte-stream converter given an encoding name. */
|
||||
public static UnicodeToBytes getEncoder (String encoding)
|
||||
throws java.io.UnsupportedEncodingException
|
||||
{
|
||||
String className = "gnu.gcj.convert.Output_"+encoding;
|
||||
Class encodingClass;
|
||||
try
|
||||
{
|
||||
encodingClass = Class.forName(className);
|
||||
return (UnicodeToBytes) encodingClass.newInstance();
|
||||
}
|
||||
catch (Throwable ex)
|
||||
{
|
||||
throw new java.io.UnsupportedEncodingException(encoding + " ("
|
||||
+ ex + ')');
|
||||
}
|
||||
}
|
||||
|
||||
public final void setOutput(byte[] buffer, int count)
|
||||
{
|
||||
this.buf = buffer;
|
||||
this.count = count;
|
||||
}
|
||||
|
||||
/** Convert chars to bytes.
|
||||
* Converted bytes are written to buf, starting at count.
|
||||
* @param inbuffer sources of characters to convert
|
||||
* @param inpos index of initial character ininbuffer to convert
|
||||
* @param inlength number of characters to convert
|
||||
* @return number of chars converted
|
||||
* Also, this.count is increment by the number of bytes converted.
|
||||
*/
|
||||
public abstract int write (char[] inbuffer, int inpos, int inlength);
|
||||
}
|
154
libjava/gnu/gcj/convert/gen-from-JIS.c
Normal file
154
libjava/gnu/gcj/convert/gen-from-JIS.c
Normal file
|
@ -0,0 +1,154 @@
|
|||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
||||
/* One entry of a JIS mapping table: a two-byte JIS code point and
   the Unicode character it corresponds to.  */
struct chval
{
  unsigned char b1;   /* 1st byte */
  unsigned char b2;   /* 2nd byte */
  unsigned short uc;  /* unicode value */
};

/* The JIS*.h headers consist of MAP(b1, b2, unicode) invocations;
   MAP is defined before each inclusion to turn every invocation
   into one array initializer.  */
#define MAP(B1, B2, C) { B1, B2, C },

/* JIS X 0208 entries, terminated by a sentinel whose b1 is 255.  */
struct chval chtab_0208[] = {
#include "JIS0208.h"
  { 255, 255, 0}
};

/* JIS X 0212 entries, terminated by a sentinel whose b1 is 255.  */
struct chval chtab_0212[] = {
#include "JIS0212.h"
  { 255, 255, 0}
};
#undef MAP

/* Both tables combined, for the "toJIS" mode, which sorts this array
   by Unicode value.  The 0212 entries are distinguished by having
   the high bit of their first byte set.  */
struct chval sorted[] = {
#define MAP(B1, B2, C) { B1, B2, C },
#include "JIS0208.h"
#undef MAP
#define MAP(B1, B2, C) { 0x80|B1, B2, C },
#include "JIS0212.h"
#undef MAP
};

/* The table selected by the command-line argument.  */
struct chval *chtab;
|
||||
|
||||
int
|
||||
compare (void *p1, void *p2)
|
||||
{
|
||||
struct chval *c1 = (struct chval *) p1;
|
||||
struct chval *c2 = (struct chval *) p2;
|
||||
return (int) c1->uc - (int) c2->uc;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char** argv)
|
||||
{
|
||||
FILE *out = stdout;
|
||||
unsigned min1 = 256, max1 = 0, min2 = 256, max2 = 0, count = 0;
|
||||
unsigned short low1_uc = 0xFFFF, high1_uc = 0;
|
||||
unsigned short low2_uc = 0xFFFF, high2_uc = 0;
|
||||
int i; int row, col;
|
||||
if (strcmp (argv[1], "JIS0208") == 0)
|
||||
chtab = chtab_0208;
|
||||
else if (strcmp (argv[1], "JIS0212") == 0)
|
||||
chtab = chtab_0212;
|
||||
else if (strcmp (argv[1], "toJIS") == 0)
|
||||
{
|
||||
int i;
|
||||
int count = sizeof(sorted)/sizeof(struct chval);
|
||||
qsort (sorted, count, sizeof(struct chval),
|
||||
compare);
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
fprintf (out, " 0x%04x -> 0x%02x, 0x%02x\n",
|
||||
sorted[i].uc, sorted[i].b1, sorted[i].b2);
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf (stderr, "bad argument!");
|
||||
exit (-1);
|
||||
}
|
||||
for (i = 0; chtab[i].b1 != 255; i++)
|
||||
{
|
||||
if (chtab[i].b1 < min1) min1 = chtab[i].b1;
|
||||
if (chtab[i].b2 < min2) min2 = chtab[i].b2;
|
||||
if (chtab[i].b1 > max1) max1 = chtab[i].b1;
|
||||
if (chtab[i].b2 > max2) max2 = chtab[i].b2;
|
||||
count++;
|
||||
}
|
||||
fprintf(stderr, "1st byte ranges from %d to %d.\n", min1, max1);
|
||||
fprintf(stderr, "2nd byte ranges from %d to %d.\n", min2, max2);
|
||||
|
||||
fprintf(out,"/* This file is automatically generated from %s.TXT. */\n",
|
||||
argv[1]);
|
||||
fprintf(out, "unsigned short %s_to_Unicode[%d][%d] = {\n",
|
||||
argv[1], max1 - min1 + 1, max2 - min2 + 1);
|
||||
i = 0;
|
||||
for (row = min1; row <= max1; row++)
|
||||
{
|
||||
fprintf(out, "/* 1st byte: %d */ { ", row);
|
||||
if (row < chtab[i].b1)
|
||||
{
|
||||
fprintf(out, "0 }, /* unused row */\n");
|
||||
}
|
||||
else if (row > chtab[i].b1)
|
||||
{
|
||||
fprintf (stderr, "error - char table out of order!\n");
|
||||
exit (-1);
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(out, "\n");
|
||||
for (col = min2; col <= max2; col++)
|
||||
{
|
||||
if (row == chtab[i].b1 && col == chtab[i].b2)
|
||||
{
|
||||
unsigned uc = chtab[i].uc;
|
||||
if (uc < 0x2000)
|
||||
{
|
||||
if (uc > high1_uc)
|
||||
high1_uc = uc;
|
||||
if (uc < low1_uc)
|
||||
low1_uc = uc;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (uc > high2_uc)
|
||||
high2_uc = uc;
|
||||
if (uc < low2_uc)
|
||||
low2_uc = uc;
|
||||
}
|
||||
fprintf (out, " /* 2nd byte: %d */ 0x%04x",
|
||||
chtab[i].b2, uc);
|
||||
i++;
|
||||
}
|
||||
else if (row < chtab[i].b1
|
||||
|| (row == chtab[i].b1 && col < chtab[i].b2))
|
||||
{
|
||||
fprintf (out, " 0");
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf (stderr, "error - char table our of order!\n");
|
||||
exit (-1);
|
||||
}
|
||||
if (col != max2)
|
||||
fprintf (out, ",\n");
|
||||
}
|
||||
fprintf(out, row == max1 ? "}\n" : "},\n");
|
||||
}
|
||||
}
|
||||
fprintf(out, "};\n");
|
||||
fprintf(stderr, "total number of characters is %d.\n", count);
|
||||
fprintf(stderr, "Range is 0x%04x-0x%04x and 0x%04x-0x%04x.\n",
|
||||
low1_uc, high1_uc, low2_uc, high2_uc);
|
||||
return 0;
|
||||
}
|
101
libjava/gnu/gcj/convert/natInput_EUCJIS.cc
Normal file
101
libjava/gnu/gcj/convert/natInput_EUCJIS.cc
Normal file
|
@ -0,0 +1,101 @@
|
|||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
#include <config.h>
|
||||
#include <cni.h>
|
||||
#include <gnu/gcj/convert/Input_EUCJIS.h>
|
||||
|
||||
#define ERROR_CHAR 0xFFFD
|
||||
|
||||
extern unsigned short JIS0208_to_Unicode[84][94];
|
||||
extern unsigned short JIS0212_to_Unicode[76][94];
|
||||
|
||||
// Convert EUC-JIS bytes from inbuffer[inpos..inlength-1] into chars
// stored at outbuffer[outpos...].  At most OUTLENGTH chars are
// stored.  Returns the number of chars stored; inpos is advanced by
// the number of bytes consumed.
//
// The codeset field is the decoder state and persists across calls:
//   0 - expecting ASCII or the first byte of a multi-byte character
//   1 - expecting the second byte of a JIS X 0208 character
//   2 - expecting a half-width katakana byte (after SS2)
//   3 - expecting the first JIS X 0212 byte (after SS3)
//   4 - expecting the second JIS X 0212 byte
// Bytes that do not map to a character produce ERROR_CHAR.
jint
gnu::gcj::convert::Input_EUCJIS::read(jcharArray outbuffer, jint outpos,
                                      jint outlength)
{
  jint start_outpos = outpos;
  for (;;)
    {
      // outlength is a count, not an end index: the output upper
      // bound is outbuffer[start_outpos+outlength-1].
      if (outpos - start_outpos >= outlength)
        break;
      if (inpos >= inlength)
        break;
      int b = ((unsigned char*) elements(inbuffer))[inpos++];
      if (codeset == 0) // ASCII or JIS-Roman
        {
          if (b < 128)
            {
#if 0
              // Technically, we should translate 0x5c to Yen symbol;
              // in practice, it is not clear.
              if (b == 0x5c)
                b = 0x00A5; // Yen sign.
#endif
              elements(outbuffer)[outpos++] = (char) b;
            }
          else
            {
              if (b == 0x8E) // SS2
                codeset = 2;
              else if (b == 0x8F) // SS3
                codeset = 3;
              else
                {
                  codeset = 1;
                  first_byte = b;
                }
            }
        }
      else if (codeset == 1) // JIS X 0208:1997
        {
          first_byte -= 0x80 + 33;
          b -= 0x80 + 33;
          if ((unsigned) first_byte >= 84 || (unsigned) b >= 94)
            b = ERROR_CHAR;
          else
            {
              b = JIS0208_to_Unicode[first_byte][b];
              if (b == 0)
                b = ERROR_CHAR;
            }
          elements(outbuffer)[outpos++] = b;
          codeset = 0;
        }
      else if (codeset == 2) // Half-width katakana
        {
          if (b >= 0xA1 && b <= 0xDF)
            b += 0xFF61 - 0xA1;
          else
            b = ERROR_CHAR;
          elements(outbuffer)[outpos++] = b;
          codeset = 0;
        }
      else if (codeset == 3) // second byte of JIS X 0212-1990
        {
          first_byte = b;
          codeset = 4;
        }
      else // codeset == 4 // third byte of JIS X 0212-1990
        {
          first_byte -= 0x80 + 34;
          b -= 0x80 + 33;
          if ((unsigned) first_byte >= 76 || (unsigned) b >= 94)
            b = ERROR_CHAR;
          else
            {
              // This is a JIS X 0212 character, so it must be looked
              // up in the 0212 table; the bounds checked above are
              // the dimensions of that table.
              b = JIS0212_to_Unicode[first_byte][b];
              if (b == 0)
                b = ERROR_CHAR;
            }
          elements(outbuffer)[outpos++] = b;
          codeset = 0;
        }
    }
  return outpos - start_outpos;
}
|
285
libjava/gnu/gcj/protocol/http/Connection.java
Normal file
285
libjava/gnu/gcj/protocol/http/Connection.java
Normal file
|
@ -0,0 +1,285 @@
|
|||
// Connection.java - Implementation of HttpURLConnection for http protocol.
|
||||
|
||||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.protocol.http;
|
||||
|
||||
import java.net.*;
|
||||
import java.io.*;
|
||||
import java.util.Vector;
|
||||
import java.util.Hashtable;
|
||||
import java.util.Enumeration;
|
||||
|
||||
/**
|
||||
* @author Warren Levy <warrenl@cygnus.com>
|
||||
* @date March 29, 1999.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Written using on-line Java Platform 1.2 API Specification, as well
|
||||
* as "The Java Class Libraries", 2nd edition (Addison-Wesley, 1998).
|
||||
* Status: Minimal subset of functionality. Proxies and Redirects
|
||||
* not yet handled. FileNameMap handling needs to be considered.
|
||||
* useCaches, ifModifiedSince, and allowUserInteraction need
|
||||
* consideration as well as doInput and doOutput.
|
||||
*/
|
||||
|
||||
class Connection extends HttpURLConnection
{
  /** Line terminator required by HTTP for the request line and headers. */
  private static final String CRLF = "\r\n";

  /** Socket to the server; null until connect() opens it and after disconnect(). */
  protected Socket sock = null;

  /** Defaults copied into every new connection's request properties. */
  private static Hashtable defRequestProperties = new Hashtable();

  /** Request headers to send for this connection. */
  private Hashtable requestProperties;

  /** Response headers, keyed by lower-cased header name. */
  private Hashtable hdrHash = new Hashtable();

  /** Raw response header lines (status line included) in arrival order. */
  private Vector hdrVec = new Vector();

  /** True once the response headers have been read from the socket. */
  private boolean gotHeaders = false;

  /** Shared buffered view of the socket input, created lazily. */
  private BufferedInputStream bufferedIn;

  /**
   * Creates an unconnected connection for the given URL, seeded with a
   * copy of the current default request properties.
   */
  public Connection(URL url)
  {
    super(url);
    requestProperties = (Hashtable) defRequestProperties.clone();
  }

  // Override method in URLConnection.
  public static void setDefaultRequestProperty(String key, String value)
  {
    defRequestProperties.put(key, value);
  }

  // Override method in URLConnection.
  public static String getDefaultRequestProperty(String key)
  {
    return (String) defRequestProperties.get(key);
  }

  // Override method in URLConnection.
  public void setRequestProperty(String key, String value)
  {
    if (connected)
      throw new IllegalAccessError("Connection already established.");

    requestProperties.put(key, value);
  }

  // Override method in URLConnection.
  public String getRequestProperty(String key)
  {
    if (connected)
      throw new IllegalAccessError("Connection already established.");

    return (String) requestProperties.get(key);
  }

  /**
   * Opens the socket and sends the request line, the Host header and all
   * request properties.  Does nothing if already connected.
   *
   * @throws IOException if the host cannot be resolved or the socket
   *         cannot be opened
   */
  public void connect() throws IOException
  {
    // Call is ignored if already connected.
    if (connected)
      return;

    // Get address and port number.
    int port;
    InetAddress destAddr = InetAddress.getByName(url.getHost());
    if ((port = url.getPort()) == -1)
      port = 80;

    // Open socket and output stream.
    sock = new Socket(destAddr, port);
    PrintWriter out = new PrintWriter(sock.getOutputStream());

    // An empty path must be sent as "/", otherwise the request line
    // has no request-target at all.
    String file = url.getFile();
    if (file == null || file.length() == 0)
      file = "/";

    // Send request including any request properties that were set.
    // HTTP requires CRLF line terminators; a bare LF is not valid.
    out.print(getRequestMethod() + " " + file + " HTTP/1.1" + CRLF);
    out.print("Host: " + url.getHost() + ":" + port + CRLF);
    Enumeration reqKeys = requestProperties.keys();
    while (reqKeys.hasMoreElements())
      {
        // Look the value up by key rather than walking a second
        // enumeration and assuming both iterate in the same order.
        Object key = reqKeys.nextElement();
        out.print(key + ": " + requestProperties.get(key) + CRLF);
      }
    out.print(CRLF);
    out.flush();
    connected = true;
  }

  /**
   * Closes the socket, if any, and marks the connection as not connected.
   */
  public void disconnect()
  {
    if (sock != null)
      {
        try
          {
            sock.close();
          }
        catch (IOException ex)
          {
            ; // Ignore errors in closing socket.
          }
        sock = null;
      }
    connected = false;
  }

  // TODO: public boolean usingProxy()
  public boolean usingProxy()
  {
    throw new InternalError("HttpURLConnection.usingProxy not implemented");
  }

  // Override default method in URLConnection.
  public InputStream getInputStream() throws IOException
  {
    if (!connected)
      connect();

    if (bufferedIn == null)
      bufferedIn = new BufferedInputStream(sock.getInputStream());
    return bufferedIn;
  }

  // Override default method in URLConnection.
  public OutputStream getOutputStream() throws IOException
  {
    if (!connected)
      connect();

    return sock.getOutputStream();
  }

  /**
   * Returns the value of the named response header, or null if it is
   * absent or the headers could not be read.  The lookup is
   * case-insensitive because names are stored lower-cased.
   */
  public String getHeaderField(String name)
  {
    try
      {
        getHttpHeaders();
      }
    catch (IOException x)
      {
        return null;
      }
    return (String) hdrHash.get(name.toLowerCase());
  }

  /**
   * Returns the value part of the n-th stored header line, or null if n
   * is out of range or the headers could not be read.
   */
  public String getHeaderField(int n)
  {
    try
      {
        getHttpHeaders();
      }
    catch (IOException x)
      {
        return null;
      }
    // A negative index is out of range, not an error.
    if (n >= 0 && n < hdrVec.size())
      return getField((String) hdrVec.elementAt(n));

    return null;
  }

  /**
   * Returns the name part of the n-th stored header line, or null if n
   * is out of range, the line has no ':' or the headers could not be read.
   */
  public String getHeaderFieldKey(int n)
  {
    try
      {
        getHttpHeaders();
      }
    catch (IOException x)
      {
        return null;
      }
    // A negative index is out of range, not an error.
    if (n >= 0 && n < hdrVec.size())
      return getKey((String) hdrVec.elementAt(n));

    return null;
  }

  /** Returns the text before the first ':' of a header line, or null if none. */
  private String getKey(String str)
  {
    if (str == null)
      return null;
    int index = str.indexOf(':');
    if (index >= 0)
      return str.substring(0, index);
    else
      return null;
  }

  /**
   * Returns the trimmed text after the first ':' of a header line, or the
   * whole line when it contains no ':'.
   */
  private String getField(String str)
  {
    if (str == null)
      return null;
    int index = str.indexOf(':');
    if (index >= 0)
      return str.substring(index + 1).trim();
    else
      return str;
  }

  /**
   * Reads one CRLF-terminated line from the stream, one byte at a time so
   * that nothing beyond the terminator is consumed.
   *
   * Only the pair '\r' '\n' ends a line; a '\r' followed by any other
   * byte is kept as part of the line.  Bytes are decoded with the
   * platform default charset.
   *
   * @param in stream positioned at the start of a line
   * @return the line without its terminator; the partial line if the
   *         stream ends first; null if the stream is already at its end
   * @throws IOException if reading fails
   */
  static String readHeaderLine(InputStream in) throws IOException
  {
    ByteArrayOutputStream line = new ByteArrayOutputStream(100);
    boolean sawData = false;
    boolean pendingCR = false;
    while (true)
      {
        int b = in.read();
        if (b < 0)
          {
            // End of stream: stop instead of spinning forever.
            if (pendingCR)
              line.write('\r');
            return sawData ? line.toString() : null;
          }
        sawData = true;
        if (pendingCR)
          {
            if (b == '\n')
              return line.toString();
            // The '\r' was not part of a terminator; keep it.
            line.write('\r');
            pendingCR = false;
          }
        if (b == '\r')
          pendingCR = true;
        else
          line.write(b);
      }
  }

  /**
   * Reads the response headers once and stores them in hdrVec and hdrHash.
   * Parsing stops at the first empty line or at end of stream.
   *
   * @throws IOException if connecting or reading fails
   */
  private void getHttpHeaders() throws IOException
  {
    if (gotHeaders)
      return;

    connect();

    // A BufferedReader is not used here because it would read past the
    // end of the headers and swallow the start of the content.
    if (bufferedIn == null)
      bufferedIn = new BufferedInputStream(sock.getInputStream());

    // Mark only after the stream is available, so a failed connect can
    // be retried, but a partially consumed stream is never re-parsed.
    gotHeaders = true;

    String line;
    while ((line = readHeaderLine(bufferedIn)) != null
           && line.length() > 0)
      {
        hdrVec.addElement(line);
        String key = getKey(line);
        if (key != null)
          hdrHash.put(key.toLowerCase(), getField(line));
      }
  }
}
|
35
libjava/gnu/gcj/protocol/http/Handler.java
Normal file
35
libjava/gnu/gcj/protocol/http/Handler.java
Normal file
|
@ -0,0 +1,35 @@
|
|||
// Handler.java - URLStreamHandler for http protocol.
|
||||
|
||||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.protocol.http;
|
||||
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.net.URLStreamHandler;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* @author Warren Levy <warrenl@cygnus.com>
|
||||
* @date March 26, 1999.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Written using on-line Java Platform 1.2 API Specification, as well
|
||||
* as "The Java Class Libraries", 2nd edition (Addison-Wesley, 1998).
|
||||
* Status: Minimal functionality.
|
||||
*/
|
||||
|
||||
public class Handler extends URLStreamHandler
|
||||
{
|
||||
protected URLConnection openConnection(URL url) throws IOException
|
||||
{
|
||||
return new Connection(url);
|
||||
}
|
||||
}
|
82
libjava/gnu/gcj/text/BaseBreakIterator.java
Normal file
82
libjava/gnu/gcj/text/BaseBreakIterator.java
Normal file
|
@ -0,0 +1,82 @@
|
|||
// Base class for default BreakIterators.
|
||||
|
||||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.text;
|
||||
|
||||
import java.text.BreakIterator;
|
||||
import java.text.CharacterIterator;
|
||||
|
||||
/**
|
||||
* @author Tom Tromey <tromey@cygnus.com>
|
||||
* @date March 22, 1999
|
||||
*/
|
||||
|
||||
public abstract class BaseBreakIterator extends BreakIterator
{
  /** The text being scanned; its index is this iterator's current position. */
  protected CharacterIterator iter;

  /** Returns the current position, i.e. the index of the text iterator. */
  public int current ()
  {
    return iter.getIndex();
  }

  /** Moves to the start of the text and returns that index. */
  public int first ()
  {
    iter.first();
    return iter.getBeginIndex();
  }

  /**
   * Returns the first boundary found by next() when starting from pos.
   * The iterator is left on the returned boundary; if there is none,
   * DONE is returned and the previous position is restored.
   *
   * @param pos offset to start from; passed to CharacterIterator.setIndex
   */
  public int following (int pos)
  {
    int save = iter.getIndex();
    iter.setIndex(pos);
    int r = next ();
    // Only undo the move when no boundary was found; otherwise callers
    // that continue with next()/previous() must start from the boundary.
    if (r == DONE)
      iter.setIndex(save);
    return r;
  }

  /** Returns the text iterator currently being scanned. */
  public CharacterIterator getText ()
  {
    return iter;
  }

  /**
   * Moves to the end of the text and returns that index.
   */
  public int last ()
  {
    // CharacterIterator.last() would stop on the final character
    // (end - 1); the last boundary is one past it, so position there to
    // keep current() in agreement with the returned value.
    int end = iter.getEndIndex();
    iter.setIndex(end);
    return end;
  }

  /**
   * Advances n boundaries (backwards when n is negative) and returns the
   * boundary reached, DONE if the text was exhausted first, or the
   * current position when n is zero.
   */
  public int next (int n)
  {
    int r = iter.getIndex ();
    if (n > 0)
      {
        while (n > 0 && r != DONE)
          {
            r = next ();
            --n;
          }
      }
    else if (n < 0)
      {
        while (n < 0 && r != DONE)
          {
            r = previous ();
            ++n;
          }
      }
    return r;
  }

  /** Replaces the text to scan; the new iterator's own index is kept. */
  public void setText (CharacterIterator newText)
  {
    iter = newText;
  }
}
|
188
libjava/gnu/gcj/text/CharacterBreakIterator.java
Normal file
188
libjava/gnu/gcj/text/CharacterBreakIterator.java
Normal file
|
@ -0,0 +1,188 @@
|
|||
// Default character BreakIterator.
|
||||
|
||||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.text;
|
||||
|
||||
import java.text.BreakIterator;
|
||||
import java.text.CharacterIterator;
|
||||
|
||||
/**
|
||||
* @author Tom Tromey <tromey@cygnus.com>
|
||||
* @date March 19, 1999
|
||||
* Written using The Unicode Standard, Version 2.0.
|
||||
*/
|
||||
|
||||
public class CharacterBreakIterator extends BaseBreakIterator
|
||||
{
|
||||
// Hangul Jamo constants from Unicode book.
|
||||
private static final int LBase = 0x1100;
|
||||
private static final int VBase = 0x1161;
|
||||
private static final int TBase = 0x11a7;
|
||||
private static final int LCount = 19;
|
||||
private static final int VCount = 21;
|
||||
private static final int TCount = 28;
|
||||
|
||||
// Information about surrogates.
|
||||
private static final int highSurrogateStart = 0xD800;
|
||||
private static final int highSurrogateEnd = 0xDBFF;
|
||||
private static final int lowSurrogateStart = 0xDC00;
|
||||
private static final int lowSurrogateEnd = 0xDFFF;
|
||||
|
||||
public Object clone ()
|
||||
{
|
||||
return new CharacterBreakIterator (this);
|
||||
}
|
||||
|
||||
public CharacterBreakIterator ()
|
||||
{
|
||||
iter = null; // FIXME?
|
||||
}
|
||||
|
||||
private CharacterBreakIterator (CharacterBreakIterator other)
|
||||
{
|
||||
iter = (CharacterIterator) other.iter.clone();
|
||||
}
|
||||
|
||||
// Some methods to tell us different properties of characters.
|
||||
private final boolean isL (char c)
|
||||
{
|
||||
return c >= LBase && c <= LBase + LCount;
|
||||
}
|
||||
private final boolean isV (char c)
|
||||
{
|
||||
return c >= VBase && c <= VBase + VCount;
|
||||
}
|
||||
private final boolean isT (char c)
|
||||
{
|
||||
return c >= TBase && c <= TBase + TCount;
|
||||
}
|
||||
private final boolean isLVT (char c)
|
||||
{
|
||||
return isL (c) || isV (c) || isT (c);
|
||||
}
|
||||
private final boolean isHighSurrogate (char c)
|
||||
{
|
||||
return c >= highSurrogateStart && c <= highSurrogateEnd;
|
||||
}
|
||||
private final boolean isLowSurrogate (char c)
|
||||
{
|
||||
return c >= lowSurrogateStart && c <= lowSurrogateEnd;
|
||||
}
|
||||
|
||||
public int next ()
|
||||
{
|
||||
int end = iter.getEndIndex();
|
||||
if (iter.getIndex() == end)
|
||||
return DONE;
|
||||
|
||||
char c;
|
||||
for (char prev = CharacterIterator.DONE; iter.getIndex() < end; prev = c)
|
||||
{
|
||||
c = iter.next();
|
||||
if (c == CharacterIterator.DONE)
|
||||
break;
|
||||
int type = Character.getType(c);
|
||||
|
||||
// Break after paragraph separators.
|
||||
if (type == Character.PARAGRAPH_SEPARATOR)
|
||||
break;
|
||||
|
||||
// Now we need some lookahead.
|
||||
char ahead = iter.next();
|
||||
iter.previous();
|
||||
if (ahead == CharacterIterator.DONE)
|
||||
break;
|
||||
int aheadType = Character.getType(ahead);
|
||||
|
||||
if (aheadType != Character.NON_SPACING_MARK
|
||||
&& ! isLowSurrogate (ahead)
|
||||
&& ! isLVT (ahead))
|
||||
break;
|
||||
if (! isLVT (c) && isLVT (ahead))
|
||||
break;
|
||||
if (isL (c) && ! isLVT (ahead)
|
||||
&& aheadType != Character.NON_SPACING_MARK)
|
||||
break;
|
||||
if (isV (c) && ! isV (ahead) && !isT (ahead)
|
||||
&& aheadType != Character.NON_SPACING_MARK)
|
||||
break;
|
||||
if (isT (c) && ! isT (ahead)
|
||||
&& aheadType != Character.NON_SPACING_MARK)
|
||||
break;
|
||||
|
||||
if (! isHighSurrogate (c) && isLowSurrogate (ahead))
|
||||
break;
|
||||
if (isHighSurrogate (c) && ! isLowSurrogate (ahead))
|
||||
break;
|
||||
if (! isHighSurrogate (prev) && isLowSurrogate (c))
|
||||
break;
|
||||
}
|
||||
|
||||
return iter.getIndex();
|
||||
}
|
||||
|
||||
public int previous ()
|
||||
{
|
||||
if (iter.getIndex() == iter.getBeginIndex())
|
||||
return DONE;
|
||||
|
||||
int start = iter.getBeginIndex();
|
||||
while (iter.getIndex() >= iter.getBeginIndex())
|
||||
{
|
||||
char c = iter.previous();
|
||||
if (c == CharacterIterator.DONE)
|
||||
break;
|
||||
int type = Character.getType(c);
|
||||
|
||||
if (type != Character.NON_SPACING_MARK
|
||||
&& ! isLowSurrogate (c)
|
||||
&& ! isLVT (c))
|
||||
break;
|
||||
|
||||
// Now we need some lookahead.
|
||||
char ahead = iter.previous();
|
||||
if (ahead == CharacterIterator.DONE)
|
||||
{
|
||||
iter.next();
|
||||
break;
|
||||
}
|
||||
char ahead2 = iter.previous();
|
||||
iter.next();
|
||||
iter.next();
|
||||
if (ahead2 == CharacterIterator.DONE)
|
||||
break;
|
||||
int aheadType = Character.getType(ahead);
|
||||
|
||||
if (aheadType == Character.PARAGRAPH_SEPARATOR)
|
||||
break;
|
||||
|
||||
if (isLVT (c) && ! isLVT (ahead))
|
||||
break;
|
||||
if (! isLVT (c) && type != Character.NON_SPACING_MARK
|
||||
&& isL (ahead))
|
||||
break;
|
||||
if (! isV (c) && ! isT (c) && type != Character.NON_SPACING_MARK
|
||||
&& isV (ahead))
|
||||
break;
|
||||
if (! isT (c) && type != Character.NON_SPACING_MARK
|
||||
&& isT (ahead))
|
||||
break;
|
||||
|
||||
if (isLowSurrogate (c) && ! isHighSurrogate (ahead))
|
||||
break;
|
||||
if (! isLowSurrogate (c) && isHighSurrogate (ahead))
|
||||
break;
|
||||
if (isLowSurrogate (ahead) && ! isHighSurrogate (ahead2))
|
||||
break;
|
||||
}
|
||||
|
||||
return iter.getIndex();
|
||||
}
|
||||
}
|
168
libjava/gnu/gcj/text/LineBreakIterator.java
Normal file
168
libjava/gnu/gcj/text/LineBreakIterator.java
Normal file
|
@ -0,0 +1,168 @@
|
|||
// Default line BreakIterator.
|
||||
|
||||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.text;
|
||||
|
||||
import java.text.BreakIterator;
|
||||
import java.text.CharacterIterator;
|
||||
|
||||
/**
|
||||
* @author Tom Tromey <tromey@cygnus.com>
|
||||
* @date March 22, 1999
|
||||
* Written using The Unicode Standard, Version 2.0.
|
||||
*/
|
||||
|
||||
public class LineBreakIterator extends BaseBreakIterator
|
||||
{
|
||||
public Object clone ()
|
||||
{
|
||||
return new LineBreakIterator (this);
|
||||
}
|
||||
|
||||
public LineBreakIterator ()
|
||||
{
|
||||
iter = null;
|
||||
}
|
||||
|
||||
private LineBreakIterator (LineBreakIterator other)
|
||||
{
|
||||
iter = (CharacterIterator) other.iter.clone();
|
||||
}
|
||||
|
||||
// Some methods to tell us different properties of characters.
|
||||
private final boolean isNb (char c)
|
||||
{
|
||||
return (c == 0x00a0 // NO-BREAK SPACE
|
||||
|| c == 0x2011 // NON-BREAKING HYPHEN
|
||||
|| c == 0xfeff); // ZERO WITH NO-BREAK SPACE
|
||||
}
|
||||
private final boolean isClose (int type)
|
||||
{
|
||||
return (type == Character.END_PUNCTUATION
|
||||
// Unicode book says "comma, period, ...", which I take to
|
||||
// mean "Po" class.
|
||||
|| type == Character.OTHER_PUNCTUATION);
|
||||
}
|
||||
private final boolean isIdeo (char c)
|
||||
{
|
||||
return (c >= 0x3040 && c <= 0x309f // Hiragana
|
||||
|| c >= 0x30a0 && c <= 0x30ff // Katakana
|
||||
|| c >= 0x4e00 && c <= 0x9fff // Han
|
||||
|| c >= 0x3100 && c <= 0x312f); // Bopomofo
|
||||
}
|
||||
|
||||
public int next ()
|
||||
{
|
||||
int end = iter.getEndIndex();
|
||||
if (iter.getIndex() == end)
|
||||
return DONE;
|
||||
|
||||
while (iter.getIndex() < end)
|
||||
{
|
||||
char c = iter.current();
|
||||
int type = Character.getType(c);
|
||||
|
||||
char n = iter.next();
|
||||
|
||||
if (n == CharacterIterator.DONE
|
||||
|| type == Character.PARAGRAPH_SEPARATOR
|
||||
|| type == Character.LINE_SEPARATOR)
|
||||
break;
|
||||
|
||||
// Handle two cases where we must scan for non-spacing marks.
|
||||
int start = iter.getIndex();
|
||||
if (type == Character.SPACE_SEPARATOR
|
||||
|| type == Character.START_PUNCTUATION
|
||||
|| isIdeo (c))
|
||||
{
|
||||
while (n != CharacterIterator.DONE
|
||||
&& Character.getType(n) == Character.NON_SPACING_MARK)
|
||||
n = iter.next();
|
||||
if (n == CharacterIterator.DONE)
|
||||
break;
|
||||
|
||||
if (type == Character.SPACE_SEPARATOR)
|
||||
{
|
||||
int nt = Character.getType(n);
|
||||
if (nt != Character.NON_SPACING_MARK
|
||||
&& nt != Character.SPACE_SEPARATOR
|
||||
&& ! isNb (n))
|
||||
break;
|
||||
}
|
||||
else if (type == Character.START_PUNCTUATION)
|
||||
{
|
||||
if (isIdeo (n))
|
||||
{
|
||||
// Open punctuation followed by non spacing marks
|
||||
// and then ideograph does not have a break in
|
||||
// it. So skip all this.
|
||||
start = iter.getIndex();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Ideograph preceded this character.
|
||||
if (isClose (Character.getType(n)))
|
||||
break;
|
||||
}
|
||||
}
|
||||
iter.setIndex(start);
|
||||
}
|
||||
|
||||
return iter.getIndex();
|
||||
}
|
||||
|
||||
public int previous ()
|
||||
{
|
||||
int start = iter.getBeginIndex();
|
||||
if (iter.getIndex() == start)
|
||||
return DONE;
|
||||
|
||||
while (iter.getIndex() >= start)
|
||||
{
|
||||
char c = iter.previous();
|
||||
if (c == CharacterIterator.DONE)
|
||||
break;
|
||||
int type = Character.getType(c);
|
||||
|
||||
char n = iter.previous();
|
||||
if (n == CharacterIterator.DONE)
|
||||
break;
|
||||
iter.next();
|
||||
|
||||
int nt = Character.getType(n);
|
||||
// Break after paragraph separators.
|
||||
if (nt == Character.PARAGRAPH_SEPARATOR
|
||||
|| nt == Character.LINE_SEPARATOR)
|
||||
break;
|
||||
|
||||
// Skip non-spacing marks.
|
||||
int init = iter.getIndex();
|
||||
while (n != CharacterIterator.DONE && nt == Character.NON_SPACING_MARK)
|
||||
{
|
||||
n = iter.previous();
|
||||
nt = Character.getType(n);
|
||||
}
|
||||
|
||||
if (nt == Character.SPACE_SEPARATOR
|
||||
&& type != Character.SPACE_SEPARATOR
|
||||
&& type != Character.NON_SPACING_MARK
|
||||
&& ! isNb (c))
|
||||
break;
|
||||
if (! isClose (type) && isIdeo (n))
|
||||
break;
|
||||
if (isIdeo (c) && nt != Character.START_PUNCTUATION)
|
||||
break;
|
||||
iter.setIndex(init);
|
||||
}
|
||||
|
||||
return iter.getIndex();
|
||||
}
|
||||
}
|
75
libjava/gnu/gcj/text/LocaleData_en.java
Normal file
75
libjava/gnu/gcj/text/LocaleData_en.java
Normal file
|
@ -0,0 +1,75 @@
|
|||
// Generic English locale data for java.text.
|
||||
|
||||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.text;
|
||||
|
||||
import java.util.ListResourceBundle;
|
||||
|
||||
/**
|
||||
* @author Tom Tromey <tromey@cygnus.com>
|
||||
* @date March 4, 1999
|
||||
*/
|
||||
|
||||
public final class LocaleData_en extends ListResourceBundle
{
  // Shared tables for DateFormatSymbols.

  /** AM/PM markers. */
  static final String[] ampmsDefault = { "AM", "PM" };

  /** Era names. */
  static final String[] erasDefault = { "BC", "AD" };

  /** Pattern letters used in date format patterns. */
  static final String localPatternCharsDefault = "GyMdkHmsSEDFwWahKz";

  /** Full month names, followed by an empty thirteenth entry. */
  static final String[] monthsDefault = {
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
    ""
  };

  /** Abbreviated month names, followed by an empty thirteenth entry. */
  static final String[] shortMonthsDefault = {
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
    ""
  };

  /** Abbreviated weekday names; slot 0 is empty, Sunday is slot 1. */
  static final String[] shortWeekdaysDefault = {
    "",
    "Sun",
    "Mon",
    "Tue",
    "Wed",
    "Thu",
    "Fri",
    "Sat"
  };

  /** Full weekday names; slot 0 is empty, Sunday is slot 1. */
  static final String[] weekdaysDefault = {
    "",
    "Sunday",
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday"
  };

  /** Key/value pairs served by this bundle. */
  private static final Object[][] bundleEntries =
  {
    // DecimalFormatSymbols.
    { "decimalSeparator", "." },
    { "digit", "#" },
    { "exponential", "E" },
    { "groupingSeparator", "," },
    { "infinity", "\u221e" },
    { "minusSign", "-" },
    { "NaN", "\ufffd" },
    { "patternSeparator", ";" },
    { "percent", "%" },
    { "perMill", "\u2030" },
    { "zeroDigit", "0" },

    // NumberFormat.
    { "numberFormat", "#,##0.###" },
    { "percentFormat", "#,##0%" },

    // DateFormatSymbols.
    { "ampm", ampmsDefault },
    { "eras", erasDefault },
    { "datePatternChars", localPatternCharsDefault },
    { "months", monthsDefault },
    { "shortMonths", shortMonthsDefault },
    { "shortWeekdays", shortWeekdaysDefault },
    { "weekdays", weekdaysDefault }
  };

  /** Returns the table of key/value pairs for this bundle. */
  protected Object[][] getContents ()
  {
    return bundleEntries;
  }
}
|
71
libjava/gnu/gcj/text/LocaleData_en_US.java
Normal file
71
libjava/gnu/gcj/text/LocaleData_en_US.java
Normal file
|
@ -0,0 +1,71 @@
|
|||
// US English locale data for java.text.
|
||||
|
||||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.text;
|
||||
|
||||
import java.util.ListResourceBundle;
|
||||
|
||||
/**
|
||||
* @author Tom Tromey <tromey@cygnus.com>
|
||||
* @date March 4, 1999
|
||||
*/
|
||||
|
||||
public final class LocaleData_en_US extends ListResourceBundle
{
  /**
   * Time zone names for DateFormatSymbols.  Each row holds six strings:
   * the first and last look like a zone ID and a city, the middle four
   * are name/abbreviation pairs.
   */
  static String[][] zoneStringsDefault = {
    {
      "PST", "Pacific Standard Time", "PST",
      "Pacific Daylight Time", "PDT", "San Francisco"
    },
    {
      "MST", "Mountain Standard Time", "MST",
      "Mountain Daylight Time", "MDT", "Denver"
    },
    {
      "PNT", "Mountain Standard Time", "MST",
      "Mountain Standard Time", "MST", "Phoenix"
    },
    {
      "CST", "Central Standard Time", "CST",
      "Central Daylight Time", "CDT", "Chicago"
    },
    {
      "EST", "Eastern Standard Time", "EST",
      "Eastern Daylight Time", "EDT", "Boston"
    },
    {
      "IET", "Eastern Standard Time", "EST",
      "Eastern Standard Time", "EST", "Indianapolis"
    },
    {
      "PRT", "Atlantic Standard Time", "AST",
      "Atlantic Daylight Time", "ADT", "Halifax"
    },
    {
      "HST", "Hawaii Standard Time", "HST",
      "Hawaii Daylight Time", "HDT", "Honolulu"
    },
    {
      "AST", "Alaska Standard Time", "AST",
      "Alaska Daylight Time", "ADT", "Anchorage"
    }
  };

  /** Key/value pairs served by this bundle. */
  private static final Object[][] bundleEntries =
  {
    // DecimalFormatSymbols.
    { "currency", "$" },
    { "intlCurrencySymbol", "$" },      // FIXME?

    // NumberFormat.
    { "currencyFormat", "$#,##0.00;($#,##0.00)" },

    // DateFormatSymbols.
    { "zoneStrings", zoneStringsDefault },

    // DateFormat.
    { "shortDateFormat", "M/d/yy" },    // Java's Y2K bug.
    { "mediumDateFormat", "d-MMM-yy" },
    { "longDateFormat", "MMMM d, yyyy" },
    { "fullDateFormat", "EEEE MMMM d, yyyy G" },
    { "shortTimeFormat", "h:mm a" },
    { "mediumTimeFormat", "h:mm:ss a" },
    { "longTimeFormat", "h:mm:ss a z" },
    { "fullTimeFormat", "h:mm:ss;S 'o''clock' a z" }
  };

  /** Returns the table of key/value pairs for this bundle. */
  protected Object[][] getContents ()
  {
    return bundleEntries;
  }
}
|
226
libjava/gnu/gcj/text/SentenceBreakIterator.java
Normal file
226
libjava/gnu/gcj/text/SentenceBreakIterator.java
Normal file
|
@ -0,0 +1,226 @@
|
|||
// Default sentence BreakIterator.
|
||||
|
||||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.text;
|
||||
|
||||
import java.text.BreakIterator;
|
||||
import java.text.CharacterIterator;
|
||||
|
||||
/**
|
||||
* @author Tom Tromey <tromey@cygnus.com>
|
||||
* @date March 23, 1999
|
||||
* Written using The Unicode Standard, Version 2.0.
|
||||
*/
|
||||
|
||||
public class SentenceBreakIterator extends BaseBreakIterator
|
||||
{
|
||||
public Object clone ()
|
||||
{
|
||||
return new SentenceBreakIterator (this);
|
||||
}
|
||||
|
||||
public SentenceBreakIterator ()
|
||||
{
|
||||
iter = null;
|
||||
}
|
||||
|
||||
private SentenceBreakIterator (SentenceBreakIterator other)
|
||||
{
|
||||
iter = (CharacterIterator) other.iter.clone();
|
||||
}
|
||||
|
||||
public int next ()
|
||||
{
|
||||
int end = iter.getEndIndex();
|
||||
if (iter.getIndex() == end)
|
||||
return DONE;
|
||||
|
||||
while (iter.getIndex() < end)
|
||||
{
|
||||
char c = iter.current();
|
||||
if (c == CharacterIterator.DONE)
|
||||
break;
|
||||
int type = Character.getType(c);
|
||||
|
||||
char n = iter.next();
|
||||
if (n == CharacterIterator.DONE)
|
||||
break;
|
||||
|
||||
// Always break after paragraph separator.
|
||||
if (type == Character.PARAGRAPH_SEPARATOR)
|
||||
break;
|
||||
|
||||
if (c == '!' || c == '?')
|
||||
{
|
||||
// Skip close punctuation.
|
||||
while (n != CharacterIterator.DONE
|
||||
&& Character.getType(n) == Character.END_PUNCTUATION)
|
||||
n = iter.next();
|
||||
// Skip spaces.
|
||||
while (n != CharacterIterator.DONE
|
||||
&& Character.getType(n) == Character.SPACE_SEPARATOR)
|
||||
n = iter.next();
|
||||
// Skip optional paragraph separator.
|
||||
if (n != CharacterIterator.DONE
|
||||
&& Character.getType(n) == Character.PARAGRAPH_SEPARATOR)
|
||||
n = iter.next();
|
||||
|
||||
// There's always a break somewhere after `!' or `?'.
|
||||
break;
|
||||
}
|
||||
|
||||
if (c == '.')
|
||||
{
|
||||
int save = iter.getIndex();
|
||||
// Skip close punctuation.
|
||||
while (n != CharacterIterator.DONE
|
||||
&& Character.getType(n) == Character.END_PUNCTUATION)
|
||||
n = iter.next();
|
||||
// Skip spaces. We keep count because we need at least
|
||||
// one for this period to represent a terminator.
|
||||
int spcount = 0;
|
||||
while (n != CharacterIterator.DONE
|
||||
&& Character.getType(n) == Character.SPACE_SEPARATOR)
|
||||
{
|
||||
n = iter.next();
|
||||
++spcount;
|
||||
}
|
||||
if (spcount > 0)
|
||||
{
|
||||
int save2 = iter.getIndex();
|
||||
// Skip over open puncutation.
|
||||
while (n != CharacterIterator.DONE
|
||||
&& Character.getType(n) == Character.START_PUNCTUATION)
|
||||
n = iter.next();
|
||||
// Next character must not be lower case.
|
||||
if (n == CharacterIterator.DONE
|
||||
|| ! Character.isLowerCase(n))
|
||||
{
|
||||
iter.setIndex(save2);
|
||||
break;
|
||||
}
|
||||
}
|
||||
iter.setIndex(save);
|
||||
}
|
||||
}
|
||||
|
||||
return iter.getIndex();
|
||||
}
|
||||
|
||||
private final int previous_internal ()
|
||||
{
|
||||
int start = iter.getBeginIndex();
|
||||
if (iter.getIndex() == start)
|
||||
return DONE;
|
||||
|
||||
while (iter.getIndex() >= start)
|
||||
{
|
||||
char c = iter.previous();
|
||||
if (c == CharacterIterator.DONE)
|
||||
break;
|
||||
|
||||
char n = iter.previous();
|
||||
if (n == CharacterIterator.DONE)
|
||||
break;
|
||||
iter.next();
|
||||
int nt = Character.getType(n);
|
||||
|
||||
if (! Character.isLowerCase(c)
|
||||
&& (nt == Character.START_PUNCTUATION
|
||||
|| nt == Character.SPACE_SEPARATOR))
|
||||
{
|
||||
int save = iter.getIndex();
|
||||
int save_nt = nt;
|
||||
char save_n = n;
|
||||
// Skip open punctuation.
|
||||
while (n != CharacterIterator.DONE
|
||||
&& Character.getType(n) == Character.START_PUNCTUATION)
|
||||
n = iter.previous();
|
||||
if (n == CharacterIterator.DONE)
|
||||
break;
|
||||
if (Character.getType(n) == Character.SPACE_SEPARATOR)
|
||||
{
|
||||
// Must have at least once space after the `.'.
|
||||
int save2 = iter.getIndex();
|
||||
while (n != CharacterIterator.DONE
|
||||
&& Character.getType(n) == Character.SPACE_SEPARATOR)
|
||||
n = iter.previous();
|
||||
// Skip close punctuation.
|
||||
while (n != CharacterIterator.DONE
|
||||
&& Character.getType(n) == Character.END_PUNCTUATION)
|
||||
n = iter.previous();
|
||||
if (n == CharacterIterator.DONE || n == '.')
|
||||
{
|
||||
// Communicate location of actual end.
|
||||
period = iter.getIndex();
|
||||
iter.setIndex(save2);
|
||||
break;
|
||||
}
|
||||
}
|
||||
iter.setIndex(save);
|
||||
nt = save_nt;
|
||||
n = save_n;
|
||||
}
|
||||
|
||||
if (nt == Character.PARAGRAPH_SEPARATOR)
|
||||
{
|
||||
// Communicate location of actual end.
|
||||
period = iter.getIndex();
|
||||
break;
|
||||
}
|
||||
else if (nt == Character.SPACE_SEPARATOR
|
||||
|| nt == Character.END_PUNCTUATION)
|
||||
{
|
||||
int save = iter.getIndex();
|
||||
// Skip spaces.
|
||||
while (n != CharacterIterator.DONE
|
||||
&& Character.getType(n) == Character.SPACE_SEPARATOR)
|
||||
n = iter.previous();
|
||||
// Skip close punctuation.
|
||||
while (n != CharacterIterator.DONE
|
||||
&& Character.getType(n) == Character.END_PUNCTUATION)
|
||||
n = iter.previous();
|
||||
int here = iter.getIndex();
|
||||
iter.setIndex(save);
|
||||
if (n == CharacterIterator.DONE || n == '!' || n == '?')
|
||||
{
|
||||
// Communicate location of actual end.
|
||||
period = here;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (n == '!' || n == '?')
|
||||
{
|
||||
// Communicate location of actual end.
|
||||
period = iter.getIndex();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return iter.getIndex();
|
||||
}
|
||||
|
||||
public int previous ()
|
||||
{
|
||||
// We want to skip over the first sentence end to the second one.
|
||||
// However, at the end of the string we want the first end.
|
||||
int here = iter.getIndex();
|
||||
period = here;
|
||||
int first = previous_internal ();
|
||||
if (here == iter.getEndIndex() || first == DONE)
|
||||
return first;
|
||||
iter.setIndex(period);
|
||||
return previous_internal ();
|
||||
}
|
||||
|
||||
// This is used for communication between previous and
|
||||
// previous_internal.
|
||||
private int period;
|
||||
}
|
224
libjava/gnu/gcj/text/WordBreakIterator.java
Normal file
224
libjava/gnu/gcj/text/WordBreakIterator.java
Normal file
|
@ -0,0 +1,224 @@
|
|||
// Default word BreakIterator.
|
||||
|
||||
/* Copyright (C) 1999 Cygnus Solutions
|
||||
|
||||
This file is part of libgcj.
|
||||
|
||||
This software is copyrighted work licensed under the terms of the
|
||||
Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
|
||||
details. */
|
||||
|
||||
package gnu.gcj.text;
|
||||
|
||||
import java.text.BreakIterator;
|
||||
import java.text.CharacterIterator;
|
||||
|
||||
/**
|
||||
* @author Tom Tromey <tromey@cygnus.com>
|
||||
* @date March 22, 1999
|
||||
* Written using The Unicode Standard, Version 2.0.
|
||||
*/
|
||||
|
||||
public class WordBreakIterator extends BaseBreakIterator
|
||||
{
|
||||
public Object clone ()
|
||||
{
|
||||
return new WordBreakIterator (this);
|
||||
}
|
||||
|
||||
public WordBreakIterator ()
|
||||
{
|
||||
iter = null;
|
||||
}
|
||||
|
||||
private WordBreakIterator (WordBreakIterator other)
|
||||
{
|
||||
iter = (CharacterIterator) other.iter.clone();
|
||||
}
|
||||
|
||||
// Some methods to tell us different properties of characters.
|
||||
private final boolean isHira (char c)
|
||||
{
|
||||
return c >= 0x3040 && c <= 0x309f;
|
||||
}
|
||||
private final boolean isKata (char c)
|
||||
{
|
||||
return c >= 0x30a0 && c <= 0x30ff;
|
||||
}
|
||||
private final boolean isHan (char c)
|
||||
{
|
||||
return c >= 0x4e00 && c <= 0x9fff;
|
||||
}
|
||||
|
||||
public int next ()
|
||||
{
|
||||
int end = iter.getEndIndex();
|
||||
if (iter.getIndex() == end)
|
||||
return DONE;
|
||||
|
||||
while (iter.getIndex() < end)
|
||||
{
|
||||
char c = iter.current();
|
||||
if (c == CharacterIterator.DONE)
|
||||
break;
|
||||
int type = Character.getType(c);
|
||||
|
||||
char n = iter.next();
|
||||
if (n == CharacterIterator.DONE)
|
||||
break;
|
||||
|
||||
// Break after paragraph separators.
|
||||
if (type == Character.PARAGRAPH_SEPARATOR
|
||||
|| type == Character.LINE_SEPARATOR)
|
||||
break;
|
||||
|
||||
// Break between letters and non-letters.
|
||||
// FIXME: we treat apostrophe as part of a word. This
|
||||
// is an English-ism.
|
||||
boolean is_letter = Character.isLetter(c);
|
||||
if (c != '\'' && ! is_letter && type != Character.NON_SPACING_MARK
|
||||
&& Character.isLetter(n))
|
||||
break;
|
||||
|
||||
// Always break after certain symbols, such as punctuation.
|
||||
// This heuristic is derived from hints in the JCL book and is
|
||||
// not part of Unicode. It seems to be right, however.
|
||||
// FIXME: we treat apostrophe as part of a word. This
|
||||
// is an English-ism.
|
||||
if (c != '\''
|
||||
&& (type == Character.DASH_PUNCTUATION
|
||||
|| type == Character.START_PUNCTUATION
|
||||
|| type == Character.END_PUNCTUATION
|
||||
|| type == Character.CONNECTOR_PUNCTUATION
|
||||
|| type == Character.OTHER_PUNCTUATION
|
||||
|| type == Character.MATH_SYMBOL
|
||||
|| type == Character.CURRENCY_SYMBOL
|
||||
|| type == Character.MODIFIER_SYMBOL
|
||||
|| type == Character.OTHER_SYMBOL
|
||||
|| type == Character.FORMAT
|
||||
|| type == Character.CONTROL))
|
||||
break;
|
||||
|
||||
boolean is_hira = isHira (c);
|
||||
boolean is_kata = isKata (c);
|
||||
boolean is_han = isHan (c);
|
||||
|
||||
// Special case Japanese.
|
||||
if (! is_hira && ! is_kata && ! is_han
|
||||
&& type != Character.NON_SPACING_MARK
|
||||
&& (isHira (n) || isKata (n) || isHan (n)))
|
||||
break;
|
||||
|
||||
if (is_hira || is_kata || is_han || is_letter)
|
||||
{
|
||||
// Now we need to do some lookahead. We might need to do
|
||||
// quite a bit of lookahead, so we save our position and
|
||||
// restore it later.
|
||||
int save = iter.getIndex();
|
||||
// Skip string of non spacing marks.
|
||||
while (n != CharacterIterator.DONE
|
||||
&& Character.getType(n) == Character.NON_SPACING_MARK)
|
||||
n = iter.next();
|
||||
if (n == CharacterIterator.DONE)
|
||||
break;
|
||||
if ((is_hira && ! isHira (n))
|
||||
|| (is_kata && ! isHira (n) && ! isKata (n))
|
||||
|| (is_han && ! isHira (n) && ! isHan (n))
|
||||
// FIXME: we treat apostrophe as part of a word. This
|
||||
// is an English-ism.
|
||||
|| (is_letter && ! Character.isLetter(n) && n != '\''))
|
||||
break;
|
||||
iter.setIndex(save);
|
||||
}
|
||||
}
|
||||
|
||||
return iter.getIndex();
|
||||
}
|
||||
|
||||
public int previous ()
|
||||
{
|
||||
int start = iter.getBeginIndex();
|
||||
if (iter.getIndex() == start)
|
||||
return DONE;
|
||||
|
||||
while (iter.getIndex() >= start)
|
||||
{
|
||||
char c = iter.previous();
|
||||
if (c == CharacterIterator.DONE)
|
||||
break;
|
||||
|
||||
boolean is_hira = isHira (c);
|
||||
boolean is_kata = isKata (c);
|
||||
boolean is_han = isHan (c);
|
||||
boolean is_letter = Character.isLetter(c);
|
||||
|
||||
char n = iter.previous();
|
||||
if (n == CharacterIterator.DONE)
|
||||
break;
|
||||
iter.next();
|
||||
int type = Character.getType(n);
|
||||
// Break after paragraph separators.
|
||||
if (type == Character.PARAGRAPH_SEPARATOR
|
||||
|| type == Character.LINE_SEPARATOR)
|
||||
break;
|
||||
|
||||
// Break between letters and non-letters.
|
||||
// FIXME: we treat apostrophe as part of a word. This
|
||||
// is an English-ism.
|
||||
if (n != '\'' && ! Character.isLetter(n)
|
||||
&& type != Character.NON_SPACING_MARK
|
||||
&& is_letter)
|
||||
break;
|
||||
|
||||
// Always break after certain symbols, such as punctuation.
|
||||
// This heuristic is derived from hints in the JCL book and is
|
||||
// not part of Unicode. It seems to be right, however.
|
||||
// FIXME: we treat apostrophe as part of a word. This
|
||||
// is an English-ism.
|
||||
if (n != '\''
|
||||
&& (type == Character.DASH_PUNCTUATION
|
||||
|| type == Character.START_PUNCTUATION
|
||||
|| type == Character.END_PUNCTUATION
|
||||
|| type == Character.CONNECTOR_PUNCTUATION
|
||||
|| type == Character.OTHER_PUNCTUATION
|
||||
|| type == Character.MATH_SYMBOL
|
||||
|| type == Character.CURRENCY_SYMBOL
|
||||
|| type == Character.MODIFIER_SYMBOL
|
||||
|| type == Character.OTHER_SYMBOL
|
||||
|| type == Character.FORMAT
|
||||
|| type == Character.CONTROL))
|
||||
break;
|
||||
|
||||
// Special case Japanese.
|
||||
if ((is_hira || is_kata || is_han)
|
||||
&& ! isHira (n) && ! isKata (n) && ! isHan (n)
|
||||
&& type != Character.NON_SPACING_MARK)
|
||||
break;
|
||||
|
||||
// We might have to skip over non spacing marks to see what's
|
||||
// on the other side.
|
||||
if (! is_hira || (! is_letter && c != '\''))
|
||||
{
|
||||
int save = iter.getIndex();
|
||||
while (n != CharacterIterator.DONE
|
||||
&& Character.getType(n) == Character.NON_SPACING_MARK)
|
||||
n = iter.previous();
|
||||
iter.setIndex(save);
|
||||
// This is a strange case: a bunch of non-spacing marks at
|
||||
// the beginning. We treat the current location as a word
|
||||
// break.
|
||||
if (n == CharacterIterator.DONE)
|
||||
break;
|
||||
if ((isHira (n) && ! is_hira)
|
||||
|| (isKata (n) && ! is_hira && ! is_kata)
|
||||
|| (isHan (n) && ! is_hira && ! is_han)
|
||||
// FIXME: we treat apostrophe as part of a word. This
|
||||
// is an English-ism.
|
||||
|| (! is_letter && c != '\'' && Character.isLetter(n)))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return iter.getIndex();
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue