unicode-muncher.pl: Updated to version 2.1 from GNU classpath.

2004-07-09 Michael Koch <konqueror@gmx.de> * scripts/unicode-muncher.pl: Updated to version 2.1 from GNU classpath. Added some clarifications on where to find the needed files from www.unicode.org. * gnu/gcj/convert/UnicodeCharacterDatabase-3.0.0.html, gnu/gcj/convert/UnicodeData-3.0.0.txt: Removed, these can directly be downloaded from www.unicode.org if needed. * gnu/java/lang/CharData.java: Regenerated. * include/java-chartables.h: Regenerated. * Makefile.am (ordinary_java_source_files): Removed gnu/java/lang/CharData.java. * Makefile.in: Regenerated. From-SVN: r84399
2004-07-09 21:00:12 +00:00 · 2004-07-09 21:00:12 +00:00 · 1f33f6b4c7
commit 1f33f6b4c7
parent dae1dd2e3c
8 changed files with 1753 additions and 12540 deletions
--- a/libjava/ChangeLog
+++ b/libjava/ChangeLog
@ -1,3 +1,18 @@
 2004-07-09  Michael Koch  <konqueror@gmx.de>
 	* scripts/unicode-muncher.pl: Updated to version 2.1
 	from GNU classpath. Added some clarifications on where to find the
 	needed files from www.unicode.org.
 	* gnu/gcj/convert/UnicodeCharacterDatabase-3.0.0.html,
 	gnu/gcj/convert/UnicodeData-3.0.0.txt:
 	Removed, these can directly be downloaded from www.unicode.org if
 	needed. 
 	* gnu/java/lang/CharData.java: Regenerated.
 	* include/java-chartables.h: Regenerated.
 	* Makefile.am (ordinary_java_source_files):
 	Removed gnu/java/lang/CharData.java.
 	* Makefile.in: Regenerated.
 2004-07-09  Michael Koch  <konqueror@gmx.de>
 	* java/security/AccessControlContext.java,
--- a/libjava/Makefile.am
+++ b/libjava/Makefile.am
@ -2289,7 +2289,6 @@ gnu/java/io/NullOutputStream.java \
 gnu/java/io/ObjectIdentityWrapper.java \
 gnu/java/lang/ArrayHelper.java \
 gnu/java/lang/ClassHelper.java \
 gnu/java/lang/CharData.java \
 gnu/java/lang/MainThread.java \
 gnu/java/lang/reflect/TypeSignature.java \
 gnu/java/locale/Calendar.java \
--- a/libjava/Makefile.in
+++ b/libjava/Makefile.in
@ -1961,7 +1961,6 @@ gnu/java/io/NullOutputStream.java \
 gnu/java/io/ObjectIdentityWrapper.java \
 gnu/java/lang/ArrayHelper.java \
 gnu/java/lang/ClassHelper.java \
 gnu/java/lang/CharData.java \
 gnu/java/lang/MainThread.java \
 gnu/java/lang/reflect/TypeSignature.java \
 gnu/java/locale/Calendar.java \
@ -3237,9 +3236,8 @@ DEP_FILES =  .deps/$(srcdir)/$(CONVERT_DIR)/gen-from-JIS.P \
 .deps/gnu/java/io/ClassLoaderObjectInputStream.P \
 .deps/gnu/java/io/NullOutputStream.P \
 .deps/gnu/java/io/ObjectIdentityWrapper.P \
-.deps/gnu/java/lang/ArrayHelper.P .deps/gnu/java/lang/CharData.P \
+.deps/gnu/java/lang/ArrayHelper.P .deps/gnu/java/lang/ClassHelper.P \
-.deps/gnu/java/lang/ClassHelper.P .deps/gnu/java/lang/MainThread.P \
+.deps/gnu/java/lang/MainThread.P .deps/gnu/java/lang/natMainThread.P \
 .deps/gnu/java/lang/natMainThread.P \
 .deps/gnu/java/lang/reflect/TypeSignature.P \
 .deps/gnu/java/locale/Calendar.P .deps/gnu/java/locale/Calendar_de.P \
 .deps/gnu/java/locale/Calendar_en.P .deps/gnu/java/locale/Calendar_nl.P \
--- a/libjava/gnu/gcj/convert/UnicodeCharacterDatabase-3.0.0.html
+++ b/libjava/gnu/gcj/convert/UnicodeCharacterDatabase-3.0.0.html
@ -1,345 +0,0 @@
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
       "http://www.w3.org/TR/REC-html40/loose.dtd"> 
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 <meta http-equiv="Content-Language" content="en-us">
 <meta name="GENERATOR" content="Microsoft FrontPage 4.0">
 <meta name="ProgId" content="FrontPage.Editor.Document">
 <link rel="stylesheet" href="http://www.unicode.org/unicode.css" type="text/css">
 <title>Unicode Character Database</title>
 </head>
 <body>
 <h1>UNICODE CHARACTER DATABASE<br>  
 Version 3.0.0</h1>
 <table border="1" cellspacing="2" cellpadding="0" height="87" width="100%">
  <tr>
    <td valign="TOP" width="144">Revision</td>
    <td valign="TOP">3.0.0</td>
  </tr>
  <tr>
    <td valign="TOP" width="144">Authors</td>
    <td valign="TOP">Mark Davis and Ken Whistler</td>
  </tr>
  <tr>
    <td valign="TOP" width="144">Date</td>
    <td valign="TOP">1999-09-11</td>
  </tr>
  <tr>
    <td valign="TOP" width="144">This Version</td>
    <td valign="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html</a></td>
  </tr>
  <tr>
    <td valign="TOP" width="144">Previous Version</td>
    <td valign="TOP">n/a</td>
  </tr>
  <tr>
    <td valign="TOP" width="144">Latest Version</td>
    <td valign="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeCharacterDatabase-3.0.0.html</a></td>
  </tr>
 </table>
 <p align="center">Copyright © 1995-1999 Unicode, Inc. All Rights reserved.</p>  
 <h2>Disclaimer</h2>  
 <p>The Unicode Character Database is provided as is by Unicode, Inc. No claims   
 are made as to fitness for any particular purpose. No warranties of any kind are   
 expressed or implied. The recipient agrees to determine applicability of   
 information provided. If this file has been purchased on magnetic or optical   
 media from Unicode, Inc., the sole remedy for any claim will be exchange of   
 defective media within 90 days of receipt.</p>  
 <p>This disclaimer is applicable for all other data files accompanying the   
 Unicode Character Database, some of which have been compiled by the Unicode   
 Consortium, and some of which have been supplied by other sources.</p>  
 <h2>Limitations on Rights to Redistribute This Data</h2>  
 <p>Recipient is granted the right to make copies in any form for internal   
 distribution and to freely use the information supplied in the creation of   
 products supporting the Unicode<sup>TM</sup> Standard. The files in the Unicode   
 Character Database can be redistributed to third parties or other organizations   
 (whether for profit or not) as long as this notice and the disclaimer notice are   
 retained. Information can be extracted from these files and used in   
 documentation or programs, as long as there is an accompanying notice indicating   
 the source.</p>  
 <h2>Introduction</h2>  
 <p>The Unicode Character Database is a set of files that define the Unicode   
 character properties and internal mappings. For more information about character   
 properties and mappings, see <i><a href="http://www.unicode.org/unicode/uni2book/u2.html">The   
 Unicode Standard</a></i>.</p>  
 <p>The Unicode Character Database has been updated to reflect Version 3.0 of the   
 Unicode Standard, with many characters added to those published in Version 2.0.   
 A number of corrections have also been made to case mappings or other errors in   
 the database noted since the publication of Version 2.0. Normative bidirectional   
 properties have also been modified to reflect decisions of the Unicode Technical   
 Committee.</p>  
 <p>For more information on versions of the Unicode Standard and how to reference   
 them, see <a href="http://www.unicode.org/unicode/standard/versions/">http://www.unicode.org/unicode/standard/versions/</a>.</p>  
 <h2>Conformance</h2>  
 <p>Character properties may be either normative or informative. <i>Normative</i>   
 means that implementations that claim conformance to the Unicode Standard (at a   
 particular version) and which make use of a particular property or field must   
 follow the specifications of the standard for that property or field in order to   
 be conformant. The term <i>normative</i> when applied to a property or field of   
 the Unicode Character Database, does <i>not</i> mean that the value of that   
 field will never change. Corrections and extensions to the standard in the   
 future may require minor changes to normative values, even though the Unicode   
 Technical Committee strives to minimize such changes. An<i> informative </i>property   
 or field is strongly recommended, but a conformant implementation is free to use   
 or change such values as it may require while still being conformant to the   
 standard. Particular implementations may choose to override the properties and   
 mappings that are not normative. In that case, it is up to the implementer to   
 establish a protocol to convey that information.</p>  
 <h2>Files</h2>  
 <p>The following summarizes the files in the Unicode Character Database. &nbsp;For   
 more information about these files, see the referenced technical report or   
 section of Unicode Standard, Version 3.0.</p>  
 <p><b>UnicodeData.txt (Chapter 4)</b>  
 <ul>  
  <li>The main file in the Unicode Character Database.</li>  
  <li>For detailed information on the format, see <a href="UnicodeData.html">UnicodeData.html</a>.   
    This file also characterizes which properties are normative and which are   
    informative.</li>  
 </ul>  
 <p><b>PropList.txt (Chapter 4)</b>  
 <ul>  
  <li>Additional informative properties list: <i>Alphabetic, Ideographic,</i>   
    and <i>Mathematical</i>, among others.</li>  
 </ul>  
 <p><b>SpecialCasing.txt (Chapter 4)</b>  
 <ul>  
  <li>List of informative special casing properties, including one-to-many   
    mappings such as SHARP S =&gt; &quot;SS&quot;, and locale-specific mappings,   
    such as for Turkish <i>dotless i</i>.</li>  
 </ul>  
 <p><b>Blocks.txt (Chapter 14)</b>  
 <ul>  
  <li>List of normative block names.</li>  
 </ul>  
 <p><b>Jamo.txt (Chapter 4)</b>  
 <ul>  
  <li>List of normative Jamo short names, used in deriving HANGUL SYLLABLE names   
    algorithmically.</li>  
 </ul>  
 <p><b>ArabicShaping.txt (Section 8.2)</b>  
 <ul>  
  <li>Basic Arabic and Syriac character shaping properties, such as initial,   
    medial and final shapes. These properties are normative for minimal shaping   
    of Arabic and Syriac. </li>  
 </ul>  
 <p><b>NamesList.txt (Chapter 14)</b>  
 <ul>  
  <li>This file duplicates some of the material in the UnicodeData file, and   
     adds informative annotations used in the character charts, as printed in the   
    Unicode Standard. </li>  
  <li><b>Note: </b>The information in NamesList.txt and Index.txt files matches   
    the appropriate version of the book. Changes in the Unicode Character   
    Database since then may not be reflected in these files, since they are   
    primarily of archival interest.</li>  
 </ul>  
 <p><b>Index.txt (Chapter 14)</b>  
 <ul>  
  <li>Informative index to Unicode characters, as printed in the Unicode   
    Standard</li>  
  <li><b>Note: </b>The information in NamesList.txt and Index.txt files matches   
    the appropriate version of the book. Changes in the Unicode Character   
    Database since then may not be reflected in these files, since they are   
    primarily of archival interest.</li>  
 </ul>  
 <p><b>CompositionExclusions.txt (<a href="http://www.unicode.org/unicode/reports/tr15/">UTR#15   
 Unicode Normalization Forms</a>)</b>  
 <ul>  
  <li>Normative properties for normalization.</li>  
 </ul>  
 <p><b>LineBreak.txt (<a href="http://www.unicode.org/unicode/reports/tr14/">UTR   
 #14: Line Breaking Properties</a>)</b>  
 <ul>  
  <li>Normative and informative properties for line breaking. To see which   
    properties are informative and which are normative, consult UTR#14.</li>  
 </ul>  
 <p><b>EastAsianWidth.txt (<a href="http://www.unicode.org/unicode/reports/tr11/">UTR   
 #11: East Asian Character Width</a>)</b>  
 <ul>  
  <li>Informative properties for determining the choice of wide vs. narrow   
    glyphs in East Asian contexts.</li>  
 </ul>  
 <p><b>diffXvY.txt</b>  
 <ul>  
  <li>Mechanically-generated informative files containing accumulated   
    differences between successive versions of UnicodeData.txt</li>  
 </ul>  
 </body>  
 </html>  
--- a/libjava/gnu/gcj/convert/UnicodeData-3.0.0.txt
+++ b/libjava/gnu/gcj/convert/UnicodeData-3.0.0.txt
--- a/libjava/gnu/java/lang/CharData.java
+++ b/libjava/gnu/java/lang/CharData.java
--- a/libjava/include/java-chartables.h
+++ b/libjava/include/java-chartables.h
--- a/libjava/scripts/unicode-muncher.pl
+++ b/libjava/scripts/unicode-muncher.pl
@ -1,6 +1,6 @@
 #!/usr/bin/perl -w
 # unicode-muncher.pl -- generate Unicode database for java.lang.Character
-# Copyright (C) 1998, 2002 Free Software Foundation, Inc.
+# Copyright (C) 1998, 2002, 2004  Free Software Foundation, Inc.
 #
 # This file is part of GNU Classpath.
 #
@ -36,18 +36,22 @@
 # obligated to do so.  If you do not wish to do so, delete this
 # exception statement from your version.
-# Code for reading UnicodeData.txt and generating the code for
+# Code for reading UnicodeData-3.0.0.txt and SpecialCasing-2.txt to generate
-# gnu.java.lang.CharData.  For now, the relevant Unicode definition files
+# the code for gnu.java.lang.CharData. The relevant files can be found here:
-# are found in libjava/gnu/gcj/convert/.
+#
 #   http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.txt
 #   http://www.unicode.org/Public/3.0-Update/SpecialCasing-2.txt
 #
 # Inspired by code from Jochen Hoenicke.
 # author Eric Blake <ebb9@email.byu.edu>
 #
-# Usage: ./unicode-muncher <UnicodeData.txt> <CharData.java>
+# Usage: ./unicode-muncher <UnicodeData.txt> <SpecialCasing> <CharData.java>
 #   where <UnicodeData.txt> is obtained from www.unicode.org (named
-#   UnicodeData-3.0.0.txt for Unicode version 3.0.0), and <CharData.java>
+#   UnicodeData-3.0.0.txt for Unicode version 3.0.0), <SpecialCasing>
-#   is the final location for the Java interface gnu.java.lang.CharData.
+#   is obtained from www.unicode.org too (named SpecialCasing-2.txt for Unicode
-#   As of JDK 1.4, use Unicode version 3.0.0 for best results.
+#   version 3.0.0), and <CharData.java> is the final location for the Java
 #   interface gnu.java.lang.CharData. As of JDK 1.4, use Unicode version 3.0.0
 #   for best results.
 ##
 ## Convert a 16-bit integer to a Java source code String literal character
@ -75,20 +79,42 @@ my @DIRCODES = qw(L R AL EN ES ET AN CS NSM BN B S WS ON LRE LRO RLE RLO PDF);
 my $NOBREAK_FLAG  = 32;
 my $MIRRORED_FLAG = 64;
 my %special = ();
 my @info = ();
 my $titlecase = "";
 my $count = 0;
 my $range = 0;
-die "Usage: $0 <UnicodeData.txt> <CharData.java>" unless @ARGV == 2;
+die "Usage: $0 <UnicodeData.txt> <SpecialCasing.txt> <CharData.java>"
-open (UNICODE, "< $ARGV[0]") || die "Can't open Unicode attribute file: $!\n";
+    unless @ARGV == 3;
 $| = 1;
 print "GNU Classpath Unicode Attribute Database Generator 2.1\n";
 print "Copyright (C) 1998, 2002 Free Software Foundation, Inc.\n";
 # Stage 0: Parse the special casing file
 print "Parsing special casing file\n";
 open (SPECIAL, "< $ARGV[1]") || die "Can't open special casing file: $!\n";
 while (<SPECIAL>) {
    next if /^\#/;
    my ($ch, undef, undef, $upper) = split / *; */;
    # This grabs only the special casing for multi-char uppercase. Note that
    # there are no multi-char lowercase, and that Sun ignores multi-char
    # titlecase rules. This script omits 3 special cases in Unicode 3.0.0,
    # which must be hardcoded in java.lang.String:
    #  \u03a3 (Sun ignores this special case)
    #  \u0049 - lowercases to \u0131, but only in Turkish locale
    #  \u0069 - uppercases to \u0130, but only in Turkish locale
    next unless defined $upper and $upper =~ / /;
    $special{hex $ch} = [map {hex} split ' ', $upper];
 }
 close SPECIAL;
 # Stage 1: Parse the attribute file
 $| = 1;
 print "GNU Classpath Unicode Attribute Database Generator 2.0\n";
 print "Copyright (C) 1998, 2002 Free Software Foundation, Inc.\n";
 print "Parsing attributes file";
-while(<UNICODE>) {
+open (UNICODE, "< $ARGV[0]") || die "Can't open Unicode attribute file: $!\n";
 while (<UNICODE>) {
    print "." unless $count++ % 1000;
    chomp;
    s/\r//g;
@ -142,6 +168,8 @@ while(<UNICODE>) {
            last;
        }
    }
    $direction <<= 2;
    $direction += $#{$special{$ch}} if defined $special{$ch};
    if ($range) {
        die "Expecting end of range at $ch\n" unless $name =~ /Last>$/;
@ -167,9 +195,7 @@ my @charinfo = ();
 for my $ch (0 .. 0xffff) {
    print "." unless $count++ % 0x1000;
-    if (! defined $info[$ch]) {
+    $info[$ch] = pack("n5", 0, -1, 0, 0, -4) unless defined $info[$ch];
        $info[$ch] = pack("n5", 0, -1, 0, 0, -1);
    }
    my ($type, $numVal, $upper, $lower, $direction) = unpack("n5", $info[$ch]);
    if (! exists $charhash{$info[$ch]}) {
@ -209,7 +235,7 @@ for my $i (3 .. 8) {
    for ($j = $blksize - 1; $j > 0; $j--) {
        my %tails = ();
        for $k (0 .. $#blkarray) {
-            next if ! defined $blkarray[$k];
+            next unless defined $blkarray[$k];
            my $len = length $blkarray[$k];
            my $tail = substr $blkarray[$k], $len - $j * 2;
            if (exists $tails{$tail}) {
@ -222,12 +248,12 @@ for my $i (3 .. 8) {
        # tails are calculated, now calculate the heads and merge.
      BLOCK:
        for $k (0 .. $#blkarray) {
-            next if ! defined $blkarray[$k];
+            next unless defined $blkarray[$k];
            my $tomerge = $k;
            while (1) {
                my $head = substr($blkarray[$tomerge], 0, $j * 2);
                my $entry = $tails{$head};
-                next BLOCK if ! defined $entry;
+                next BLOCK unless defined $entry;
                my $other = shift @{$entry};
                if ($other == $tomerge) {
@ -297,10 +323,10 @@ die "UTF-8 limit of blocks may be exceeded: " . scalar(@blocks) . "\n"
 die "UTF-8 limit of data may be exceeded: " . length($bestblkstr) . "\n"
    if length($bestblkstr) > 0xffff / 3;
 {
-    print "Generating $ARGV[1] with shift of $bestshift";
+    print "Generating $ARGV[2] with shift of $bestshift";
    my ($i, $j);
-    open OUTPUT, "> $ARGV[1]" or die "Failed creating output file: $!\n";
+    open OUTPUT, "> $ARGV[2]" or die "Failed creating output file: $!\n";
    print OUTPUT <<EOF;
 /* gnu/java/lang/CharData -- Database for java.lang.Character Unicode info
   Copyright (C) 2002 Free Software Foundation, Inc.
@ -345,8 +371,9 @@ package gnu.java.lang;
 /**
 * This contains the info about the unicode characters, that
 * java.lang.Character needs.  It is generated automatically from
- * <code>$ARGV[0]</code>, by some
+ * <code>$ARGV[0]</code> and
- * perl scripts. This Unicode definition file can be found on the
+ * <code>$ARGV[1]</code>, by some
 * perl scripts. These Unicode definition files can be found on the
 * <a href="http://www.unicode.org">http://www.unicode.org</a> website.
 * JDK 1.4 uses Unicode version 3.0.0.
 *
@ -358,13 +385,18 @@ package gnu.java.lang;
 * into the attribute tables <code>UPPER</code>, <code>LOWER</code>,
 * <code>NUM_VALUE</code>, and <code>DIRECTION</code>.  Notice that the
 * attribute tables are much smaller than 0xffff entries; as many characters
- * in Unicode share common attributes.  Finally, there is a listing for
+ * in Unicode share common attributes.  The DIRECTION table also contains
- * <code>TITLE</code> exceptions (most characters just have the same
+ * a field for detecting characters with multi-character uppercase expansions.
- * title case as upper case).
+ * Next, there is a listing for <code>TITLE</code> exceptions (most characters
 * just have the same title case as upper case).  Finally, there are two
 * tables for multi-character capitalization, <code>UPPER_SPECIAL</code>
 * which lists the characters which are special cased, and
 * <code>UPPER_EXPAND</code>, which lists their expansion.
 *
 * \@author scripts/unicode-muncher.pl (written by Jochen Hoenicke,
 *         Eric Blake)
 * \@see Character
 * \@see String
 */
 public interface CharData
 {
@ -417,7 +449,7 @@ EOF
        print OUTPUT $i ? "\n    + \"" : "    = \"";
        for $j (0 .. 10) {
            last if $len <= $i * 11 + $j;
-            my $val = unpack "n", substr($bestblkstr, 2 * ($i*11 + $j), 2);
+            my $val = unpack "n", substr($bestblkstr, 2 * ($i * 11 + $j), 2);
            print OUTPUT javaChar($val);
        }
        print OUTPUT "\"";
@ -451,10 +483,12 @@ EOF
 ;
  /**
-   * This is the attribute table for computing the uppercase representation
+   * This is the attribute table for computing the single-character uppercase
-   * of a character.  The value is the signed difference between the
+   * representation of a character.  The value is the signed difference
-   * character and its uppercase version.  Note that this is stored as an
+   * between the character and its uppercase version.  Note that this is
-   * unsigned char since this is a String literal.
+   * stored as an unsigned char since this is a String literal.  When
   * capitalizing a String, you must first check if a multi-character uppercase
   * sequence exists before using this character.
   */
  String UPPER
 EOF
@ -483,11 +517,11 @@ EOF
 EOF
    $len = @charinfo;
-    for ($i = 0; $i < $len / 11; $i++) {
+    for ($i = 0; $i < $len / 13; $i++) {
        print OUTPUT $i ? "\n    + \"" : "    = \"";
-        for $j (0 .. 10) {
+        for $j (0 .. 12) {
-            last if $len <= $i * 11 + $j;
+            last if $len <= $i * 13 + $j;
-            my $val = $charinfo[$i * 11 + $j][2];
+            my $val = $charinfo[$i * 13 + $j][2];
            print OUTPUT javaChar($val);
        }
        print OUTPUT "\"";
@ -498,19 +532,25 @@ EOF
  /**
   * This is the attribute table for computing the directionality class
-   * of a character.  At present, the value is in the range 0 - 18 if the
+   * of a character, as well as a marker of characters with a multi-character
-   * character has a direction, otherwise it is -1.  Note that this is
+   * capitalization.  The direction is taken by performing a signed shift
-   * stored as an unsigned char since this is a String literal.
+   * right by 2 (where a result of -1 means an unknown direction, such as
   * for undefined characters). The lower 2 bits form a count of the
   * additional characters that will be added to a String when performing
   * multi-character uppercase expansion. This count is also used, along with
   * the offset in UPPER_SPECIAL, to determine how much of UPPER_EXPAND to use
   * when performing the case conversion. Note that this information is stored
   * as an unsigned char since this is a String literal.
   */
  String DIRECTION
 EOF
    $len = @charinfo;
-    for ($i = 0; $i < $len / 11; $i++) {
+    for ($i = 0; $i < $len / 17; $i++) {
        print OUTPUT $i ? "\n    + \"" : "    = \"";
-        for $j (0 .. 10) {
+        for $j (0 .. 16) {
-            last if $len <= $i * 11 + $j;
+            last if $len <= $i * 17 + $j;
-            my $val = $charinfo[$i * 11 + $j][3];
+            my $val = $charinfo[$i * 17 + $j][3];
            print OUTPUT javaChar($val);
        }
        print OUTPUT "\"";
@ -520,10 +560,10 @@ EOF
 ;
  /**
-   * This is the listing of titlecase special cases (all other character
+   * This is the listing of titlecase special cases (all other characters
   * can use <code>UPPER</code> to determine their titlecase).  The listing
-   * is a sequence of character pairs; converting the first character of the
+   * is a sorted sequence of character pairs; converting the first character
-   * pair to titlecase produces the second character.
+   * of the pair to titlecase produces the second character.
   */
  String TITLE
 EOF
@ -533,7 +573,64 @@ EOF
        print OUTPUT $i ? "\n    + \"" : "    = \"";
        for $j (0 .. 10) {
            last if $len <= $i * 11 + $j;
-            my $val = unpack "n", substr($titlecase, 2 * ($i*11 + $j), 2);
+            my $val = unpack "n", substr($titlecase, 2 * ($i * 11 + $j), 2);
            print OUTPUT javaChar($val);
        }
        print OUTPUT "\"";
    }
    print OUTPUT <<EOF;
 ;
  /**
   * This is a listing of characters with multi-character uppercase sequences.
   * A character appears in this list exactly when it has a non-zero entry
   * in the low-order 2-bit field of DIRECTION.  The listing is a sorted
   * sequence of pairs (hence a binary search on the even elements is an
   * efficient way to lookup a character). The first element of a pair is the
   * character with the expansion, and the second is the index into
   * UPPER_EXPAND where the expansion begins. Use the 2-bit field of
   * DIRECTION to determine where the expansion ends.
   */
  String UPPER_SPECIAL
 EOF
    my @list = sort {$a <=> $b} keys %special;
    my $expansion = "";
    my $offset = 0;
    $len = @list;
    for ($i = 0; $i < $len / 5; $i++) {
        print OUTPUT $i ? "\n    + \"" : "    = \"";
        for $j (0 .. 4) {
            last if $len <= $i * 5 + $j;
            my $ch = $list[$i * 5 + $j];
            print OUTPUT javaChar($ch);
            print OUTPUT javaChar($offset);
            $offset += @{$special{$ch}};
            $expansion .= pack "n*", @{$special{$ch}};
        }
        print OUTPUT "\"";
    }
    print OUTPUT <<EOF;
 ;
  /**
   * This is the listing of special case multi-character uppercase sequences.
   * Characters listed in UPPER_SPECIAL index into this table to find their
   * uppercase expansion. Remember that you must also perform special-casing
   * on two single-character sequences in the Turkish locale, which are not
   * covered here in CharData.
   */
  String UPPER_EXPAND
 EOF
    $len = length($expansion) / 2;
    for ($i = 0; $i < $len / 11; $i++) {
        print OUTPUT $i ? "\n    + \"" : "    = \"";
        for $j (0 .. 10) {
            last if $len <= $i * 11 + $j;
            my $val = unpack "n", substr($expansion, 2 * ($i * 11 + $j), 2);
            print OUTPUT javaChar($val);
        }
        print OUTPUT "\"";