CollationElementIterator.java, [...]: New versions from GNU classpath.

2004-06-01 Michael Koch <konqueror@gmx.de> * java/text/CollationElementIterator.java, java/text/CollationKey.java, java/text/RuleBasedCollator.java: New versions from GNU classpath. * testsuite/libjava.mauve/xfails: Removed all java.text.CollationElementIterator tests. From-SVN: r82510
2004-05-31 22:16:31 +00:00 · 2004-05-31 22:16:31 +00:00 · 57807c3178
commit 57807c3178
parent f7dbd56c9a
5 changed files with 943 additions and 387 deletions
--- a/libjava/java/text/CollationElementIterator.java
+++ b/libjava/java/text/CollationElementIterator.java
@ -38,6 +38,8 @@ exception statement from your version. */

 package java.text;

+import java.util.Vector;
+
 /* Written using "Java Class Libraries", 2nd edition, plus online
 * API docs for JDK 1.2 from http://www.javasoft.com.
 * Status: Believed complete and correct to JDK 1.1.
@ -73,14 +75,26 @@ public final class CollationElementIterator
   */
  String text;

+  /**
+   * This is the index into the collation decomposition where we are currently scanning.
+   */
+  int index;
+
  /**
   * This is the index into the String where we are currently scanning.
   */
  int textIndex;

-  // A piece of lookahead.
-  boolean lookahead_set;
-  int lookahead;
+  /**
+   * Array containing the collation decomposition of the
+   * text given to the constructor.
+   */
+  private Object[] text_decomposition;
+
+  /**
+   * Array containing the source-text index recorded for each decomposition block.
+   */
+  private int[] text_indexes;

  /**
   * This method initializes a new instance of <code>CollationElementIterator</code>
@ -97,6 +111,35 @@ public final class CollationElementIterator
    setText (text);    
  }

+  RuleBasedCollator.CollationElement nextBlock()
+  {
+    if (index >= text_decomposition.length)
+      return null;
+    
+    RuleBasedCollator.CollationElement e =
+      (RuleBasedCollator.CollationElement) text_decomposition[index];
+    
+    textIndex = text_indexes[index+1];
+
+    index++;
+
+    return e;
+  }
+
+  RuleBasedCollator.CollationElement previousBlock()
+  {
+    if (index == 0)
+      return null;
+    
+    index--;
+    RuleBasedCollator.CollationElement e =
+      (RuleBasedCollator.CollationElement) text_decomposition[index];
+
+    textIndex = text_indexes[index+1];
+    
+    return e;
+  }
+
  /**
   * This method returns the collation ordering value of the next character sequence
   * in the string (it may be an extended character following collation rules).
@ -107,10 +150,29 @@ public final class CollationElementIterator
   */
  public int next()
  {
-    if (textIndex == text.length())
-      return NULLORDER;
+    RuleBasedCollator.CollationElement e = nextBlock();

-    return collator.ceiNext (this);
+    if (e == null)
+      return NULLORDER;
+    
+    return e.getValue();
+  }
+
+  /**
+   * This method returns the collation ordering value of the previous character
+   * in the string.  This method will return <code>NULLORDER</code> if the
+   * beginning of the string was reached.
+   *
+   * @return The collation ordering value.
+   */
+  public int previous()
+  {
+    RuleBasedCollator.CollationElement e = previousBlock();
+
+    if (e == null)
+      return NULLORDER;
+    
+    return e.getValue();
  }

  /**
@ -133,9 +195,8 @@ public final class CollationElementIterator
   */
  public void reset()
  {
+    index = 0;
    textIndex = 0;
-    lookahead_set = false;
-    lookahead = 0;
  }

  /**
@ -176,10 +237,152 @@ public final class CollationElementIterator
   */
  public void setText(String text)
  {
+    int idx = 0;
+    int idx_idx = 0;
+    int alreadyExpanded = 0;
+    int idxToMove = 0;
+
    this.text = text;
-    this.textIndex = 0;
-    this.lookahead_set = false;
-    this.lookahead = 0;
+    this.index = 0;
+
+    String work_text = text.intern();
+
+    Vector v = new Vector();
+    Vector vi = new Vector();
+
+    // Build element collection ordered as they come in "text".
+    while (idx < work_text.length())
+      {
+	String key, key_old;
+
+	Object object = null;
+	int p = 1;
+	
+	// IMPROVE: use a TreeMap with a prefix-ordering rule.
+	key_old = key = null;
+	do
+	  {
+	    if (object != null)
+	      key_old = key;
+	    key = work_text.substring (idx, idx+p);
+	    object = collator.prefix_tree.get (key);
+	    if (object != null && idx < alreadyExpanded)
+	      {
+		RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
+		if (prefix.expansion != null && 
+		    prefix.expansion.startsWith(work_text.substring(0, idx)))
+		{
+		  object = null;
+		  key = key_old;
+		}
+	      }
+	    p++;
+	  }
+	while (idx+p <= work_text.length());
+	
+	if (object == null)
+	  key = key_old;
+	
+	RuleBasedCollator.CollationElement prefix =
+	  (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
+
+	/*
+	 * First case: There is no such sequence in the database.
+	 * We will have to build one from the context.
+	 */
+	if (prefix == null)
+	  {
+	    /*
+	     * We are dealing with sequences in an expansion. They
+	     * are treated as accented characters (tertiary order).
+	     */
+	    if (alreadyExpanded > 0)
+	      {
+		RuleBasedCollator.CollationElement e =
+		  collator.getDefaultAccentedElement (work_text.charAt (idx));
+		
+		v.add (e);
+		vi.add (new Integer(idx_idx));
+		idx++;
+		alreadyExpanded--;
+		if (alreadyExpanded == 0)
+		  {
+		    /* There are no characters left in the expansion set.
+		     * We can increase the pointer in the source string.
+		     */
+		    idx_idx += idxToMove;
+		    idxToMove = 0; 
+		  }
+		else
+		  idx_idx++;
+	      }
+	    else
+	      {
+		/* This is a normal character. */
+		RuleBasedCollator.CollationElement e =
+		  collator.getDefaultElement (work_text.charAt (idx));
+		Integer i_ref = new Integer(idx_idx);
+
+		/* Don't forget to mark it as a special sequence so the
+		 * string can be ordered.
+		 */
+		v.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
+		vi.add (i_ref);
+		v.add (e);
+		vi.add (i_ref);
+		idx_idx++;
+		idx++;
+	      }
+	    continue;
+	  }
+ 
+	/*
+	 * Second case: Here we have found a matching sequence.
+	 * If it has an expansion string, prepend it to the "work text" and add the
+	 * corresponding sorting element. We must also update the expansion counters.
+	 */
+	if (prefix.expansion != null)
+	  {
+	    work_text = prefix.expansion
+	      + work_text.substring (idx+prefix.key.length());
+	    idx = 0;
+	    v.add (prefix);
+	    vi.add (new Integer(idx_idx));
+	    if (alreadyExpanded == 0)
+	      idxToMove = prefix.key.length();
+	    alreadyExpanded += prefix.expansion.length()-prefix.key.length();
+	  }
+	else
+	  {
+	    /* Third case: the simplest. We have got the prefix and it
+	     * does not need to be expanded.
+	     */
+	    v.add (prefix);
+	    vi.add (new Integer(idx_idx));
+	    idx += prefix.key.length();
+	    /* If the sequence is in an expansion, we must decrease the
+	     * counter.
+	     */
+	    if (alreadyExpanded > 0)
+	      {
+		alreadyExpanded -= prefix.key.length();
+		if (alreadyExpanded == 0)
+		  {
+		    idx_idx += idxToMove;
+		    idxToMove = 0;
+		  }
+	      } else
+		idx_idx += prefix.key.length();
+	  }
+      }
+    
+    text_decomposition = v.toArray();
+    text_indexes = new int[vi.size()+1];
+    for (int i = 0; i < vi.size(); i++) 
+      {
+	text_indexes[i] = ((Integer)vi.elementAt(i)).intValue();
+      }
+    text_indexes[vi.size()] = text.length();
  }

  /**
@ -215,4 +418,50 @@ public final class CollationElementIterator
  {
    return textIndex;
  }
+
+  /**
+   * This method sets the iteration index position into the current
+   * <code>String</code> to the specified value.  This value must not
+   * be negative and must not be greater than the last index position
+   * in the <code>String</code>.
+   *
+   * @param offset The new iteration index position.
+   *
+   * @exception IllegalArgumentException If the new offset is not valid.
+   */
+  public void setOffset(int offset)
+  {
+    if (offset < 0)
+      throw new IllegalArgumentException("Negative offset: " + offset);
+
+    if (offset > (text.length() - 1))
+      throw new IllegalArgumentException("Offset too large: " + offset);
+    
+    for (index = 0; index < text_decomposition.length; index++)
+      {	
+	if (offset <= text_indexes[index])
+	  break;
+      }
+    /*
+     * As text_indexes[0] == 0, index can only be 0 here when offset == 0, and
+     * then the first branch is taken; the else branch always sees index > 0.
+     */
+    if (text_indexes[index] == offset)
+      textIndex = offset;
+    else
+      textIndex = text_indexes[index-1];
+  }
+
+  /**
+   * This method returns the maximum length of any expansion sequence that
+   * ends with the specified collation order value.  (Whatever that means).
+   *
+   * @param value The collation order value
+   *
+   * @return The maximum length of an expansion sequence.
+   */
+  public int getMaxExpansion(int value)
+  {
+    return 1;
+  }
 }
--- a/libjava/java/text/CollationKey.java
+++ b/libjava/java/text/CollationKey.java
@ -78,24 +78,13 @@ public final class CollationKey implements Comparable
  /**
   * This is the bit value for this key.
   */
-  private int[] key;
+  private byte[] key;

-  CollationKey(Collator collator, CollationElementIterator iter,
-	       String originalText, int strength)
+  CollationKey (Collator collator, String originalText, byte[] key)
  {
    this.collator = collator;
    this.originalText = originalText;
-
-    // Compute size of required array.
-    int size = 0;
-    while (RuleBasedCollator.next(iter, strength)
-	   != CollationElementIterator.NULLORDER)
-      ++size;
-
-    iter.reset();
-    key = new int[size];
-    for (int i = 0; i < size; i++)
-      key[i] = RuleBasedCollator.next(iter, strength);
+    this.key = key;
  }

  /**
@ -205,15 +194,6 @@ public final class CollationKey implements Comparable
   */
  public byte[] toByteArray()
  {
-    byte[] r = new byte[4 * key.length];
-    int off = 0;
-    for (int i = 0; i < key.length; ++i)
-      {
-	r[off++] = (byte) ((key[i] >>> 24) & 255);
-	r[off++] = (byte) ((key[i] >>> 16) & 255);
-	r[off++] = (byte) ((key[i] >>>  8) & 255);
-	r[off++] = (byte) ((key[i]       ) & 255);
-      }
-    return r;
+    return key;
  }
 }
--- a/libjava/java/text/RuleBasedCollator.java
+++ b/libjava/java/text/RuleBasedCollator.java