public class

TrieIterator

extends Object
implements RangeValueIterator
/*
 * Copyright (c) 2005, 2006, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 *******************************************************************************
 * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
 * Agreement between IBM and Sun. This technology is protected by multiple     *
 * US and International patents. This notice and attribution to IBM may not    *
 * to removed.                                                                 *
 *******************************************************************************
 */

package sun.text.normalizer;

/**
 * <p>Class enabling iteration of the values in a Trie.</p>
 * <p>Result of each iteration contains the interval of codepoints that have
 * the same value type and the value type itself.</p>
 * <p>The comparison of each codepoint value is done via extract(), which the
 * default implementation is to return the value as it is.</p>
 * <p>Method extract() can be overwritten to perform manipulations on
 * codepoint values in order to perform specialized comparison.</p>
 * <p>TrieIterator is designed to be a generic iterator for the CharTrie
 * and the IntTrie, hence to accommodate both types of data, the return
 * result will be in terms of int (32 bit) values.</p>
 * <p>See com.ibm.icu.text.UCharacterTypeIterator for examples of use.</p>
 * <p>Notes for porting utrie_enum from icu4c to icu4j:<br>
 * Internally, icu4c's utrie_enum performs all iterations in its body. In Java
 * sense, the caller will have to pass a object with a callback function
 * UTrieEnumRange(const void *context, UChar32 start, UChar32 limit,
 * uint32_t value) into utrie_enum. utrie_enum will then find ranges of
 * codepoints with the same value as determined by
 * UTrieEnumValue(const void *context, uint32_t value). for each range,
 * utrie_enum calls the callback function to perform a task. In this way,
 * icu4c performs the iteration within utrie_enum.
 * To follow the JDK model, icu4j is slightly different from icu4c.
 * Instead of requesting the caller to implement an object for a callback.
 * The caller will have to implement a subclass of TrieIterator, fleshing out
 * the method extract(int) (equivalent to UTrieEnumValue). Independent of icu4j,
 * the caller will have to code his own iteration and flesh out the task
 * (equivalent to UTrieEnumRange) to be performed in the iteration loop.
 * </p>
 * <p>There are basically 3 usage scenarios for porting:</p>
 * <p>1) UTrieEnumValue is the only implemented callback then just implement a
 * subclass of TrieIterator and override the extract(int) method. The
 * extract(int) method is analogus to UTrieEnumValue callback.
 * </p>
 * <p>2) UTrieEnumValue and UTrieEnumRange both are implemented then implement
 * a subclass of TrieIterator, override the extract method and iterate, e.g
 * </p>
 * <p>utrie_enum(&normTrie, _enumPropertyStartsValue, _enumPropertyStartsRange,
 *               set);<br>
 * In Java :<br>
 * <pre>
 * class TrieIteratorImpl extends TrieIterator{
 *     public TrieIteratorImpl(Trie data){
 *         super(data);
 *     }
 *     public int extract(int value){
 *         // port the implementation of _enumPropertyStartsValue here
 *     }
 * }
 * ....
 * TrieIterator fcdIter  = new TrieIteratorImpl(fcdTrieImpl.fcdTrie);
 * while(fcdIter.next(result)) {
 *     // port the implementation of _enumPropertyStartsRange
 * }
 * </pre>
 * </p>
 * <p>3) UTrieEnumRange is the only implemented callback then just implement
 * the while loop, when utrie_enum is called
 * <pre>
 * // utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, set);
 * TrieIterator fcdIter  = new TrieIterator(fcdTrieImpl.fcdTrie);
 * while(fcdIter.next(result)){
 *     set.add(result.start);
 * }
 * </pre>
 * </p>
 * @author synwee
 * @see com.ibm.icu.impl.Trie
 * @see com.ibm.icu.lang.UCharacterTypeIterator
 * @since release 2.1, Jan 17 2002
 */
public class TrieIterator implements RangeValueIterator

{
    // public constructor ---------------------------------------------

    /**
    * TrieEnumeration constructor
    * @param trie to be used
    * @exception IllegalArgumentException throw when argument is null.
    * @draft 2.1
    */
    public TrieIterator(Trie trie)
    {
        if (trie == null) {
            throw new IllegalArgumentException(
                                          "Argument trie cannot be null");
        }
        m_trie_             = trie;
        // synwee: check that extract belongs to the child class
        m_initialValue_     = extract(m_trie_.getInitialValue());
        reset();
    }

    // public methods -------------------------------------------------

    /**
    * <p>Returns true if we are not at the end of the iteration, false
    * otherwise.</p>
    * <p>The next set of codepoints with the same value type will be
    * calculated during this call and returned in the arguement element.</p>
    * @param element return result
    * @return true if we are not at the end of the iteration, false otherwise.
    * @exception NoSuchElementException - if no more elements exist.
    * @see com.ibm.icu.util.RangeValueIterator.Element
    * @draft 2.1
    */
    public final boolean next(Element element)
    {
        if (m_nextCodepoint_ > UCharacter.MAX_VALUE) {
            return false;
        }
        if (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE &&
            calculateNextBMPElement(element)) {
            return true;
        }
        calculateNextSupplementaryElement(element);
        return true;
    }

    /**
    * Resets the iterator to the beginning of the iteration
    * @draft 2.1
    */
    public final void reset()
    {
        m_currentCodepoint_ = 0;
        m_nextCodepoint_    = 0;
        m_nextIndex_        = 0;
        m_nextBlock_ = m_trie_.m_index_[0] << Trie.INDEX_STAGE_2_SHIFT_;
        if (m_nextBlock_ == 0) {
            m_nextValue_ = m_initialValue_;
        }
        else {
            m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_));
        }
        m_nextBlockIndex_ = 0;
        m_nextTrailIndexOffset_ = TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_;
    }

    // protected methods ----------------------------------------------

    /**
    * Called by next() to extracts a 32 bit value from a trie value
    * used for comparison.
    * This method is to be overwritten if special manipulation is to be done
    * to retrieve a relevant comparison.
    * The default function is to return the value as it is.
    * @param value a value from the trie
    * @return extracted value
    * @draft 2.1
    */
    protected int extract(int value)
    {
        return value;
    }

    // private methods ------------------------------------------------

    /**
    * Set the result values
    * @param element return result object
    * @param start codepoint of range
    * @param limit (end + 1) codepoint of range
    * @param value common value of range
    */
    private final void setResult(Element element, int start, int limit,
                                 int value)
    {
        element.start = start;
        element.limit = limit;
        element.value = value;
    }

    /**
    * Finding the next element.
    * This method is called just before returning the result of
    * next().
    * We always store the next element before it is requested.
    * In the case that we have to continue calculations into the
    * supplementary planes, a false will be returned.
    * @param element return result object
    * @return true if the next range is found, false if we have to proceed to
    *         the supplementary range.
    */
    private final boolean calculateNextBMPElement(Element element)
    {
        int currentBlock    = m_nextBlock_;
        int currentValue    = m_nextValue_;
        m_currentCodepoint_ = m_nextCodepoint_;
        m_nextCodepoint_ ++;
        m_nextBlockIndex_ ++;
        if (!checkBlockDetail(currentValue)) {
            setResult(element, m_currentCodepoint_, m_nextCodepoint_,
                      currentValue);
            return true;
        }
        // synwee check that next block index == 0 here
        // enumerate BMP - the main loop enumerates data blocks
        while (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE) {
            m_nextIndex_ ++;
            // because of the way the character is split to form the index
            // the lead surrogate and trail surrogate can not be in the
            // mid of a block
            if (m_nextCodepoint_ == LEAD_SURROGATE_MIN_VALUE_) {
                // skip lead surrogate code units,
                // go to lead surrogate codepoints
                m_nextIndex_ = BMP_INDEX_LENGTH_;
            }
            else if (m_nextCodepoint_ == TRAIL_SURROGATE_MIN_VALUE_) {
                // go back to regular BMP code points
                m_nextIndex_ = m_nextCodepoint_ >> Trie.INDEX_STAGE_1_SHIFT_;
            }

            m_nextBlockIndex_ = 0;
            if (!checkBlock(currentBlock, currentValue)) {
                setResult(element, m_currentCodepoint_, m_nextCodepoint_,
                          currentValue);
                return true;
            }
        }
        m_nextCodepoint_ --;   // step one back since this value has not been
        m_nextBlockIndex_ --;  // retrieved yet.
        return false;
    }

    /**
    * Finds the next supplementary element.
    * For each entry in the trie, the value to be delivered is passed through
    * extract().
    * We always store the next element before it is requested.
    * Called after calculateNextBMP() completes its round of BMP characters.
    * There is a slight difference in the usage of m_currentCodepoint_
    * here as compared to calculateNextBMP(). Though both represents the
    * lower bound of the next element, in calculateNextBMP() it gets set
    * at the start of any loop, where-else, in calculateNextSupplementary()
    * since m_currentCodepoint_ already contains the lower bound of the
    * next element (passed down from calculateNextBMP()), we keep it till
    * the end before resetting it to the new value.
    * Note, if there are no more iterations, it will never get to here.
    * Blocked out by next().
    * @param element return result object
    * @draft 2.1
    */
    private final void calculateNextSupplementaryElement(Element element)
    {
        int currentValue = m_nextValue_;
        int currentBlock = m_nextBlock_;
        m_nextCodepoint_ ++;
        m_nextBlockIndex_ ++;

        if (UTF16.getTrailSurrogate(m_nextCodepoint_)
                                        != UTF16.TRAIL_SURROGATE_MIN_VALUE) {
            // this piece is only called when we are in the middle of a lead
            // surrogate block
            if (!checkNullNextTrailIndex() && !checkBlockDetail(currentValue)) {
                setResult(element, m_currentCodepoint_, m_nextCodepoint_,
                          currentValue);
                m_currentCodepoint_ = m_nextCodepoint_;
                return;
            }
            // we have cleared one block
            m_nextIndex_ ++;
            m_nextTrailIndexOffset_ ++;
            if (!checkTrailBlock(currentBlock, currentValue)) {
                setResult(element, m_currentCodepoint_, m_nextCodepoint_,
                          currentValue);
                m_currentCodepoint_ = m_nextCodepoint_;
                return;
            }
        }
        int nextLead  = UTF16.getLeadSurrogate(m_nextCodepoint_);
        // enumerate supplementary code points
        while (nextLead < TRAIL_SURROGATE_MIN_VALUE_) {
            // lead surrogate access
            int leadBlock =
                   m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] <<
                                                   Trie.INDEX_STAGE_2_SHIFT_;
            if (leadBlock == m_trie_.m_dataOffset_) {
                // no entries for a whole block of lead surrogates
                if (currentValue != m_initialValue_) {
                    m_nextValue_      = m_initialValue_;
                    m_nextBlock_      = 0;
                    m_nextBlockIndex_ = 0;
                    setResult(element, m_currentCodepoint_, m_nextCodepoint_,
                              currentValue);
                    m_currentCodepoint_ = m_nextCodepoint_;
                    return;
                }

                nextLead += DATA_BLOCK_LENGTH_;
                // number of total affected supplementary codepoints in one
                // block
                // this is not a simple addition of
                // DATA_BLOCK_SUPPLEMENTARY_LENGTH since we need to consider
                // that we might have moved some of the codepoints
                m_nextCodepoint_ = UCharacterProperty.getRawSupplementary(
                                     (char)nextLead,
                                     (char)UTF16.TRAIL_SURROGATE_MIN_VALUE);
                continue;
            }
            if (m_trie_.m_dataManipulate_ == null) {
                throw new NullPointerException(
                            "The field DataManipulate in this Trie is null");
            }
            // enumerate trail surrogates for this lead surrogate
            m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset(
                               m_trie_.getValue(leadBlock +
                                   (nextLead & Trie.INDEX_STAGE_3_MASK_)));
            if (m_nextIndex_ <= 0) {
                // no data for this lead surrogate
                if (currentValue != m_initialValue_) {
                    m_nextValue_      = m_initialValue_;
                    m_nextBlock_      = 0;
                    m_nextBlockIndex_ = 0;
                    setResult(element, m_currentCodepoint_, m_nextCodepoint_,
                              currentValue);
                    m_currentCodepoint_ = m_nextCodepoint_;
                    return;
                }
                m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_;
            } else {
                m_nextTrailIndexOffset_ = 0;
                if (!checkTrailBlock(currentBlock, currentValue)) {
                    setResult(element, m_currentCodepoint_, m_nextCodepoint_,
                              currentValue);
                    m_currentCodepoint_ = m_nextCodepoint_;
                    return;
                }
            }
            nextLead ++;
         }

         // deliver last range
         setResult(element, m_currentCodepoint_, UCharacter.MAX_VALUE + 1,
                   currentValue);
    }

    /**
    * Internal block value calculations
    * Performs calculations on a data block to find codepoints in m_nextBlock_
    * after the index m_nextBlockIndex_ that has the same value.
    * Note m_*_ variables at this point is the next codepoint whose value
    * has not been calculated.
    * But when returned with false, it will be the last codepoint whose
    * value has been calculated.
    * @param currentValue the value which other codepoints are tested against
    * @return true if the whole block has the same value as currentValue or if
    *              the whole block has been calculated, false otherwise.
    */
    private final boolean checkBlockDetail(int currentValue)
    {
        while (m_nextBlockIndex_ < DATA_BLOCK_LENGTH_) {
            m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_ +
                                                    m_nextBlockIndex_));
            if (m_nextValue_ != currentValue) {
                return false;
            }
            ++ m_nextBlockIndex_;
            ++ m_nextCodepoint_;
        }
        return true;
    }

    /**
    * Internal block value calculations
    * Performs calculations on a data block to find codepoints in m_nextBlock_
    * that has the same value.
    * Will call checkBlockDetail() if highlevel check fails.
    * Note m_*_ variables at this point is the next codepoint whose value
    * has not been calculated.
    * @param currentBlock the initial block containing all currentValue
    * @param currentValue the value which other codepoints are tested against
    * @return true if the whole block has the same value as currentValue or if
    *              the whole block has been calculated, false otherwise.
    */
    private final boolean checkBlock(int currentBlock, int currentValue)
    {
        m_nextBlock_ = m_trie_.m_index_[m_nextIndex_] <<
                                                  Trie.INDEX_STAGE_2_SHIFT_;
        if (m_nextBlock_ == currentBlock &&
            (m_nextCodepoint_ - m_currentCodepoint_) >= DATA_BLOCK_LENGTH_) {
            // the block is the same as the previous one, filled with
            // currentValue
            m_nextCodepoint_ += DATA_BLOCK_LENGTH_;
        }
        else if (m_nextBlock_ == 0) {
            // this is the all-initial-value block
            if (currentValue != m_initialValue_) {
                m_nextValue_      = m_initialValue_;
                m_nextBlockIndex_ = 0;
                return false;
            }
            m_nextCodepoint_ += DATA_BLOCK_LENGTH_;
        }
        else {
            if (!checkBlockDetail(currentValue)) {
                return false;
            }
        }
        return true;
    }

    /**
    * Internal block value calculations
    * Performs calculations on multiple data blocks for a set of trail
    * surrogates to find codepoints in m_nextBlock_ that has the same value.
    * Will call checkBlock() for internal block checks.
    * Note m_*_ variables at this point is the next codepoint whose value
    * has not been calculated.
    * @param currentBlock the initial block containing all currentValue
    * @param currentValue the value which other codepoints are tested against
    * @return true if the whole block has the same value as currentValue or if
    *              the whole block has been calculated, false otherwise.
    */
    private final boolean checkTrailBlock(int currentBlock,
                                          int currentValue)
    {
        // enumerate code points for this lead surrogate
        while (m_nextTrailIndexOffset_ < TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_)
        {
            // if we ever reach here, we are at the start of a new block
            m_nextBlockIndex_ = 0;
            // copy of most of the body of the BMP loop
            if (!checkBlock(currentBlock, currentValue)) {
                return false;
            }
            m_nextTrailIndexOffset_ ++;
            m_nextIndex_ ++;
        }
        return true;
    }

    /**
    * Checks if we are beginning at the start of a initial block.
    * If we are then the rest of the codepoints in this initial block
    * has the same values.
    * We increment m_nextCodepoint_ and relevant data members if so.
    * This is used only in for the supplementary codepoints because
    * the offset to the trail indexes could be 0.
    * @return true if we are at the start of a initial block.
    */
    private final boolean checkNullNextTrailIndex()
    {
        if (m_nextIndex_ <= 0) {
            m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1;
            int nextLead  = UTF16.getLeadSurrogate(m_nextCodepoint_);
            int leadBlock =
                   m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] <<
                                                   Trie.INDEX_STAGE_2_SHIFT_;
            if (m_trie_.m_dataManipulate_ == null) {
                throw new NullPointerException(
                            "The field DataManipulate in this Trie is null");
            }
            m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset(
                               m_trie_.getValue(leadBlock +
                                   (nextLead & Trie.INDEX_STAGE_3_MASK_)));
            m_nextIndex_ --;
            m_nextBlockIndex_ =  DATA_BLOCK_LENGTH_;
            return true;
        }
        return false;
    }

    // private data members --------------------------------------------

    /**
    * Size of the stage 1 BMP indexes
    */
    private static final int BMP_INDEX_LENGTH_ =
                                        0x10000 >> Trie.INDEX_STAGE_1_SHIFT_;
    /**
    * Lead surrogate minimum value
    */
    private static final int LEAD_SURROGATE_MIN_VALUE_ = 0xD800;
    /**
    * Trail surrogate minimum value
    */
    private static final int TRAIL_SURROGATE_MIN_VALUE_ = 0xDC00;
    /**
    * Trail surrogate maximum value
    */
    private static final int TRAIL_SURROGATE_MAX_VALUE_ = 0xDFFF;
    /**
    * Number of trail surrogate
    */
    private static final int TRAIL_SURROGATE_COUNT_ = 0x400;
    /**
    * Number of stage 1 indexes for supplementary calculations that maps to
    * each lead surrogate character.
    * See second pass into getRawOffset for the trail surrogate character.
    * 10 for significant number of bits for trail surrogates, 5 for what we
    * discard during shifting.
    */
    private static final int TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_ =
                                    1 << (10 - Trie.INDEX_STAGE_1_SHIFT_);
    /**
    * Number of data values in a stage 2 (data array) block.
    */
    private static final int DATA_BLOCK_LENGTH_ =
                                              1 << Trie.INDEX_STAGE_1_SHIFT_;
    /**
    * Number of codepoints in a stage 2 block
    */
    private static final int DATA_BLOCK_SUPPLEMENTARY_LENGTH_ =
                                                     DATA_BLOCK_LENGTH_ << 10;
    /**
    * Trie instance
    */
    private Trie m_trie_;
    /**
    * Initial value for trie values
    */
    private int m_initialValue_;
    /**
    * Next element results and data.
    */
    private int m_currentCodepoint_;
    private int m_nextCodepoint_;
    private int m_nextValue_;
    private int m_nextIndex_;
    private int m_nextBlock_;
    private int m_nextBlockIndex_;
    private int m_nextTrailIndexOffset_;
    /**
    * This is the return result element
    */
    private int m_start_;
    private int m_limit_;
    private int m_value_;
}