// public class ByteToCharUnicode extends ByteToCharConverter
/*
 * Copyright (c) 1996, 1999, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.io;
import java.io.*;


/**
 * Convert byte arrays containing Unicode characters into arrays of actual
 * Unicode characters.  This class may be used directly, in which case it
 * expects the input byte array to begin with a byte-order mark, or it may be
 * subclassed in order to preset the byte order and mark behavior.
 *
 * <p> Whether or not a mark is expected, if a mark that does not match the
 * established byte order is later discovered then a
 * <tt>MalformedInputException</tt> will be thrown by the <tt>convert</tt>
 * method.  If a correct mark is seen later in the input stream, it is passed
 * through as a character.
 *
 * @see ByteToCharUnicodeLittle
 * @see ByteToCharUnicodeLittleUnmarked
 * @see ByteToCharUnicodeBig
 * @see ByteToCharUnicodeBigUnmarked
 *
 * @author      Mark Reinhold
 */

public class ByteToCharUnicode extends ByteToCharConverter {

    /** Value of a byte-order mark whose two bytes are combined in the right order. */
    static final char BYTE_ORDER_MARK = (char) 0xfeff;
    /** Value obtained when a byte-order mark is combined in the opposite order. */
    static final char REVERSED_MARK = (char) 0xfffe;

    /** Byte order is not preset; it is taken from the initial byte-order mark. */
    static final int AUTO = 0;
    /** First byte of each pair is the high-order byte. */
    static final int BIG = 1;
    /** First byte of each pair is the low-order byte. */
    static final int LITTLE = 2;

    int originalByteOrder;      /* Byte order specified at creation */
    int byteOrder;              /* Byte order in use */
    boolean usesMark;           /* Look for a mark and interpret it */

    /**
     * Creates a Unicode byte-to-char converter that expects the first pair of
     * input bytes to be a byte-order mark, which will be interpreted and
     * discarded.  If the first pair of bytes is not such a mark then a
     * <tt>MalformedInputException</tt> will be thrown by the convert method.
     */
    public ByteToCharUnicode() {
        originalByteOrder = byteOrder = AUTO;
        usesMark = true;
    }

    /**
     * Creates a Unicode byte-to-char converter that uses the given byte order
     * and may or may not insist upon an initial byte-order mark.
     *
     * @param bo  the byte order to start with: <tt>AUTO</tt>, <tt>BIG</tt> or
     *            <tt>LITTLE</tt>
     * @param m   whether the first pair of input bytes is examined as a
     *            byte-order mark
     */
    protected ByteToCharUnicode(int bo, boolean m) {
        originalByteOrder = byteOrder = bo;
        usesMark = m;
    }

    /**
     * Returns the encoding name that corresponds to the byte order given at
     * creation and to whether a byte-order mark is interpreted.
     *
     * @return one of "UnicodeBig", "UnicodeBigUnmarked", "UnicodeLittle",
     *         "UnicodeLittleUnmarked" or "Unicode"
     */
    public String getCharacterEncoding() {
        switch (originalByteOrder) {
        case BIG:
            return usesMark ? "UnicodeBig" : "UnicodeBigUnmarked";
        case LITTLE:
            return usesMark ? "UnicodeLittle" : "UnicodeLittleUnmarked";
        default:
            return "Unicode";
        }
    }

    boolean started = false;    /* Initial mark check has been performed */
    int leftOverByte;           /* Odd trailing byte kept from the last call */
    boolean leftOver = false;   /* leftOverByte holds a pending byte */

    /**
     * Converts pairs of bytes from <tt>in[inOff..inEnd)</tt> into characters
     * stored in <tt>out[outOff..outEnd)</tt>.
     *
     * <p> An odd trailing byte is remembered and combined with the first byte
     * of the next call.  On the first pair seen, if this converter uses a
     * mark, the pair is checked against the byte-order mark: it either
     * establishes the byte order, confirms the preset one (the mark is then
     * discarded), or, when the order is preset and the pair is not a mark, is
     * converted as an ordinary character.
     *
     * <p> <tt>byteOff</tt>, <tt>charOff</tt> and <tt>badInputLength</tt> are
     * not declared in this class; presumably they are inherited from
     * <tt>ByteToCharConverter</tt> and report progress to the caller.
     * <tt>byteOff</tt> is advanced past every byte whose effect has been
     * recorded, including a consumed initial mark, so that a caller resuming
     * at <tt>byteOff</tt> never feeds the same bytes twice.
     *
     * @param in      input bytes
     * @param inOff   index of the first input byte
     * @param inEnd   index just past the last input byte
     * @param out     destination for converted characters
     * @param outOff  index of the first free slot in <tt>out</tt>
     * @param outEnd  index just past the last usable slot in <tt>out</tt>
     * @return the number of characters written to <tt>out</tt>
     * @throws ConversionBufferFullException if <tt>out</tt> has no room for
     *         the next character
     * @throws MalformedInputException if the initial mark is missing or does
     *         not match the preset byte order, or if a reversed mark is
     *         found later in the input
     */
    public int convert(byte[] in, int inOff, int inEnd,
                       char[] out, int outOff, int outEnd)
        throws ConversionBufferFullException, MalformedInputException
    {
        byteOff = inOff;
        charOff = outOff;

        if (inOff >= inEnd)
            return 0;

        int b1, b2;
        int bc = 0;
        int inI = inOff, outI = outOff;

        /* True while b1 still holds the byte carried over from the previous
           call rather than a byte read from in[] during this call */
        boolean b1Carried = leftOver;

        if (leftOver) {
            b1 = leftOverByte & 0xff;
            leftOver = false;
        }
        else {
            b1 = in[inI++] & 0xff;
        }
        bc = 1;

        if (usesMark && !started) {     /* Read initial byte-order mark */
            if (inI < inEnd) {
                b2 = in[inI++] & 0xff;
                bc = 2;

                /* The mark is always examined high byte first; the result
                   tells which order the stream actually uses */
                char c = (char) ((b1 << 8) | b2);
                int bo = AUTO;

                if (c == BYTE_ORDER_MARK)
                    bo = BIG;
                else if (c == REVERSED_MARK)
                    bo = LITTLE;

                if (byteOrder == AUTO) {
                    if (bo == AUTO) {
                        badInputLength = bc;
                        throw new
                            MalformedInputException("Missing byte-order mark");
                    }
                    byteOrder = bo;
                    /* The mark has been consumed: record that, so a caller
                       resuming at byteOff does not feed it again */
                    byteOff = inI;
                    b1Carried = false;
                    if (inI < inEnd) {
                        b1 = in[inI++] & 0xff;
                        bc = 1;
                    }
                }
                else if (bo == AUTO) {
                    /* No mark and the order is preset: push b2 back so the
                       pair is converted as an ordinary character */
                    inI--;
                    bc = 1;
                }
                else if (byteOrder == bo) {
                    /* Matching mark: discard it and record its consumption */
                    byteOff = inI;
                    b1Carried = false;
                    if (inI < inEnd) {
                        b1 = in[inI++] & 0xff;
                        bc = 1;
                    }
                }
                else {
                    badInputLength = bc;
                    throw new
                        MalformedInputException("Incorrect byte-order mark");
                }

                started = true;
            }
        }

        /* Loop invariant: (b1 contains the next input byte) && (bc == 1) */
        while (inI < inEnd) {
            b2 = in[inI++] & 0xff;
            bc = 2;

            char c;
            if (byteOrder == BIG)
                c = (char) ((b1 << 8) | b2);
            else
                c = (char) ((b2 << 8) | b1);

            if (c == REVERSED_MARK) {
                /* Report the length of the offending pair, as the other
                   malformed-input cases do */
                badInputLength = bc;
                throw new
                    MalformedInputException("Reversed byte-order mark");
            }

            if (outI >= outEnd) {
                /* byteOff does not cover a carried-over byte, so put it back;
                   otherwise a resumed call would pair bytes off by one */
                if (b1Carried) {
                    leftOverByte = b1;
                    leftOver = true;
                }
                throw new ConversionBufferFullException();
            }
            out[outI++] = c;
            byteOff = inI;
            charOff = outI;
            b1Carried = false;

            if (inI < inEnd) {
                b1 = in[inI++] & 0xff;
                bc = 1;
            }
        }

        if (bc == 1) {
            /* Odd byte at the end of the input: keep it for the next call */
            leftOverByte = b1;
            byteOff = inI;
            leftOver = true;
        }

        return outI - outOff;
    }

    /**
     * Returns this converter to its initial state: discards any pending byte,
     * zeroes the offsets, re-arms the initial mark check and restores the
     * byte order given at creation.
     */
    public void reset() {
        leftOver = false;
        byteOff = charOff = 0;
        started = false;
        byteOrder = originalByteOrder;
    }

    /**
     * Finishes a conversion.  This converter never has characters to emit at
     * flush time, so the buffer arguments are not used.
     *
     * @param buf  unused
     * @param off  unused
     * @param len  unused
     * @return always 0
     * @throws MalformedInputException if an odd byte is still pending; the
     *         converter is reset before the exception is thrown
     */
    public int flush(char buf[], int off, int len)
        throws MalformedInputException
    {
        if (leftOver) {
            reset();
            throw new MalformedInputException();
        }
        byteOff = charOff = 0;
        return 0;
    }

}
// Classes / Exceptions / ByteToCharUnicode