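// Exported from the web viewer of the Subversion repository "bacoAlunos".
// The viewer's navigation links (Rev, Blame, Compare with Previous, Last modification,
// View Log, RSS feed) were captured with the page and are not part of the source.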

package genson;

import java.io.*;

/**
 * This is an internal class that might evolve in the future into a JsonReader Factory and be moved
 * to the stream package.
 */

public final class EncodingAwareReaderFactory {

  /**
   * The Unicode encodings this factory is able to recognise, plus {@link #UNKNOWN} used as the
   * "nothing detected yet" marker.
   */
  static enum UTFEncoding {
    UTF_32BE(4, 4), UTF_32LE(4, 4), UTF_16BE(2, 2), UTF_16LE(2, 2), UTF_8(1, 3), UNKNOWN(-1, 0);

    /** Size in bytes of one code unit of this encoding ({@code -1} for {@link #UNKNOWN}). */
    final int bytes;

    /** Number of bytes taken by the byte order mark of this encoding. */
    final int bomLength;

    private UTFEncoding(int bytes, int bomLength) {
      this.bytes = bytes;
      this.bomLength = bomLength;
    }

    /**
     * @return the charset name understood by {@link InputStreamReader}, i.e. the constant name
     *         with underscores replaced by dashes (for example {@code UTF-16LE}).
     */
    public String encoding() {
      return name().replace('_', '-');
    }
  }

  /**
   * Creates java.io.Reader instances with detected encoding from the input stream
   * using BOM if present or JSON spec.
   *
   * Up to four bytes are consumed from the stream to perform the detection. A byte order mark,
   * when present, is dropped; every other consumed byte is pushed back so that the returned
   * reader still yields the complete content.
   *
   * Some links:
   * http://www.herongyang.com/Unicode/
   * http://www.ietf.org/rfc/rfc4627.txt
   *
   * @param is the stream to read from
   * @return a reader decoding {@code is} with the detected encoding (UTF-8 when nothing more
   *         specific could be detected)
   * @throws java.io.IOException if reading from the stream fails
   * @throws java.io.UnsupportedEncodingException if the detected encoding is not supported
   */
  public Reader createReader(InputStream is) throws IOException {
    byte[] bytes = new byte[4];
    int len = fetchBytes(bytes, is);

    // The stream is already exhausted, so the charset has no influence on what is decoded;
    // it is named explicitly only to avoid depending on the platform default.
    if (len < 1) {
      return new InputStreamReader(is, UTFEncoding.UTF_8.encoding());
    }

    // Pack the fetched bytes into one big-endian word; positions that were not read stay zero.
    int bits_32 = (bytes[0] & 0xFF) << 24
      | (bytes[1] & 0xFF) << 16
      | (bytes[2] & 0xFF) << 8
      | (bytes[3] & 0xFF);

    // try to detect the encoding from the BOM if one is used
    UTFEncoding encoding = detectEncodingFromBOM(bits_32, len);
    boolean hasBOM = encoding != UTFEncoding.UNKNOWN;

    // no BOM then fall back to JSON spec
    if (!hasBOM) {
      encoding = detectEncodingUsingJSONSpec(bits_32, len);
    }

    // should not happen as we default to UTF-8
    if (encoding == UTFEncoding.UNKNOWN) {
      throw new UnsupportedEncodingException("The encoding could not be detected from the stream.");
    }

    // Only the BOM itself is discarded. Its length is not the code unit size: the UTF-8 BOM
    // takes three bytes although a UTF-8 code unit is a single byte.
    int usedBOMBytes = hasBOM ? encoding.bomLength : 0;
    int bytesToUnread = len - usedBOMBytes;

    // small optimization to avoid encapsulation when there is nothing to unread
    if (bytesToUnread == 0) {
      return new InputStreamReader(is, encoding.encoding());
    } else {
      PushbackInputStream pis = new PushbackInputStream(is, bytesToUnread);
      pis.unread(bytes, usedBOMBytes, bytesToUnread);
      return new InputStreamReader(pis, encoding.encoding());
    }
  }

  /**
   * Looks for a byte order mark at the start of the fetched bytes.
   *
   * @param bits_32 the first bytes of the stream packed big-endian, zero padded
   * @param len how many of those bytes were really read (1 to 4)
   * @return the encoding announced by the BOM or {@link UTFEncoding#UNKNOWN} if there is none
   */
  private UTFEncoding detectEncodingFromBOM(int bits_32, int len) {
    int bits_16 = bits_32 >>> 16;

    // Each mark is only accepted when enough bytes were read to contain it, otherwise the zero
    // padding could be mistaken for data. The UTF-32LE mark starts with the UTF-16LE one, so
    // the 32 bit marks must be tested first.
    if (len >= 4 && bits_32 == 0x0000FEFF) {
      return UTFEncoding.UTF_32BE;
    } else if (len >= 4 && bits_32 == 0xFFFE0000) {
      return UTFEncoding.UTF_32LE;
    } else if (len >= 2 && bits_16 == 0xFEFF) {
      return UTFEncoding.UTF_16BE;
    } else if (len >= 2 && bits_16 == 0xFFFE) {
      return UTFEncoding.UTF_16LE;
    } else if (len >= 3 && bits_32 >>> 8 == 0xEFBBBF) {
      return UTFEncoding.UTF_8;
    } else {
      return UTFEncoding.UNKNOWN;
    }
  }

  /**
   * Guesses the encoding from the position of the zero bytes among the first bytes, in the
   * spirit of RFC 4627 section 3.
   *
   * @param bits_32 the first bytes of the stream packed big-endian, zero padded
   * @param len how many of those bytes were really read (1 to 4)
   * @return the guessed encoding, UTF-8 when no other pattern matches
   */
  private UTFEncoding detectEncodingUsingJSONSpec(int bits_32, int len) {
    int bits_16 = bits_32 >>> 16;

    // The UTF-32 patterns need four real bytes: with fewer, the padding zeros would make a
    // one byte document look like UTF-32LE.
    if (len >= 4) {
      if (bits_32 >>> 8 == 0) {
        return UTFEncoding.UTF_32BE;
      } else if ((bits_32 & 0x00FFFFFF) == 0) {
        return UTFEncoding.UTF_32LE;
      }
    }

    // Likewise the UTF-16 patterns need at least two real bytes.
    if (len >= 2) {
      if ((bits_16 & 0xFF00) == 0) {
        return UTFEncoding.UTF_16BE;
      } else if ((bits_16 & 0x00FF) == 0) {
        return UTFEncoding.UTF_16LE;
      }
    }

    return UTFEncoding.UTF_8;
  }

  /**
   * Fills {@code bytes} from the stream, reading repeatedly until the array is full or the end
   * of the stream is reached, since a single read may deliver fewer bytes than requested.
   *
   * @param bytes the buffer to fill
   * @param is the stream to read from
   * @return the number of bytes stored in {@code bytes}
   * @throws IOException if reading from the stream fails
   */
  private int fetchBytes(byte[] bytes, InputStream is) throws IOException {
    int start = 0;
    int bytesRead;

    while (start < bytes.length && (bytesRead = is.read(bytes, start, bytes.length - start)) > -1) {
      start += bytesRead;
    }

    return start;
  }
}