Fixes after review

90e9807a · Lev · 5609efd0 · 90e9807a · 90e9807a · 90e9807a
Commit 90e9807a authored Nov 26, 2022 by Lev
Showing with 76 additions and 94 deletions
library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java
library/common/src/main/java/com/google/android/exoplayer2/util/Util.java
library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java
library/extractor/src/main/java/com/google/android/exoplayer2/text/tx3g/Tx3gDecoder.java
--- a/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java
+++ b/library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java
@@ -27,6 +27,10 @@ import java.util.Arrays;
 */
 public final class ParsableByteArray {

+  // UTF-16 BOM
+  public static final char BOM_UTF16_BE = '\uFEFF';
+  public static final char BOM_UTF16_LE = '\uFFFE';
+
  private byte[] data;
  private int position;
  // TODO(internal b/147657250): Enforce this limit on all read methods.
@@ -153,6 +157,11 @@ public final class ParsableByteArray {
    this.position = position;
  }

+  /** Resets the current byte offset to zero. */
+  public void resetPosition() {
+    this.position = 0;
+  }
+
  /**
   * Returns the underlying array.
   *
@@ -228,6 +237,11 @@ public final class ParsableByteArray {
    return (char) ((data[position] & 0xFF) << 8 | (data[position + 1] & 0xFF));
  }

+  /** Peeks at the next char, reading the two bytes in little-endian order. */
+  public char peekLittleEndianChar() {
+    return (char) ((data[position] & 0xFF) | (data[position + 1] & 0xFF) << 8 );
+  }
+
  /** Reads the next byte as an unsigned value. */
  public int readUnsignedByte() {
    return (data[position++] & 0xFF);
@@ -532,48 +546,67 @@ public final class ParsableByteArray {
  }

  /**
-   * Reads a line of text.
+   * Reads a line of text. Only the UTF-8, UTF-16LE and UTF-16BE encodings are supported.
   *
   * <p>A line is considered to be terminated by any one of a carriage return ('\r'), a line feed
   * ('\n'), or a carriage return followed immediately by a line feed ('\r\n'). The UTF-16 charset
   * is used. This method discards leading UTF-16 byte order marks (BOM), if present.
   *
-   * @param isLittleEndian UTF-16 (LE) or UTF-16 (BE) encoding should be used
+   * @param charset The charset used to decode the line.
   * @return The line not including any line-termination characters, or null if the end of the data
   * has already been reached.
+   * @throws IllegalArgumentException If the charset is not supported.
   */
  @Nullable
-  public String readLineUtf16(boolean isLittleEndian) {
+  public String readUtfLine(Charset charset) {
+    if(!charset.equals(Charsets.UTF_8)
+        && !charset.equals(Charsets.UTF_16BE)
+        && !charset.equals(Charsets.UTF_16LE)) {
+      throw new IllegalArgumentException("Only UTF-8, UTF-16LE, UTF-16BE encoding supported.");
+    }
+    if(charset.equals(Charsets.UTF_8)) {
+      return readLine();
+    }
+
    if (bytesLeft() == 0) {
      return null;
    }

+    boolean isLittleEndian = charset.equals(Charsets.UTF_16LE);
    int lineLimit = calculateLineLimitForUtf16(isLittleEndian);

-    if (lineLimit - position >= 2 && isUtf16BOM(data[position], data[position + 1])) {
+    if (lineLimit - position >= 2 && isUtf16BOM(peekChar())) {
      // There's a UTF-16 byte order mark at the start of the line. Discard it.
      position += 2;
    }

-    String line;
-    if (isLittleEndian) {
-      line = Util.fromUtf16LEBytes(data, position, lineLimit - position);
-    } else {
-      line = Util.fromUtf16BEBytes(data, position, lineLimit - position);
-    }
+    String line = readString(lineLimit - position, charset);

-    position = lineLimit;
    if (position == limit) {
      return line;
    }

-    if (isEqualsInUtf16(data[position], data[position + 1], '\r', isLittleEndian)) {
+    char currentChar;
+    if(isLittleEndian) {
+      currentChar = peekLittleEndianChar();
+    } else {
+      currentChar = peekChar();
+    }
+
+    if (currentChar == '\r') {
      position += 2;
      if (position == limit) {
        return line;
      }
    }
-    if (isEqualsInUtf16(data[position], data[position + 1], '\n', isLittleEndian)) {
+
+    if(isLittleEndian) {
+      currentChar = peekLittleEndianChar();
+    } else {
+      currentChar = peekChar();
+    }
+
+    if (currentChar == '\n') {
      position += 2;
    }
    return line;
@@ -614,14 +647,8 @@ public final class ParsableByteArray {
    return value;
  }

-  private boolean isEqualsInUtf16(byte first, byte second, char value, boolean isLittleEndian) {
-    return (isLittleEndian && (first | second << 8) == value)
-        || (!isLittleEndian && (first << 8 | second) == value);
-  }
-
-  private boolean isUtf16BOM(byte first, byte second) {
-    return (first == (byte) 0xFF && second == (byte) 0xFE)
-        || (first == (byte) 0xFE && second == (byte) 0xFF);
+  private boolean isUtf16BOM(char character) {
+    return character == BOM_UTF16_BE || character == BOM_UTF16_LE;
  }

  private int calculateLineLimitForUtf16(boolean isLittleEndian) {

--- a/library/common/src/main/java/com/google/android/exoplayer2/util/Util.java
+++ b/library/common/src/main/java/com/google/android/exoplayer2/util/Util.java
@@ -683,30 +683,6 @@ public final class Util {
  }

  /**
-   * Returns a new {@link String} constructed by decoding UTF-16 (LE) encoded bytes in a subarray.
-   *
-   * @param bytes  The UTF-16 encoded bytes to decode.
-   * @param offset The index of the first byte to decode.
-   * @param length The number of bytes to decode.
-   * @return The string.
-   */
-  public static String fromUtf16LEBytes(byte[] bytes, int offset, int length) {
-    return new String(bytes, offset, length, Charsets.UTF_16LE);
-  }
-
-  /**
-   * Returns a new {@link String} constructed by decoding UTF-16 (BE) encoded bytes in a subarray.
-   *
-   * @param bytes  The UTF-16 encoded bytes to decode.
-   * @param offset The index of the first byte to decode.
-   * @param length The number of bytes to decode.
-   * @return The string.
-   */
-  public static String fromUtf16BEBytes(byte[] bytes, int offset, int length) {
-    return new String(bytes, offset, length, Charsets.UTF_16BE);
-  }
-
-  /**
   * Returns a new byte array containing the code points of a {@link String} encoded using UTF-8.
   *
   * @param value The {@link String} whose bytes should be obtained.

--- a/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java
+++ b/library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java
@@ -72,30 +72,14 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
  }

  @Override
-  protected Subtitle decode(byte[] bytes, int length, boolean reset) {
+  protected Subtitle decode(byte[] data, int length, boolean reset) {
    ArrayList<Cue> cues = new ArrayList<>();
    LongArray cueTimesUs = new LongArray();
-    ParsableByteArray subripData = new ParsableByteArray(bytes, length);
-
-    @Nullable Charset utf16Charset;
-    if (bytes.length >= 2) {
-      utf16Charset = getUtf16Charset(bytes[0], bytes[1]);
-    } else {
-      utf16Charset = null;
-    }
+    ParsableByteArray subripData = new ParsableByteArray(data, length);
+    Charset charset = detectUtfCharset(subripData);

    @Nullable String currentLine;
-    while (true) {
-      if (utf16Charset != null) {
-        currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE));
-      } else {
-        currentLine = subripData.readLine();
-      }
-
-      if (currentLine == null) {
-        break;
-      }
-
+    while ((currentLine = subripData.readUtfLine(charset)) != null) {
      if (currentLine.length() == 0) {
        // Skip blank lines.
        continue;
@@ -110,11 +94,7 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
      }

      // Read and parse the timing line.
-      if (utf16Charset != null) {
-        currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE));
-      } else {
-        currentLine = subripData.readLine();
-      }
+      currentLine = subripData.readUtfLine(charset);
      if (currentLine == null) {
        Log.w(TAG, "Unexpected end");
        break;
@@ -132,21 +112,13 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
      // Read and parse the text and tags.
      textBuilder.setLength(0);
      tags.clear();
-      if (utf16Charset != null) {
-        currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE));
-      } else {
-        currentLine = subripData.readLine();
-      }
+      currentLine = subripData.readUtfLine(charset);
      while (!TextUtils.isEmpty(currentLine)) {
        if (textBuilder.length() > 0) {
          textBuilder.append("<br>");
        }
        textBuilder.append(processLine(currentLine, tags));
-        if (utf16Charset != null) {
-          currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE));
-        } else {
-          currentLine = subripData.readLine();
-        }
+        currentLine = subripData.readUtfLine(charset);
      }

      Spanned text = Html.fromHtml(textBuilder.toString());
@@ -169,19 +141,29 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
    return new SubripSubtitle(cuesArray, cueTimesUsArray);
  }

-  @Nullable
-  private Charset getUtf16Charset(byte first, byte second) {
-    if (first == (byte) 0xFE && second == (byte) 0xFF) {
-      // UTF-16 (BE)
-      return Charsets.UTF_16BE;
+  /**
+   * Determines the UTF encoding of the data. It is UTF-16BE or UTF-16LE if the next two bytes
+   * are the corresponding BOM, and UTF-8 otherwise. The bytes are only peeked, so the position
+   * of the {@link ParsableByteArray} is left unchanged.
+   *
+   * @param data The data whose UTF encoding is to be determined.
+   * @return The determined encoding.
+   */
+  private Charset detectUtfCharset(ParsableByteArray data) {
+    if(data.limit() < 2) {
+      return Charsets.UTF_8;
    }

-    if (first == (byte) 0xFF && second == (byte) 0xFE) {
-      // UTF-16 (LE)
-      return Charsets.UTF_16LE;
-    }
+    char twoBytes = data.peekChar();

-    return null;
+    switch (twoBytes) {
+      case ParsableByteArray.BOM_UTF16_BE:
+        return Charsets.UTF_16BE;
+      case ParsableByteArray.BOM_UTF16_LE:
+        return Charsets.UTF_16LE;
+      default:
+        return Charsets.UTF_8;
+    }
  }

  /**

--- a/library/extractor/src/main/java/com/google/android/exoplayer2/text/tx3g/Tx3gDecoder.java
+++ b/library/extractor/src/main/java/com/google/android/exoplayer2/text/tx3g/Tx3gDecoder.java
@@ -46,9 +46,6 @@ public final class Tx3gDecoder extends SimpleSubtitleDecoder {

  private static final String TAG = "Tx3gDecoder";

-  private static final char BOM_UTF16_BE = '\uFEFF';
-  private static final char BOM_UTF16_LE = '\uFFFE';
-
  private static final int TYPE_STYL = 0x7374796c;
  private static final int TYPE_TBOX = 0x74626f78;
  private static final String TX3G_SERIF = "Serif";
@@ -173,7 +170,7 @@ public final class Tx3gDecoder extends SimpleSubtitleDecoder {
    }
    if (parsableByteArray.bytesLeft() >= SIZE_BOM_UTF16) {
      char firstChar = parsableByteArray.peekChar();
-      if (firstChar == BOM_UTF16_BE || firstChar == BOM_UTF16_LE) {
+      if (firstChar == ParsableByteArray.BOM_UTF16_BE || firstChar == ParsableByteArray.BOM_UTF16_LE) {
        return parsableByteArray.readString(textLength, Charsets.UTF_16);
      }
    }