Add sniffing to WebvttExtractor

Preparation for sniffing in HLS

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=211455454

Add sniffing to WebvttExtractor
Preparation for sniffing in HLS

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=211455454
7959a691 · aquilescanta · Oliver Woodman · 70f8aeb1 · 7959a691 · 7959a691
Commit 7959a691 authored Sep 04, 2018 by aquilescanta Committed by Oliver Woodman Sep 05, 2018
Showing with 123 additions and 21 deletions
library/core/src/main/java/com/google/android/exoplayer2/text/SubtitleDecoderException.java
library/core/src/main/java/com/google/android/exoplayer2/text/webvtt/WebvttDecoder.java
library/core/src/main/java/com/google/android/exoplayer2/text/webvtt/WebvttParserUtil.java
library/core/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java
library/core/src/test/assets/webvtt/with_bom
library/core/src/test/java/com/google/android/exoplayer2/text/webvtt/WebvttDecoderTest.java
library/hls/src/main/java/com/google/android/exoplayer2/source/hls/WebvttExtractor.java
library/hls/src/test/java/com/google/android/exoplayer2/source/hls/WebvttExtractorTest.java
--- a/library/core/src/main/java/com/google/android/exoplayer2/text/SubtitleDecoderException.java
+++ b/library/core/src/main/java/com/google/android/exoplayer2/text/SubtitleDecoderException.java
@@ -27,6 +27,11 @@ public class SubtitleDecoderException extends Exception {
    super(message);
  }

+  /**
+   * Creates an exception that only wraps another exception, for call sites that have no
+   * additional message to add (for example when rethrowing a lower-level parsing failure).
+   *
+   * @param cause The cause of this exception.
+   */
+  public SubtitleDecoderException(Exception cause) {
+    super(cause);
+  }
+
  /**
   * @param message The detail message for this exception.
   * @param cause The cause of this exception.

--- a/library/core/src/main/java/com/google/android/exoplayer2/text/webvtt/WebvttDecoder.java
+++ b/library/core/src/main/java/com/google/android/exoplayer2/text/webvtt/WebvttDecoder.java
@@ -16,6 +16,7 @@
 package com.google.android.exoplayer2.text.webvtt;

 import android.text.TextUtils;
+import com.google.android.exoplayer2.ParserException;
 import com.google.android.exoplayer2.text.SimpleSubtitleDecoder;
 import com.google.android.exoplayer2.text.SubtitleDecoderException;
 import com.google.android.exoplayer2.util.ParsableByteArray;
@@ -62,7 +63,11 @@ public final class WebvttDecoder extends SimpleSubtitleDecoder {
    definedStyles.clear();

    // Validate the first line of the header, and skip the remainder.
-    WebvttParserUtil.validateWebvttHeaderLine(parsableWebvttData);
+    try {
+      WebvttParserUtil.validateWebvttHeaderLine(parsableWebvttData);
+    } catch (ParserException e) {
+      throw new SubtitleDecoderException(e);
+    }
    while (!TextUtils.isEmpty(parsableWebvttData.readLine())) {}

    int event;

--- a/library/core/src/main/java/com/google/android/exoplayer2/text/webvtt/WebvttParserUtil.java
+++ b/library/core/src/main/java/com/google/android/exoplayer2/text/webvtt/WebvttParserUtil.java
@@ -15,7 +15,7 @@
 */
 package com.google.android.exoplayer2.text.webvtt;

-import com.google.android.exoplayer2.text.SubtitleDecoderException;
+import com.google.android.exoplayer2.ParserException;
 import com.google.android.exoplayer2.util.ParsableByteArray;
 import com.google.android.exoplayer2.util.Util;
 import java.util.regex.Matcher;
@@ -27,7 +27,9 @@ import java.util.regex.Pattern;
 public final class WebvttParserUtil {

  private static final Pattern COMMENT = Pattern.compile("^NOTE((\u0020|\u0009).*)?$");
-  private static final Pattern HEADER = Pattern.compile("^\uFEFF?WEBVTT((\u0020|\u0009).*)?$");
+  private static final String WEBVTT_HEADER = "WEBVTT";
+  private static final int WEBVTT_BOM_BE = 0xfeff;
+  private static final int WEBVTT_BOM_LE = 0xfffe;

  private WebvttParserUtil() {}

@@ -35,14 +37,33 @@ public final class WebvttParserUtil {
   * Reads and validates the first line of a WebVTT file.
   *
   * @param input The input from which the line should be read.
-   * @throws SubtitleDecoderException If the line isn't the start of a valid WebVTT file.
+   * @throws ParserException If the line isn't the start of a valid WebVTT file.
   */
-  public static void validateWebvttHeaderLine(ParsableByteArray input)
-      throws SubtitleDecoderException {
-    String line = input.readLine();
-    if (line == null || !HEADER.matcher(line).matches()) {
-      throw new SubtitleDecoderException("Expected WEBVTT. Got " + line);
+  public static void validateWebvttHeaderLine(ParsableByteArray input) throws ParserException {
+    int startPosition = input.getPosition();
+    if (!isWebvttHeaderLine(input)) {
+      input.setPosition(startPosition);
+      throw new ParserException("Expected WEBVTT. Got " + input.readLine());
+    }
+  }
+
+  /**
+   * Returns whether the given input is the first line of a WebVTT file.
+   *
+   * <p>If the first two bytes equal {@code WEBVTT_BOM_BE} or {@code WEBVTT_BOM_LE} they are
+   * treated as a byte order mark and skipped before the line is read. The line is accepted when
+   * it starts with {@code WEBVTT_HEADER}. Inputs with fewer than two bytes left are rejected
+   * without reading anything. This method reads from {@code input}, so callers that need the
+   * original position must save and restore it themselves.
+   *
+   * @param input The input from which the line should be read.
+   * @return Whether the line read from {@code input} starts with the WebVTT header.
+   */
+  public static boolean isWebvttHeaderLine(ParsableByteArray input) {
+    if (input.bytesLeft() < 2) {
+      return false;
    }
+    int positionBeforeBom = input.getPosition();
+    int leadingShort = input.readUnsignedShort();
+    boolean startsWithBom = leadingShort == WEBVTT_BOM_BE || leadingShort == WEBVTT_BOM_LE;
+    if (!startsWithBom) {
+      // Those two bytes belong to the line itself, so put them back.
+      input.setPosition(positionBeforeBom);
+    }
+    String firstLine = input.readLine();
+    if (firstLine == null) {
+      return false;
+    }
+    return firstLine.startsWith(WEBVTT_HEADER);
  }

  /**

--- a/library/core/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java
+++ b/library/core/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java
@@ -499,10 +499,10 @@ public final class ParsableByteArray {

  /**
   * Reads a line of text.
-   * <p>
-   * A line is considered to be terminated by any one of a carriage return ('\r'), a line feed
+   *
+   * <p>A line is considered to be terminated by any one of a carriage return ('\r'), a line feed
   * ('\n'), or a carriage return followed immediately by a line feed ('\r\n'). The system's default
-   * charset (UTF-8) is used.
+   * charset (UTF-8) is used. This method discards leading UTF-8 byte order marks, if present.
   *
   * @return The line not including any line-termination characters, or null if the end of the data
   *     has already been reached.
@@ -517,7 +517,7 @@ public final class ParsableByteArray {
    }
    if (lineLimit - position >= 3 && data[position] == (byte) 0xEF
        && data[position + 1] == (byte) 0xBB && data[position + 2] == (byte) 0xBF) {
-      // There's a byte order mark at the start of the line. Discard it.
+      // There's a UTF-8 byte order mark at the start of the line. Discard it.
      position += 3;
    }
    String line = Util.fromUtf8Bytes(data, position, lineLimit - position);

--- a/library/core/src/test/assets/webvtt/with_bom
+++ b/library/core/src/test/assets/webvtt/with_bom
--- a/library/core/src/test/java/com/google/android/exoplayer2/text/webvtt/WebvttDecoderTest.java
+++ b/library/core/src/test/java/com/google/android/exoplayer2/text/webvtt/WebvttDecoderTest.java
--- a/library/hls/src/main/java/com/google/android/exoplayer2/source/hls/WebvttExtractor.java
+++ b/library/hls/src/main/java/com/google/android/exoplayer2/source/hls/WebvttExtractor.java
@@ -25,7 +25,6 @@ import com.google.android.exoplayer2.extractor.ExtractorOutput;
 import com.google.android.exoplayer2.extractor.PositionHolder;
 import com.google.android.exoplayer2.extractor.SeekMap;
 import com.google.android.exoplayer2.extractor.TrackOutput;
-import com.google.android.exoplayer2.text.SubtitleDecoderException;
 import com.google.android.exoplayer2.text.webvtt.WebvttParserUtil;
 import com.google.android.exoplayer2.util.MimeTypes;
 import com.google.android.exoplayer2.util.ParsableByteArray;
@@ -47,6 +46,7 @@ public final class WebvttExtractor implements Extractor {

  private static final Pattern LOCAL_TIMESTAMP = Pattern.compile("LOCAL:([^,]+)");
  private static final Pattern MEDIA_TIMESTAMP = Pattern.compile("MPEGTS:(\\d+)");
+  private static final int HEADER_MAX_LENGTH = 2 /* optional Byte Order Mark */ + 6 /* "WEBVTT" */;

  private final String language;
  private final TimestampAdjuster timestampAdjuster;
@@ -68,8 +68,10 @@ public final class WebvttExtractor implements Extractor {

  @Override
  public boolean sniff(ExtractorInput input) throws IOException, InterruptedException {
-    // This extractor is only used for the HLS use case, which should not call this method.
-    throw new IllegalStateException();
+    input.peekFully(
+        sampleData, /* offset= */ 0, /* length= */ HEADER_MAX_LENGTH, /* allowEndOfInput= */ false);
+    sampleDataWrapper.reset(sampleData, HEADER_MAX_LENGTH);
+    return WebvttParserUtil.isWebvttHeaderLine(sampleDataWrapper);
  }

  @Override
@@ -118,11 +120,7 @@ public final class WebvttExtractor implements Extractor {
    ParsableByteArray webvttData = new ParsableByteArray(sampleData);

    // Validate the first line of the header.
-    try {
-      WebvttParserUtil.validateWebvttHeaderLine(webvttData);
-    } catch (SubtitleDecoderException e) {
-      throw new ParserException(e);
-    }
+    WebvttParserUtil.validateWebvttHeaderLine(webvttData);

    // Defaults to use if the header doesn't contain an X-TIMESTAMP-MAP header.
    long vttTimestampUs = 0;

--- a/library/hls/src/test/java/com/google/android/exoplayer2/source/hls/WebvttExtractorTest.java
+++ b/library/hls/src/test/java/com/google/android/exoplayer2/source/hls/WebvttExtractorTest.java
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.android.exoplayer2.source.hls;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import com.google.android.exoplayer2.extractor.ExtractorInput;
+import com.google.android.exoplayer2.testutil.FakeExtractorInput;
+import com.google.android.exoplayer2.util.TimestampAdjuster;
+import java.io.IOException;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.robolectric.RobolectricTestRunner;
+
+/**
+ * Tests for {@link WebvttExtractor}.
+ *
+ * <p>Every input is at least 8 bytes long: {@link WebvttExtractor#sniff} peeks a fixed window of
+ * 8 bytes (an optional 2-byte byte order mark plus "WEBVTT") and does not allow end of input.
+ */
+@RunWith(RobolectricTestRunner.class)
+public class WebvttExtractorTest {
+
+  /** A header followed by whitespace, with no byte order mark, is accepted. */
+  @Test
+  public void sniff_sniffsWebvttHeaderWithTrailingSpace() throws IOException, InterruptedException {
+    byte[] data = new byte[] {'W', 'E', 'B', 'V', 'T', 'T', ' ', '\t'};
+    assertThat(sniffData(data)).isTrue();
+  }
+
+  /** A valid byte order mark in front of the header is skipped and the header is accepted. */
+  @Test
+  public void sniff_discardsByteOrderMark() throws IOException, InterruptedException {
+    byte[] data = new byte[] {(byte) 0xFE, (byte) 0xFF, 'W', 'E', 'B', 'V', 'T', 'T', '\n', ' '};
+    assertThat(sniffData(data)).isTrue();
+  }
+
+  /** Two leading bytes that are not a byte order mark make an otherwise valid header fail. */
+  @Test
+  public void sniff_failsForIncorrectBom() throws IOException, InterruptedException {
+    byte[] data = new byte[] {(byte) 0xFE, (byte) 0xFE, 'W', 'E', 'B', 'V', 'T', 'T', '\n'};
+    assertThat(sniffData(data)).isFalse();
+  }
+
+  /**
+   * A truncated header ("WEBVT") is rejected. The byte order mark is valid so that only the
+   * header text is under test.
+   */
+  @Test
+  public void sniff_failsForIncompleteHeader() throws IOException, InterruptedException {
+    byte[] data = new byte[] {(byte) 0xFE, (byte) 0xFF, 'W', 'E', 'B', 'V', 'T', '\n'};
+    assertThat(sniffData(data)).isFalse();
+  }
+
+  /**
+   * A header with the wrong case ("WeBVTT") is rejected. The byte order mark is valid so that
+   * only the header text is under test.
+   */
+  @Test
+  public void sniff_failsForIncorrectHeader() throws IOException, InterruptedException {
+    byte[] data = new byte[] {(byte) 0xFE, (byte) 0xFF, 'W', 'e', 'B', 'V', 'T', 'T', '\n'};
+    assertThat(sniffData(data)).isFalse();
+  }
+
+  /** Runs {@link WebvttExtractor#sniff} over {@code data} and returns its verdict. */
+  private static boolean sniffData(byte[] data) throws IOException, InterruptedException {
+    ExtractorInput input = new FakeExtractorInput.Builder().setData(data).build();
+    WebvttExtractor extractor =
+        new WebvttExtractor(/* language= */ null, new TimestampAdjuster(0));
+    return extractor.sniff(input);
+  }
+}