Commit a1ab1fc2 by Oliver Woodman

Detect WebVTT file header according to the spec:

1. An optional U+FEFF BYTE ORDER MARK (BOM) character.
2. The string "WEBVTT".
3. Optionally, either a U+0020 SPACE character or a U+0009 CHARACTER
   TABULATION (tab) character followed by any number of characters that
   are not U+000A LINE FEED (LF) or U+000D CARRIAGE RETURN (CR) characters.
4. Exactly one WebVTT line terminator to terminate the line with the file
   magic and separate it from the rest of the body.

Issue: #580
parent 712756c3
......@@ -44,6 +44,10 @@ public class WebvttParser implements SubtitleParser {
// NOTE(review): the unit and meaning of this constant are not visible in this excerpt — confirm
// against its usages.
private static final long SAMPLING_RATE = 90;
// Regex for the first line of a WebVTT file: an optional U+FEFF byte order mark, the literal
// "WEBVTT", then optionally a space or tab followed by any further characters on that line.
// "\\u0020" reaches the regex engine as a regex escape for a space, while "\u0009" is turned into
// a literal tab character by the Java compiler; both forms match the intended character.
private static final String WEBVTT_FILE_HEADER_STRING = "^\uFEFF?WEBVTT((\\u0020|\u0009).*)?$";
// Compiled once and reused so the header pattern is not recompiled on every use.
private static final Pattern WEBVTT_FILE_HEADER =
Pattern.compile(WEBVTT_FILE_HEADER_STRING);
// Regex for a run of non-whitespace characters (possibly empty), a ':' or '=', and another run of
// non-whitespace characters (possibly empty).
// NOTE(review): presumably used to recognise metadata header lines — usage is not shown here.
private static final String WEBVTT_METADATA_HEADER_STRING = "\\S*[:=]\\S*";
private static final Pattern WEBVTT_METADATA_HEADER =
Pattern.compile(WEBVTT_METADATA_HEADER_STRING);
......@@ -116,7 +120,7 @@ public class WebvttParser implements SubtitleParser {
}
}
if (!line.equals("WEBVTT") && !line.equals("\uFEFFWEBVTT")) {
if (!WEBVTT_FILE_HEADER.matcher(line).matches()) {
throw new ParserException("Expected WEBVTT. Got " + line);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment