Commit 65199698 by aquilescanta Committed by Oliver Woodman

Refactored the Webvtt parsing classes

Moved the cue-related behavior to the WebvttCueParser class.
This way, the parsing methods will be more easily accessible to
other classes, such as the MP4 WebVTT parser. The class also has
some methods that rely on instance state in order to avoid
repeated, avoidable allocations. Method visibility is subject to
change in further CLs.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=111616824
parent 0aa4d3d2
...@@ -27,7 +27,7 @@ import android.text.style.UnderlineSpan; ...@@ -27,7 +27,7 @@ import android.text.style.UnderlineSpan;
public final class WebvttCueParserTest extends InstrumentationTestCase { public final class WebvttCueParserTest extends InstrumentationTestCase {
public void testParseStrictValidClassesAndTrailingTokens() throws Exception { public void testParseStrictValidClassesAndTrailingTokens() throws Exception {
Spanned text = WebvttCueParser.parse("<v.first.loud Esme>" Spanned text = WebvttCueParser.parseCueText("<v.first.loud Esme>"
+ "This <u.style1.style2 some stuff>is</u> text with <b.foo><i.bar>html</i></b> tags"); + "This <u.style1.style2 some stuff>is</u> text with <b.foo><i.bar>html</i></b> tags");
assertEquals("This is text with html tags", text.toString()); assertEquals("This is text with html tags", text.toString());
...@@ -48,7 +48,7 @@ public final class WebvttCueParserTest extends InstrumentationTestCase { ...@@ -48,7 +48,7 @@ public final class WebvttCueParserTest extends InstrumentationTestCase {
} }
public void testParseStrictValidUnsupportedTagsStrippedOut() throws Exception { public void testParseStrictValidUnsupportedTagsStrippedOut() throws Exception {
Spanned text = WebvttCueParser.parse( Spanned text = WebvttCueParser.parseCueText(
"<v.first.loud Esme>This <unsupported>is</unsupported> text with " "<v.first.loud Esme>This <unsupported>is</unsupported> text with "
+ "<notsupp><invalid>html</invalid></notsupp> tags"); + "<notsupp><invalid>html</invalid></notsupp> tags");
...@@ -58,7 +58,7 @@ public final class WebvttCueParserTest extends InstrumentationTestCase { ...@@ -58,7 +58,7 @@ public final class WebvttCueParserTest extends InstrumentationTestCase {
} }
public void testParseWellFormedUnclosedEndAtCueEnd() throws Exception { public void testParseWellFormedUnclosedEndAtCueEnd() throws Exception {
Spanned text = WebvttCueParser.parse( Spanned text = WebvttCueParser.parseCueText(
"An <u some trailing stuff>unclosed u tag with <i>italic</i> inside"); "An <u some trailing stuff>unclosed u tag with <i>italic</i> inside");
assertEquals("An unclosed u tag with italic inside", text.toString()); assertEquals("An unclosed u tag with italic inside", text.toString());
...@@ -76,7 +76,7 @@ public final class WebvttCueParserTest extends InstrumentationTestCase { ...@@ -76,7 +76,7 @@ public final class WebvttCueParserTest extends InstrumentationTestCase {
} }
public void testParseWellFormedUnclosedEndAtParent() throws Exception { public void testParseWellFormedUnclosedEndAtParent() throws Exception {
Spanned text = WebvttCueParser.parse( Spanned text = WebvttCueParser.parseCueText(
"An unclosed u tag with <i><u>underline and italic</i> inside"); "An unclosed u tag with <i><u>underline and italic</i> inside");
assertEquals("An unclosed u tag with underline and italic inside", text.toString()); assertEquals("An unclosed u tag with underline and italic inside", text.toString());
...@@ -95,7 +95,7 @@ public final class WebvttCueParserTest extends InstrumentationTestCase { ...@@ -95,7 +95,7 @@ public final class WebvttCueParserTest extends InstrumentationTestCase {
} }
public void testParseMalformedNestedElements() throws Exception { public void testParseMalformedNestedElements() throws Exception {
Spanned text = WebvttCueParser.parse( Spanned text = WebvttCueParser.parseCueText(
"<b><u>An unclosed u tag with <i>italic</u> inside</i></b>"); "<b><u>An unclosed u tag with <i>italic</u> inside</i></b>");
assertEquals("An unclosed u tag with italic inside", text.toString()); assertEquals("An unclosed u tag with italic inside", text.toString());
...@@ -121,7 +121,7 @@ public final class WebvttCueParserTest extends InstrumentationTestCase { ...@@ -121,7 +121,7 @@ public final class WebvttCueParserTest extends InstrumentationTestCase {
} }
public void testParseCloseNonExistingTag() throws Exception { public void testParseCloseNonExistingTag() throws Exception {
Spanned text = WebvttCueParser.parse("blah<b>blah</i>blah</b>blah"); Spanned text = WebvttCueParser.parseCueText("blah<b>blah</i>blah</b>blah");
assertEquals("blahblahblahblah", text.toString()); assertEquals("blahblahblahblah", text.toString());
StyleSpan[] spans = getSpans(text, StyleSpan.class); StyleSpan[] spans = getSpans(text, StyleSpan.class);
...@@ -132,42 +132,42 @@ public final class WebvttCueParserTest extends InstrumentationTestCase { ...@@ -132,42 +132,42 @@ public final class WebvttCueParserTest extends InstrumentationTestCase {
} }
public void testParseEmptyTagName() throws Exception { public void testParseEmptyTagName() throws Exception {
Spanned text = WebvttCueParser.parse("An unclosed u tag with <>italic inside"); Spanned text = WebvttCueParser.parseCueText("An unclosed u tag with <>italic inside");
assertEquals("An unclosed u tag with italic inside", text.toString()); assertEquals("An unclosed u tag with italic inside", text.toString());
} }
public void testParseEntities() throws Exception { public void testParseEntities() throws Exception {
Spanned text = WebvttCueParser.parse("&amp; &gt; &lt; &nbsp;"); Spanned text = WebvttCueParser.parseCueText("&amp; &gt; &lt; &nbsp;");
assertEquals("& > < ", text.toString()); assertEquals("& > < ", text.toString());
} }
public void testParseEntitiesUnsupported() throws Exception { public void testParseEntitiesUnsupported() throws Exception {
Spanned text = WebvttCueParser.parse("&noway; &sure;"); Spanned text = WebvttCueParser.parseCueText("&noway; &sure;");
assertEquals(" ", text.toString()); assertEquals(" ", text.toString());
} }
public void testParseEntitiesNotTerminated() throws Exception { public void testParseEntitiesNotTerminated() throws Exception {
Spanned text = WebvttCueParser.parse("&amp here comes text"); Spanned text = WebvttCueParser.parseCueText("&amp here comes text");
assertEquals("& here comes text", text.toString()); assertEquals("& here comes text", text.toString());
} }
public void testParseEntitiesNotTerminatedUnsupported() throws Exception { public void testParseEntitiesNotTerminatedUnsupported() throws Exception {
Spanned text = WebvttCueParser.parse("&surenot here comes text"); Spanned text = WebvttCueParser.parseCueText("&surenot here comes text");
assertEquals(" here comes text", text.toString()); assertEquals(" here comes text", text.toString());
} }
public void testParseEntitiesNotTerminatedNoSpace() throws Exception { public void testParseEntitiesNotTerminatedNoSpace() throws Exception {
Spanned text = WebvttCueParser.parse("&surenot"); Spanned text = WebvttCueParser.parseCueText("&surenot");
assertEquals("&surenot", text.toString()); assertEquals("&surenot", text.toString());
} }
public void testParseVoidTag() throws Exception { public void testParseVoidTag() throws Exception {
Spanned text = WebvttCueParser.parse("here comes<br/> text<br/>"); Spanned text = WebvttCueParser.parseCueText("here comes<br/> text<br/>");
assertEquals("here comes text", text.toString()); assertEquals("here comes text", text.toString());
} }
public void testParseMultipleTagsOfSameKind() { public void testParseMultipleTagsOfSameKind() {
Spanned text = WebvttCueParser.parse("blah <b>blah</b> blah <b>foo</b>"); Spanned text = WebvttCueParser.parseCueText("blah <b>blah</b> blah <b>foo</b>");
assertEquals("blah blah blah foo", text.toString()); assertEquals("blah blah blah foo", text.toString());
StyleSpan[] spans = getSpans(text, StyleSpan.class); StyleSpan[] spans = getSpans(text, StyleSpan.class);
...@@ -181,7 +181,7 @@ public final class WebvttCueParserTest extends InstrumentationTestCase { ...@@ -181,7 +181,7 @@ public final class WebvttCueParserTest extends InstrumentationTestCase {
} }
public void testParseInvalidVoidSlash() { public void testParseInvalidVoidSlash() {
Spanned text = WebvttCueParser.parse("blah <b/.st1.st2 trailing stuff> blah"); Spanned text = WebvttCueParser.parseCueText("blah <b/.st1.st2 trailing stuff> blah");
assertEquals("blah blah", text.toString()); assertEquals("blah blah", text.toString());
StyleSpan[] spans = getSpans(text, StyleSpan.class); StyleSpan[] spans = getSpans(text, StyleSpan.class);
...@@ -189,37 +189,37 @@ public final class WebvttCueParserTest extends InstrumentationTestCase { ...@@ -189,37 +189,37 @@ public final class WebvttCueParserTest extends InstrumentationTestCase {
} }
public void testParseMonkey() throws Exception { public void testParseMonkey() throws Exception {
Spanned text = WebvttCueParser.parse( Spanned text = WebvttCueParser.parseCueText(
"< u>An unclosed u tag with <<<<< i>italic</u></u></u></u ></i><u><u> inside"); "< u>An unclosed u tag with <<<<< i>italic</u></u></u></u ></i><u><u> inside");
assertEquals("An unclosed u tag with italic inside", text.toString()); assertEquals("An unclosed u tag with italic inside", text.toString());
text = WebvttCueParser.parse(">>>>>>>>>An unclosed u tag with <<<<< italic</u></u></u></u >" text = WebvttCueParser.parseCueText(">>>>>>>>>An unclosed u tag with <<<<< italic</u></u></u>"
+ "</i><u><u> inside"); + "</u ></i><u><u> inside");
assertEquals(">>>>>>>>>An unclosed u tag with inside", text.toString()); assertEquals(">>>>>>>>>An unclosed u tag with inside", text.toString());
} }
public void testParseCornerCases() throws Exception { public void testParseCornerCases() throws Exception {
Spanned text = WebvttCueParser.parse(">"); Spanned text = WebvttCueParser.parseCueText(">");
assertEquals(">", text.toString()); assertEquals(">", text.toString());
text = WebvttCueParser.parse("<"); text = WebvttCueParser.parseCueText("<");
assertEquals("", text.toString()); assertEquals("", text.toString());
text = WebvttCueParser.parse("<b.st1.st2 annotation"); text = WebvttCueParser.parseCueText("<b.st1.st2 annotation");
assertEquals("", text.toString()); assertEquals("", text.toString());
text = WebvttCueParser.parse("<<<<<<<<<<<<<<<<"); text = WebvttCueParser.parseCueText("<<<<<<<<<<<<<<<<");
assertEquals("", text.toString()); assertEquals("", text.toString());
text = WebvttCueParser.parse("<<<<<<>><<<<<<<<<<"); text = WebvttCueParser.parseCueText("<<<<<<>><<<<<<<<<<");
assertEquals(">", text.toString()); assertEquals(">", text.toString());
text = WebvttCueParser.parse("<>"); text = WebvttCueParser.parseCueText("<>");
assertEquals("", text.toString()); assertEquals("", text.toString());
text = WebvttCueParser.parse("&"); text = WebvttCueParser.parseCueText("&");
assertEquals("&", text.toString()); assertEquals("&", text.toString());
text = WebvttCueParser.parse("&&&&&&&"); text = WebvttCueParser.parseCueText("&&&&&&&");
assertEquals("&&&&&&&", text.toString()); assertEquals("&&&&&&&", text.toString());
} }
......
...@@ -25,6 +25,7 @@ import com.google.android.exoplayer.extractor.PositionHolder; ...@@ -25,6 +25,7 @@ import com.google.android.exoplayer.extractor.PositionHolder;
import com.google.android.exoplayer.extractor.SeekMap; import com.google.android.exoplayer.extractor.SeekMap;
import com.google.android.exoplayer.extractor.TrackOutput; import com.google.android.exoplayer.extractor.TrackOutput;
import com.google.android.exoplayer.extractor.ts.PtsTimestampAdjuster; import com.google.android.exoplayer.extractor.ts.PtsTimestampAdjuster;
import com.google.android.exoplayer.text.webvtt.WebvttCueParser;
import com.google.android.exoplayer.text.webvtt.WebvttParserUtil; import com.google.android.exoplayer.text.webvtt.WebvttParserUtil;
import com.google.android.exoplayer.util.MimeTypes; import com.google.android.exoplayer.util.MimeTypes;
import com.google.android.exoplayer.util.ParsableByteArray; import com.google.android.exoplayer.util.ParsableByteArray;
...@@ -137,7 +138,7 @@ import java.util.regex.Pattern; ...@@ -137,7 +138,7 @@ import java.util.regex.Pattern;
} }
// Find the first cue header and parse the start time. // Find the first cue header and parse the start time.
Matcher cueHeaderMatcher = WebvttParserUtil.findNextCueHeader(webvttData); Matcher cueHeaderMatcher = WebvttCueParser.findNextCueHeader(webvttData);
if (cueHeaderMatcher == null) { if (cueHeaderMatcher == null) {
// No cues found. Don't output a sample, but still output a corresponding track. // No cues found. Don't output a sample, but still output a corresponding track.
buildTrackOutput(0); buildTrackOutput(0);
......
...@@ -15,7 +15,11 @@ ...@@ -15,7 +15,11 @@
*/ */
package com.google.android.exoplayer.text.webvtt; package com.google.android.exoplayer.text.webvtt;
import com.google.android.exoplayer.text.Cue;
import com.google.android.exoplayer.util.ParsableByteArray;
import android.graphics.Typeface; import android.graphics.Typeface;
import android.text.Layout.Alignment;
import android.text.SpannableStringBuilder; import android.text.SpannableStringBuilder;
import android.text.Spanned; import android.text.Spanned;
import android.text.style.StyleSpan; import android.text.style.StyleSpan;
...@@ -23,11 +27,19 @@ import android.text.style.UnderlineSpan; ...@@ -23,11 +27,19 @@ import android.text.style.UnderlineSpan;
import android.util.Log; import android.util.Log;
import java.util.Stack; import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/** /**
* Parser for webvtt cue text. (https://w3c.github.io/webvtt/#cue-text) * Parser for webvtt cue text. (https://w3c.github.io/webvtt/#cue-text)
*/ */
/* package */ final class WebvttCueParser { public final class WebvttCueParser {
/**
 * Matches a cue header line: a non-whitespace token, an arrow ("-->") surrounded by whitespace,
 * a second non-whitespace token and then the rest of the line. Groups 1, 2 and 3 capture the
 * start time, the end time and the remainder of the line (the settings list).
 */
public static final Pattern CUE_HEADER_PATTERN = Pattern
.compile("^(\\S+)\\s+-->\\s+(\\S+)(.*)?$");
/**
 * Matches the first line of a comment block: "NOTE", optionally followed by a space or a tab
 * and arbitrary text.
 */
private static final Pattern COMMENT = Pattern.compile("^NOTE((\u0020|\u0009).*)?$");
/** Finds "name:value" cue settings; group 1 is the setting name and group 2 its value. */
private static final Pattern CUE_SETTING_PATTERN = Pattern.compile("(\\S+?):(\\S+)");
private static final char CHAR_LESS_THAN = '<'; private static final char CHAR_LESS_THAN = '<';
private static final char CHAR_GREATER_THAN = '>'; private static final char CHAR_GREATER_THAN = '>';
...@@ -54,9 +66,102 @@ import java.util.Stack; ...@@ -54,9 +66,102 @@ import java.util.Stack;
private static final String TAG = "WebvttCueParser"; private static final String TAG = "WebvttCueParser";
private WebvttCueParser() {} private StringBuilder textBuilder;
private PositionHolder positionHolder;
/**
 * Creates a cue parser. The text builder and the position holder are allocated once here so that
 * they can be reused for every cue parsed through this instance.
 */
public WebvttCueParser() {
  this.textBuilder = new StringBuilder();
  this.positionHolder = new PositionHolder();
}
/**
 * Advances through the data until a cue that can be parsed is found, and returns it. The cue's
 * timestamps, settings and text are all parsed.
 *
 * @param webvttData parsable Webvtt file data.
 * @return the next {@link WebvttCue} that parses successfully, or {@code null} if no further cue
 *     header is found in the data.
 */
public WebvttCue parseNextValidCue(ParsableByteArray webvttData) {
  for (Matcher headerMatcher = findNextCueHeader(webvttData);
      headerMatcher != null;
      headerMatcher = findNextCueHeader(webvttData)) {
    WebvttCue parsedCue = parseCue(headerMatcher, webvttData);
    if (parsedCue == null) {
      // The header could not be parsed into a cue; keep scanning for the next header.
      continue;
    }
    return parsedCue;
  }
  return null;
}
/**
 * Parses a single cue: the timestamps and settings come from an already matched cue header, and
 * the cue text is read line by line from {@code webvttData}.
 *
 * @param cueHeaderMatcher a matcher that has matched a cue header line; groups 1, 2 and 3 are
 *     read as the start time, the end time and the settings list.
 * @param webvttData the data from which the cue text lines are read.
 * @return the parsed {@link WebvttCue}, or {@code null} if either timestamp could not be parsed.
 *     In that case no cue text lines are consumed by this method.
 */
private WebvttCue parseCue(Matcher cueHeaderMatcher, ParsableByteArray webvttData) {
long cueStartTime;
long cueEndTime;
try {
// Parse the cue start and end times.
cueStartTime = WebvttParserUtil.parseTimestampUs(cueHeaderMatcher.group(1));
cueEndTime = WebvttParserUtil.parseTimestampUs(cueHeaderMatcher.group(2));
} catch (NumberFormatException e) {
Log.w(TAG, "Skipping cue with bad header: " + cueHeaderMatcher.group());
return null;
}
// Default cue settings.
Alignment cueTextAlignment = null;
float cueLine = Cue.DIMEN_UNSET;
int cueLineType = Cue.TYPE_UNSET;
int cueLineAnchor = Cue.TYPE_UNSET;
float cuePosition = Cue.DIMEN_UNSET;
int cuePositionAnchor = Cue.TYPE_UNSET;
float cueWidth = Cue.DIMEN_UNSET;
// Parse the cue settings list.
Matcher cueSettingMatcher = CUE_SETTING_PATTERN.matcher(cueHeaderMatcher.group(3));
while (cueSettingMatcher.find()) {
String name = cueSettingMatcher.group(1);
String value = cueSettingMatcher.group(2);
try {
if ("line".equals(name)) {
// positionHolder is a shared out-parameter, so its fields are copied out immediately.
parseLineAttribute(value, positionHolder);
cueLine = positionHolder.position;
cueLineType = positionHolder.lineType;
cueLineAnchor = positionHolder.positionAnchor;
} else if ("align".equals(name)) {
cueTextAlignment = parseTextAlignment(value);
} else if ("position".equals(name)) {
parsePositionAttribute(value, positionHolder);
cuePosition = positionHolder.position;
cuePositionAnchor = positionHolder.positionAnchor;
} else if ("size".equals(name)) {
cueWidth = WebvttParserUtil.parsePercentage(value);
} else {
Log.w(TAG, "Unknown cue setting " + name + ":" + value);
}
} catch (NumberFormatException e) {
// A malformed value only drops that one setting; the cue keeps the default for it.
Log.w(TAG, "Skipping bad cue setting: " + cueSettingMatcher.group());
}
}
if (cuePosition != Cue.DIMEN_UNSET && cuePositionAnchor == Cue.TYPE_UNSET) {
// Computed position alignment should be derived from the text alignment if it has not been
// set explicitly.
cuePositionAnchor = alignmentToAnchor(cueTextAlignment);
}
// NOTE(review): the "public static Spanned parse(String markup) {" text at the start of the
// next line looks like a leftover from the removed side of the diff rather than part of this
// method - confirm against the real file.
public static Spanned parse(String markup) { // Parse the cue text.
// The builder is reused across cues, so it is cleared before collecting this cue's text.
textBuilder.setLength(0);
String line;
// Text lines are read until an empty line or the end of the data; each line is trimmed and
// the lines are joined with "\n".
while ((line = webvttData.readLine()) != null && !line.isEmpty()) {
if (textBuilder.length() > 0) {
textBuilder.append("\n");
}
textBuilder.append(line.trim());
}
CharSequence cueText = parseCueText(textBuilder.toString());
return new WebvttCue(cueStartTime, cueEndTime, cueText, cueTextAlignment, cueLine,
cueLineType, cueLineAnchor, cuePosition, cuePositionAnchor, cueWidth);
}
/* package */ static Spanned parseCueText(String markup) {
SpannableStringBuilder spannedText = new SpannableStringBuilder(); SpannableStringBuilder spannedText = new SpannableStringBuilder();
Stack<StartTag> startTagStack = new Stack<>(); Stack<StartTag> startTagStack = new Stack<>();
String[] tagTokens; String[] tagTokens;
...@@ -122,6 +227,126 @@ import java.util.Stack; ...@@ -122,6 +227,126 @@ import java.util.Stack;
} }
/**
 * Consumes lines from the input until a WebVTT cue header has been read. Comment blocks (a
 * "NOTE" line and everything up to the next empty line) are skipped.
 *
 * @param input The input from which lines should be read.
 * @return A {@link Matcher} that has matched the cue header, with groups 1, 2 and 3 holding the
 *     start time, end time and settings list, or null if the end of the input was reached
 *     without a cue header being found.
 */
public static Matcher findNextCueHeader(ParsableByteArray input) {
  for (String line = input.readLine(); line != null; line = input.readLine()) {
    if (!COMMENT.matcher(line).matches()) {
      Matcher headerMatcher = WebvttCueParser.CUE_HEADER_PATTERN.matcher(line);
      if (headerMatcher.matches()) {
        return headerMatcher;
      }
      continue;
    }
    // Inside a comment block: discard lines up to and including the terminating empty line.
    String commentLine;
    do {
      commentLine = input.readLine();
    } while (commentLine != null && !commentLine.isEmpty());
  }
  return null;
}
/**
 * Mutable out-parameter filled in by {@code parseLineAttribute} and
 * {@code parsePositionAttribute}.
 */
private static final class PositionHolder {
/** The parsed line or position value: a fraction for percentages, otherwise a line number. */
public float position;
/** The anchor parsed from the part after the comma, or Cue.TYPE_UNSET if there is no comma. */
public int positionAnchor;
/** The line type when a line was parsed; set to Cue.TYPE_UNSET when a position was parsed. */
public int lineType;
}
// Internal methods
/**
 * Parses the value of a "line" cue setting, which is either a percentage or an integer line
 * number, optionally followed by a comma and an anchor.
 *
 * @param s The setting value.
 * @param out Receives the parsed line, its type and its anchor. Left untouched if parsing fails.
 * @throws NumberFormatException If the line value is neither a valid percentage nor an integer.
 */
private static void parseLineAttribute(String s, PositionHolder out)
    throws NumberFormatException {
  String lineValue = s;
  int anchor = Cue.TYPE_UNSET;
  int commaIndex = s.indexOf(',');
  if (commaIndex >= 0) {
    // Everything after the comma is the anchor; everything before it is the line value.
    anchor = parsePositionAnchor(s.substring(commaIndex + 1));
    lineValue = s.substring(0, commaIndex);
  }
  boolean isPercentage = lineValue.endsWith("%");
  float parsedLine = isPercentage
      ? WebvttParserUtil.parsePercentage(lineValue)
      : Integer.parseInt(lineValue);
  out.position = parsedLine;
  out.positionAnchor = anchor;
  out.lineType = isPercentage ? Cue.LINE_TYPE_FRACTION : Cue.LINE_TYPE_NUMBER;
}
/**
 * Parses the value of a "position" cue setting: a percentage, optionally followed by a comma and
 * an anchor.
 *
 * @param s The setting value.
 * @param out Receives the parsed position and anchor; its line type is reset to
 *     {@code Cue.TYPE_UNSET}. Left untouched if parsing fails.
 * @throws NumberFormatException If the position is not a valid percentage.
 */
private static void parsePositionAttribute(String s, PositionHolder out)
    throws NumberFormatException {
  int commaIndex = s.indexOf(',');
  boolean hasAnchor = commaIndex != -1;
  int anchor = hasAnchor ? parsePositionAnchor(s.substring(commaIndex + 1)) : Cue.TYPE_UNSET;
  String percentage = hasAnchor ? s.substring(0, commaIndex) : s;
  out.position = WebvttParserUtil.parsePercentage(percentage);
  out.positionAnchor = anchor;
  out.lineType = Cue.TYPE_UNSET;
}
/**
 * Maps an anchor keyword ("start", "middle" or "end") to the corresponding anchor type.
 *
 * @param s The anchor keyword.
 * @return The anchor type, or {@code Cue.TYPE_UNSET} (after logging a warning) if the keyword is
 *     not recognized.
 */
private static int parsePositionAnchor(String s) {
  // The keyword is the receiver of equals() so that a null argument still fails fast, exactly
  // as switching on a null string would.
  if (s.equals("start")) {
    return Cue.ANCHOR_TYPE_START;
  }
  if (s.equals("middle")) {
    return Cue.ANCHOR_TYPE_MIDDLE;
  }
  if (s.equals("end")) {
    return Cue.ANCHOR_TYPE_END;
  }
  Log.w(TAG, "Invalid anchor value: " + s);
  return Cue.TYPE_UNSET;
}
/**
 * Maps an "align" cue setting value to a text alignment.
 *
 * @param s The setting value.
 * @return The alignment, or null (after logging a warning) if the value is not recognized.
 */
private static Alignment parseTextAlignment(String s) {
  // The value is the receiver of equals() so that a null argument still fails fast, exactly as
  // switching on a null string would.
  if (s.equals("start") || s.equals("left")) {
    return Alignment.ALIGN_NORMAL;
  }
  if (s.equals("middle")) {
    return Alignment.ALIGN_CENTER;
  }
  if (s.equals("end") || s.equals("right")) {
    return Alignment.ALIGN_OPPOSITE;
  }
  Log.w(TAG, "Invalid alignment value: " + s);
  return null;
}
/**
 * Derives an anchor type from a text alignment.
 *
 * @param alignment The text alignment, possibly null.
 * @return {@code Cue.TYPE_UNSET} for a null alignment, the matching anchor type for the normal,
 *     center and opposite alignments, and {@code Cue.ANCHOR_TYPE_START} (after logging a
 *     warning) for any other alignment.
 */
private static int alignmentToAnchor(Alignment alignment) {
  if (alignment == null) {
    return Cue.TYPE_UNSET;
  }
  if (alignment == Alignment.ALIGN_NORMAL) {
    return Cue.ANCHOR_TYPE_START;
  }
  if (alignment == Alignment.ALIGN_CENTER) {
    return Cue.ANCHOR_TYPE_MIDDLE;
  }
  if (alignment == Alignment.ALIGN_OPPOSITE) {
    return Cue.ANCHOR_TYPE_END;
  }
  Log.w(TAG, "Unrecognized alignment: " + alignment);
  return Cue.ANCHOR_TYPE_START;
}
/**
* Find end of tag (&gt;). The position returned is the position of the &gt; plus one (exclusive). * Find end of tag (&gt;). The position returned is the position of the &gt; plus one (exclusive).
* *
* @param markup The webvtt cue markup to be parsed. * @param markup The webvtt cue markup to be parsed.
......
...@@ -16,18 +16,13 @@ ...@@ -16,18 +16,13 @@
package com.google.android.exoplayer.text.webvtt; package com.google.android.exoplayer.text.webvtt;
import com.google.android.exoplayer.ParserException; import com.google.android.exoplayer.ParserException;
import com.google.android.exoplayer.text.Cue;
import com.google.android.exoplayer.text.SubtitleParser; import com.google.android.exoplayer.text.SubtitleParser;
import com.google.android.exoplayer.util.MimeTypes; import com.google.android.exoplayer.util.MimeTypes;
import com.google.android.exoplayer.util.ParsableByteArray; import com.google.android.exoplayer.util.ParsableByteArray;
import android.text.Layout.Alignment;
import android.text.TextUtils; import android.text.TextUtils;
import android.util.Log;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/** /**
* A simple WebVTT parser. * A simple WebVTT parser.
...@@ -36,16 +31,12 @@ import java.util.regex.Pattern; ...@@ -36,16 +31,12 @@ import java.util.regex.Pattern;
*/ */
public final class WebvttParser implements SubtitleParser { public final class WebvttParser implements SubtitleParser {
private static final String TAG = "WebvttParser"; private final WebvttCueParser cueParser;
private final ParsableByteArray parsableWebvttData;
private static final Pattern CUE_SETTING = Pattern.compile("(\\S+?):(\\S+)");
private final PositionHolder positionHolder;
private final StringBuilder textBuilder;
public WebvttParser() { public WebvttParser() {
positionHolder = new PositionHolder(); cueParser = new WebvttCueParser();
textBuilder = new StringBuilder(); parsableWebvttData = new ParsableByteArray();
} }
@Override @Override
...@@ -55,190 +46,20 @@ public final class WebvttParser implements SubtitleParser { ...@@ -55,190 +46,20 @@ public final class WebvttParser implements SubtitleParser {
@Override @Override
public final WebvttSubtitle parse(byte[] bytes, int offset, int length) throws ParserException { public final WebvttSubtitle parse(byte[] bytes, int offset, int length) throws ParserException {
ParsableByteArray webvttData = new ParsableByteArray(bytes, offset + length); parsableWebvttData.reset(bytes, offset + length);
webvttData.setPosition(offset); parsableWebvttData.setPosition(offset);
// Validate the first line of the header, and skip the remainder. // Validate the first line of the header, and skip the remainder.
WebvttParserUtil.validateWebvttHeaderLine(webvttData); WebvttParserUtil.validateWebvttHeaderLine(parsableWebvttData);
while (!TextUtils.isEmpty(webvttData.readLine())) {} while (!TextUtils.isEmpty(parsableWebvttData.readLine())) {}
// Process the cues and text. // Extract Cues
ArrayList<WebvttCue> subtitles = new ArrayList<>(); ArrayList<WebvttCue> subtitles = new ArrayList<>();
Matcher cueHeaderMatcher; WebvttCue currentWebvttCue;
while ((cueHeaderMatcher = WebvttParserUtil.findNextCueHeader(webvttData)) != null) { while ((currentWebvttCue = cueParser.parseNextValidCue(parsableWebvttData)) != null) {
long cueStartTime; subtitles.add(currentWebvttCue);
long cueEndTime;
try {
// Parse the cue start and end times.
cueStartTime = WebvttParserUtil.parseTimestampUs(cueHeaderMatcher.group(1));
cueEndTime = WebvttParserUtil.parseTimestampUs(cueHeaderMatcher.group(2));
} catch (NumberFormatException e) {
Log.w(TAG, "Skipping cue with bad header: " + cueHeaderMatcher.group());
continue;
}
// Default cue settings.
Alignment cueTextAlignment = null;
float cueLine = Cue.DIMEN_UNSET;
int cueLineType = Cue.TYPE_UNSET;
int cueLineAnchor = Cue.TYPE_UNSET;
float cuePosition = Cue.DIMEN_UNSET;
int cuePositionAnchor = Cue.TYPE_UNSET;
float cueWidth = Cue.DIMEN_UNSET;
// Parse the cue settings list.
Matcher cueSettingMatcher = CUE_SETTING.matcher(cueHeaderMatcher.group(3));
while (cueSettingMatcher.find()) {
String name = cueSettingMatcher.group(1);
String value = cueSettingMatcher.group(2);
try {
if ("line".equals(name)) {
parseLineAttribute(value, positionHolder);
cueLine = positionHolder.position;
cueLineType = positionHolder.lineType;
cueLineAnchor = positionHolder.positionAnchor;
} else if ("align".equals(name)) {
cueTextAlignment = parseTextAlignment(value);
} else if ("position".equals(name)) {
parsePositionAttribute(value, positionHolder);
cuePosition = positionHolder.position;
cuePositionAnchor = positionHolder.positionAnchor;
} else if ("size".equals(name)) {
cueWidth = parsePercentage(value);
} else {
Log.w(TAG, "Unknown cue setting " + name + ":" + value);
}
} catch (NumberFormatException e) {
Log.w(TAG, "Skipping bad cue setting: " + cueSettingMatcher.group());
}
}
if (cuePosition != Cue.DIMEN_UNSET && cuePositionAnchor == Cue.TYPE_UNSET) {
// Computed position alignment should be derived from the text alignment if it has not been
// set explicitly.
cuePositionAnchor = alignmentToAnchor(cueTextAlignment);
}
// Parse the cue text.
textBuilder.setLength(0);
String line;
while ((line = webvttData.readLine()) != null && !line.isEmpty()) {
if (textBuilder.length() > 0) {
textBuilder.append("\n");
}
textBuilder.append(line.trim());
}
CharSequence cueText = WebvttCueParser.parse(textBuilder.toString());
WebvttCue cue = new WebvttCue(cueStartTime, cueEndTime, cueText, cueTextAlignment, cueLine,
cueLineType, cueLineAnchor, cuePosition, cuePositionAnchor, cueWidth);
subtitles.add(cue);
} }
return new WebvttSubtitle(subtitles); return new WebvttSubtitle(subtitles);
} }
/**
 * Parses a "line" cue setting value: a percentage or an integer line number, optionally followed
 * by a comma and an anchor keyword.
 *
 * @param s The setting value.
 * @param out Holder that receives the line, the line type and the anchor. It is only written
 *     once the whole value has been parsed successfully.
 * @throws NumberFormatException If the line is neither a valid percentage nor an integer.
 */
private static void parseLineAttribute(String s, PositionHolder out)
    throws NumberFormatException {
  int separatorIndex = s.indexOf(",");
  int anchor = Cue.TYPE_UNSET;
  String number = s;
  if (separatorIndex != -1) {
    anchor = parsePositionAnchor(s.substring(separatorIndex + 1));
    number = s.substring(0, separatorIndex);
  }
  boolean isFraction = number.endsWith("%");
  float value = isFraction ? parsePercentage(number) : Integer.parseInt(number);
  int type = isFraction ? Cue.LINE_TYPE_FRACTION : Cue.LINE_TYPE_NUMBER;
  out.position = value;
  out.positionAnchor = anchor;
  out.lineType = type;
}
/**
 * Parses a "position" cue setting value: a percentage, optionally followed by a comma and an
 * anchor keyword.
 *
 * @param s The setting value.
 * @param out Holder that receives the position and the anchor; its line type is reset to
 *     {@code Cue.TYPE_UNSET}. It is not written if the percentage fails to parse.
 * @throws NumberFormatException If the position is not a valid percentage.
 */
private static void parsePositionAttribute(String s, PositionHolder out)
    throws NumberFormatException {
  int separatorIndex = s.indexOf(",");
  int anchor = Cue.TYPE_UNSET;
  String percentage = s;
  if (separatorIndex != -1) {
    anchor = parsePositionAnchor(s.substring(separatorIndex + 1));
    percentage = s.substring(0, separatorIndex);
  }
  out.position = parsePercentage(percentage);
  out.positionAnchor = anchor;
  out.lineType = Cue.TYPE_UNSET;
}
/**
 * Parses a percentage such as "50%" and returns it as a fraction.
 *
 * @param s The percentage string, which must end with '%'.
 * @return The parsed value scaled so that 1.0 represents 100%.
 * @throws NumberFormatException If the string does not end with '%', if the part before the '%'
 *     is not a valid number, or if that number is not finite.
 */
private static float parsePercentage(String s) throws NumberFormatException {
  if (!s.endsWith("%")) {
    throw new NumberFormatException("Percentages must end with %");
  }
  float percentage = Float.parseFloat(s.substring(0, s.length() - 1));
  // Float.parseFloat accepts "NaN" and "Infinity" and overflows huge values to infinity. None of
  // these is a usable cue dimension, so report them like any other malformed number.
  if (Float.isNaN(percentage) || Float.isInfinite(percentage)) {
    throw new NumberFormatException("Percentage is not a finite number: " + s);
  }
  return percentage / 100;
}
/**
 * Converts an anchor keyword into an anchor type.
 *
 * @param s The keyword: "start", "middle" or "end".
 * @return The matching anchor type, or {@code Cue.TYPE_UNSET} if the keyword is not recognized,
 *     in which case a warning is logged.
 */
private static int parsePositionAnchor(String s) {
  int anchor;
  switch (s) {
    case "start":
      anchor = Cue.ANCHOR_TYPE_START;
      break;
    case "middle":
      anchor = Cue.ANCHOR_TYPE_MIDDLE;
      break;
    case "end":
      anchor = Cue.ANCHOR_TYPE_END;
      break;
    default:
      Log.w(TAG, "Invalid anchor value: " + s);
      anchor = Cue.TYPE_UNSET;
      break;
  }
  return anchor;
}
/**
 * Converts an "align" cue setting value into a text alignment.
 *
 * @param s The setting value.
 * @return The matching alignment, or null if the value is not recognized, in which case a
 *     warning is logged.
 */
private static Alignment parseTextAlignment(String s) {
  Alignment alignment;
  switch (s) {
    case "start":
    case "left":
      alignment = Alignment.ALIGN_NORMAL;
      break;
    case "middle":
      alignment = Alignment.ALIGN_CENTER;
      break;
    case "end":
    case "right":
      alignment = Alignment.ALIGN_OPPOSITE;
      break;
    default:
      Log.w(TAG, "Invalid alignment value: " + s);
      alignment = null;
      break;
  }
  return alignment;
}
/**
 * Converts a text alignment into the anchor type used when no anchor was set explicitly.
 *
 * @param alignment The text alignment, possibly null.
 * @return {@code Cue.TYPE_UNSET} if the alignment is null; otherwise the anchor type matching
 *     the alignment. Alignments other than normal, center and opposite are logged and mapped to
 *     {@code Cue.ANCHOR_TYPE_START}.
 */
private static int alignmentToAnchor(Alignment alignment) {
  int anchor = Cue.TYPE_UNSET;
  if (alignment != null) {
    switch (alignment) {
      case ALIGN_NORMAL:
        anchor = Cue.ANCHOR_TYPE_START;
        break;
      case ALIGN_CENTER:
        anchor = Cue.ANCHOR_TYPE_MIDDLE;
        break;
      case ALIGN_OPPOSITE:
        anchor = Cue.ANCHOR_TYPE_END;
        break;
      default:
        Log.w(TAG, "Unrecognized alignment: " + alignment);
        anchor = Cue.ANCHOR_TYPE_START;
        break;
    }
  }
  return anchor;
}
/**
 * Mutable out-parameter filled in by {@code parseLineAttribute} and
 * {@code parsePositionAttribute}.
 */
private static final class PositionHolder {
/** The parsed line or position value: a fraction for percentages, otherwise a line number. */
public float position;
/** The anchor parsed from the part after the comma, or Cue.TYPE_UNSET if there is no comma. */
public int positionAnchor;
/** The line type when a line was parsed; set to Cue.TYPE_UNSET when a position was parsed. */
public int lineType;
}
} }
...@@ -18,7 +18,6 @@ package com.google.android.exoplayer.text.webvtt; ...@@ -18,7 +18,6 @@ package com.google.android.exoplayer.text.webvtt;
import com.google.android.exoplayer.ParserException; import com.google.android.exoplayer.ParserException;
import com.google.android.exoplayer.util.ParsableByteArray; import com.google.android.exoplayer.util.ParsableByteArray;
import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
/** /**
...@@ -27,8 +26,6 @@ import java.util.regex.Pattern; ...@@ -27,8 +26,6 @@ import java.util.regex.Pattern;
public final class WebvttParserUtil { public final class WebvttParserUtil {
private static final Pattern HEADER = Pattern.compile("^\uFEFF?WEBVTT((\u0020|\u0009).*)?$"); private static final Pattern HEADER = Pattern.compile("^\uFEFF?WEBVTT((\u0020|\u0009).*)?$");
// Matches the first line of a comment block: "NOTE", optionally followed by a space or a tab
// and arbitrary text.
private static final Pattern COMMENT = Pattern.compile("^NOTE((\u0020|\u0009).*)?$");
// Matches a cue header line. Groups 1 and 2 capture the non-whitespace tokens on either side of
// the "-->" arrow and group 3 captures the remainder of the line.
private static final Pattern CUE_HEADER = Pattern.compile("^(\\S+)\\s+-->\\s+(\\S+)(.*)?$");
private WebvttParserUtil() {} private WebvttParserUtil() {}
...@@ -46,30 +43,6 @@ public final class WebvttParserUtil { ...@@ -46,30 +43,6 @@ public final class WebvttParserUtil {
} }
/**
 * Reads lines from the input, skipping comment blocks, until a WebVTT cue header line has been
 * consumed.
 *
 * @param input The input from which lines should be read.
 * @return A {@link Matcher} that has matched the cue header, whose groups 1, 2 and 3 contain the
 *     start time, end time and settings list, or null if the input ended before a cue header
 *     was found.
 */
public static Matcher findNextCueHeader(ParsableByteArray input) {
  String line = input.readLine();
  while (line != null) {
    if (COMMENT.matcher(line).matches()) {
      // Skip the rest of the comment block, including the empty line that ends it.
      do {
        line = input.readLine();
      } while (line != null && !line.isEmpty());
    } else {
      Matcher candidate = CUE_HEADER.matcher(line);
      if (candidate.matches()) {
        return candidate;
      }
    }
    line = input.readLine();
  }
  return null;
}
/**
* Parses a WebVTT timestamp. * Parses a WebVTT timestamp.
* *
* @param timestamp The timestamp string. * @param timestamp The timestamp string.
...@@ -86,4 +59,17 @@ public final class WebvttParserUtil { ...@@ -86,4 +59,17 @@ public final class WebvttParserUtil {
return (value * 1000 + Long.parseLong(parts[1])) * 1000; return (value * 1000 + Long.parseLong(parts[1])) * 1000;
} }
/**
 * Parses a percentage such as "50%" and returns it as a scaled float.
 *
 * @param s contains the number to parse, followed by '%'.
 * @return a float scaled number. 1.0 represents 100%.
 * @throws NumberFormatException if the number format is invalid, if the number is not finite, or
 *     if the string does not end with '%'.
 */
public static float parsePercentage(String s) throws NumberFormatException {
  if (!s.endsWith("%")) {
    throw new NumberFormatException("Percentages must end with %");
  }
  float percentage = Float.parseFloat(s.substring(0, s.length() - 1));
  // Float.parseFloat accepts "NaN" and "Infinity" and overflows huge values to infinity. None of
  // these is a usable cue dimension, so report them like any other malformed number.
  if (Float.isNaN(percentage) || Float.isInfinite(percentage)) {
    throw new NumberFormatException("Percentage is not a finite number: " + s);
  }
  return percentage / 100;
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment