Commit 83568ca5 by Oliver Woodman

Support MKV embedded SubRip captions.

parent 009d4d0c
1
00:00:00,000 -->
SubRip doesn't technically allow missing end timecodes.
2
00:00:02,345 -->
We interpret it to mean that a subtitle extends to the start of the next one.
3
00:00:03,456 -->
Or to the end of the media.
\ No newline at end of file
......@@ -25,13 +25,14 @@ import java.io.InputStream;
*/
public final class SubripParserTest extends InstrumentationTestCase {
private static final String TYPICAL_SUBRIP_FILE = "subrip/typical";
private static final String EMPTY_SUBRIP_FILE = "subrip/empty";
private static final String EMPTY_FILE = "subrip/empty";
private static final String TYPICAL_FILE = "subrip/typical";
private static final String NO_END_TIMECODES_FILE = "subrip/no_end_timecodes";
public void testParseNullSubripFile() throws IOException {
public void testParseEmptySubripFile() throws IOException {
SubripParser parser = new SubripParser();
InputStream inputStream =
getInstrumentation().getContext().getResources().getAssets().open(EMPTY_SUBRIP_FILE);
getInstrumentation().getContext().getResources().getAssets().open(EMPTY_FILE);
SubripSubtitle subtitle = parser.parse(inputStream);
// Assert that the subtitle is empty.
assertEquals(0, subtitle.getEventTimeCount());
......@@ -41,7 +42,7 @@ public final class SubripParserTest extends InstrumentationTestCase {
public void testParseTypicalSubripFile() throws IOException {
SubripParser parser = new SubripParser();
InputStream inputStream =
getInstrumentation().getContext().getResources().getAssets().open(TYPICAL_SUBRIP_FILE);
getInstrumentation().getContext().getResources().getAssets().open(TYPICAL_FILE);
SubripSubtitle subtitle = parser.parse(inputStream);
// Test event count.
......@@ -60,4 +61,29 @@ public final class SubripParserTest extends InstrumentationTestCase {
assertEquals(3456000, subtitle.getEventTime(3));
}
/**
 * Parses the {@code NO_END_TIMECODES_FILE} asset, whose timing lines have a start timecode but
 * no end timecode, and checks the event count plus the start time and text of each cue.
 */
public void testParseNoEndTimecodes() throws IOException {
  SubripParser subripParser = new SubripParser();
  InputStream assetStream =
      getInstrumentation().getContext().getResources().getAssets().open(NO_END_TIMECODES_FILE);
  SubripSubtitle parsed = subripParser.parse(assetStream);

  // Expected start time (in microseconds) and text of each cue, in file order.
  long[] expectedStartTimesUs = {0, 2345000, 3456000};
  String[] expectedTexts = {
    "SubRip doesn't technically allow missing end timecodes.",
    "We interpret it to mean that a subtitle extends to the start of the next one.",
    "Or to the end of the media."
  };

  // Three cues without end timecodes are expected to yield exactly three event times.
  assertEquals(3, parsed.getEventTimeCount());

  // For every cue, check its event time first and then the text found at that time.
  for (int cueIndex = 0; cueIndex < expectedStartTimesUs.length; cueIndex++) {
    assertEquals(expectedStartTimesUs[cueIndex], parsed.getEventTime(cueIndex));
    assertEquals(expectedTexts[cueIndex],
        parsed.getCues(parsed.getEventTime(cueIndex)).get(0).text.toString());
  }
}
}
......@@ -39,7 +39,7 @@ import java.util.regex.Pattern;
*/
public final class SubripParser implements SubtitleParser {
private static final Pattern SUBRIP_TIMING_LINE = Pattern.compile("(.*)\\s+-->\\s+(.*)");
private static final Pattern SUBRIP_TIMING_LINE = Pattern.compile("(\\S*)\\s*-->\\s*(\\S*)");
private static final Pattern SUBRIP_TIMESTAMP =
Pattern.compile("(?:(\\d+):)?(\\d+):(\\d+),(\\d+)");
......@@ -54,6 +54,7 @@ public final class SubripParser implements SubtitleParser {
ArrayList<Cue> cues = new ArrayList<>();
LongArray cueTimesUs = new LongArray();
BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, C.UTF8_NAME));
boolean haveEndTimecode;
String currentLine;
while ((currentLine = reader.readLine()) != null) {
......@@ -65,11 +66,16 @@ public final class SubripParser implements SubtitleParser {
}
// Read and parse the timing line.
haveEndTimecode = false;
currentLine = reader.readLine();
Matcher matcher = SUBRIP_TIMING_LINE.matcher(currentLine);
if (matcher.find()) {
cueTimesUs.add(parseTimestampUs(matcher.group(1)));
cueTimesUs.add(parseTimestampUs(matcher.group(2)));
cueTimesUs.add(parseTimecode(matcher.group(1)));
String endTimecode = matcher.group(2);
if (!TextUtils.isEmpty(endTimecode)) {
haveEndTimecode = true;
cueTimesUs.add(parseTimecode(matcher.group(2)));
}
} else {
throw new ParserException("Expected timing line: " + currentLine);
}
......@@ -85,6 +91,9 @@ public final class SubripParser implements SubtitleParser {
Spanned text = Html.fromHtml(textBuilder.toString());
cues.add(new Cue(text));
if (haveEndTimecode) {
cues.add(null);
}
}
Cue[] cuesArray = new Cue[cues.size()];
......@@ -98,7 +107,7 @@ public final class SubripParser implements SubtitleParser {
return MimeTypes.APPLICATION_SUBRIP.equals(mimeType);
}
private static long parseTimestampUs(String s) throws NumberFormatException {
private static long parseTimecode(String s) throws NumberFormatException {
Matcher matcher = SUBRIP_TIMESTAMP.matcher(s);
if (!matcher.matches()) {
throw new NumberFormatException("has invalid format");
......
......@@ -32,8 +32,8 @@ import java.util.List;
private final long[] cueTimesUs;
/**
* @param cues The cues in the subtitle.
* @param cueTimesUs Interleaved cue start and end times, in microseconds.
* @param cues The cues in the subtitle. Null entries may be used to represent empty cues.
* @param cueTimesUs The cue times, in microseconds.
*/
public SubripSubtitle(Cue[] cues, long[] cueTimesUs) {
this.cues = cues;
......@@ -69,11 +69,11 @@ import java.util.List;
@Override
public List<Cue> getCues(long timeUs) {
int index = Util.binarySearchFloor(cueTimesUs, timeUs, true, false);
if (index == -1 || index % 2 == 1) {
// timeUs is earlier than the start of the first cue, or corresponds to a gap between cues.
if (index == -1 || cues[index] == null) {
// timeUs is earlier than the start of the first cue, or we have an empty cue.
return Collections.<Cue>emptyList();
} else {
return Collections.singletonList(cues[index / 2]);
return Collections.singletonList(cues[index]);
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment