Commit 83568ca5 by Oliver Woodman

Support MKV embedded SubRip captions.

parent 009d4d0c
1
00:00:00,000 -->
SubRip doesn't technically allow missing end timecodes.
2
00:00:02,345 -->
We interpret it to mean that a subtitle extends to the start of the next one.
3
00:00:03,456 -->
Or to the end of the media.
\ No newline at end of file
...@@ -25,13 +25,14 @@ import java.io.InputStream; ...@@ -25,13 +25,14 @@ import java.io.InputStream;
*/ */
public final class SubripParserTest extends InstrumentationTestCase { public final class SubripParserTest extends InstrumentationTestCase {
private static final String TYPICAL_SUBRIP_FILE = "subrip/typical"; private static final String EMPTY_FILE = "subrip/empty";
private static final String EMPTY_SUBRIP_FILE = "subrip/empty"; private static final String TYPICAL_FILE = "subrip/typical";
private static final String NO_END_TIMECODES_FILE = "subrip/no_end_timecodes";
public void testParseNullSubripFile() throws IOException { public void testParseEmptySubripFile() throws IOException {
SubripParser parser = new SubripParser(); SubripParser parser = new SubripParser();
InputStream inputStream = InputStream inputStream =
getInstrumentation().getContext().getResources().getAssets().open(EMPTY_SUBRIP_FILE); getInstrumentation().getContext().getResources().getAssets().open(EMPTY_FILE);
SubripSubtitle subtitle = parser.parse(inputStream); SubripSubtitle subtitle = parser.parse(inputStream);
// Assert that the subtitle is empty. // Assert that the subtitle is empty.
assertEquals(0, subtitle.getEventTimeCount()); assertEquals(0, subtitle.getEventTimeCount());
...@@ -41,7 +42,7 @@ public final class SubripParserTest extends InstrumentationTestCase { ...@@ -41,7 +42,7 @@ public final class SubripParserTest extends InstrumentationTestCase {
public void testParseTypicalSubripFile() throws IOException { public void testParseTypicalSubripFile() throws IOException {
SubripParser parser = new SubripParser(); SubripParser parser = new SubripParser();
InputStream inputStream = InputStream inputStream =
getInstrumentation().getContext().getResources().getAssets().open(TYPICAL_SUBRIP_FILE); getInstrumentation().getContext().getResources().getAssets().open(TYPICAL_FILE);
SubripSubtitle subtitle = parser.parse(inputStream); SubripSubtitle subtitle = parser.parse(inputStream);
// Test event count. // Test event count.
...@@ -60,4 +61,29 @@ public final class SubripParserTest extends InstrumentationTestCase { ...@@ -60,4 +61,29 @@ public final class SubripParserTest extends InstrumentationTestCase {
assertEquals(3456000, subtitle.getEventTime(3)); assertEquals(3456000, subtitle.getEventTime(3));
} }
/**
 * Parses the asset referenced by {@code NO_END_TIMECODES_FILE} and checks that three event
 * times are reported, and that looking up the cues at each event time yields the expected
 * cue text.
 */
public void testParseNoEndTimecodes() throws IOException {
  SubripParser subripParser = new SubripParser();
  InputStream assetStream =
      getInstrumentation().getContext().getResources().getAssets().open(NO_END_TIMECODES_FILE);
  SubripSubtitle parsed = subripParser.parse(assetStream);

  // Three event times are expected in total.
  assertEquals(3, parsed.getEventTimeCount());

  // Cue #1: event time 0.
  assertEquals(0, parsed.getEventTime(0));
  String firstText = parsed.getCues(parsed.getEventTime(0)).get(0).text.toString();
  assertEquals("SubRip doesn't technically allow missing end timecodes.", firstText);

  // Cue #2: event time 2.345 s, expressed in microseconds.
  assertEquals(2345000, parsed.getEventTime(1));
  String secondText = parsed.getCues(parsed.getEventTime(1)).get(0).text.toString();
  assertEquals("We interpret it to mean that a subtitle extends to the start of the next one.",
      secondText);

  // Cue #3: event time 3.456 s, expressed in microseconds.
  assertEquals(3456000, parsed.getEventTime(2));
  String thirdText = parsed.getCues(parsed.getEventTime(2)).get(0).text.toString();
  assertEquals("Or to the end of the media.", thirdText);
}
} }
...@@ -39,7 +39,7 @@ import java.util.regex.Pattern; ...@@ -39,7 +39,7 @@ import java.util.regex.Pattern;
*/ */
public final class SubripParser implements SubtitleParser { public final class SubripParser implements SubtitleParser {
private static final Pattern SUBRIP_TIMING_LINE = Pattern.compile("(.*)\\s+-->\\s+(.*)"); private static final Pattern SUBRIP_TIMING_LINE = Pattern.compile("(\\S*)\\s*-->\\s*(\\S*)");
private static final Pattern SUBRIP_TIMESTAMP = private static final Pattern SUBRIP_TIMESTAMP =
Pattern.compile("(?:(\\d+):)?(\\d+):(\\d+),(\\d+)"); Pattern.compile("(?:(\\d+):)?(\\d+):(\\d+),(\\d+)");
...@@ -54,6 +54,7 @@ public final class SubripParser implements SubtitleParser { ...@@ -54,6 +54,7 @@ public final class SubripParser implements SubtitleParser {
ArrayList<Cue> cues = new ArrayList<>(); ArrayList<Cue> cues = new ArrayList<>();
LongArray cueTimesUs = new LongArray(); LongArray cueTimesUs = new LongArray();
BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, C.UTF8_NAME)); BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, C.UTF8_NAME));
boolean haveEndTimecode;
String currentLine; String currentLine;
while ((currentLine = reader.readLine()) != null) { while ((currentLine = reader.readLine()) != null) {
...@@ -65,11 +66,16 @@ public final class SubripParser implements SubtitleParser { ...@@ -65,11 +66,16 @@ public final class SubripParser implements SubtitleParser {
} }
// Read and parse the timing line. // Read and parse the timing line.
haveEndTimecode = false;
currentLine = reader.readLine(); currentLine = reader.readLine();
Matcher matcher = SUBRIP_TIMING_LINE.matcher(currentLine); Matcher matcher = SUBRIP_TIMING_LINE.matcher(currentLine);
if (matcher.find()) { if (matcher.find()) {
cueTimesUs.add(parseTimestampUs(matcher.group(1))); cueTimesUs.add(parseTimecode(matcher.group(1)));
cueTimesUs.add(parseTimestampUs(matcher.group(2))); String endTimecode = matcher.group(2);
if (!TextUtils.isEmpty(endTimecode)) {
haveEndTimecode = true;
cueTimesUs.add(parseTimecode(matcher.group(2)));
}
} else { } else {
throw new ParserException("Expected timing line: " + currentLine); throw new ParserException("Expected timing line: " + currentLine);
} }
...@@ -85,6 +91,9 @@ public final class SubripParser implements SubtitleParser { ...@@ -85,6 +91,9 @@ public final class SubripParser implements SubtitleParser {
Spanned text = Html.fromHtml(textBuilder.toString()); Spanned text = Html.fromHtml(textBuilder.toString());
cues.add(new Cue(text)); cues.add(new Cue(text));
if (haveEndTimecode) {
cues.add(null);
}
} }
Cue[] cuesArray = new Cue[cues.size()]; Cue[] cuesArray = new Cue[cues.size()];
...@@ -98,7 +107,7 @@ public final class SubripParser implements SubtitleParser { ...@@ -98,7 +107,7 @@ public final class SubripParser implements SubtitleParser {
return MimeTypes.APPLICATION_SUBRIP.equals(mimeType); return MimeTypes.APPLICATION_SUBRIP.equals(mimeType);
} }
private static long parseTimestampUs(String s) throws NumberFormatException { private static long parseTimecode(String s) throws NumberFormatException {
Matcher matcher = SUBRIP_TIMESTAMP.matcher(s); Matcher matcher = SUBRIP_TIMESTAMP.matcher(s);
if (!matcher.matches()) { if (!matcher.matches()) {
throw new NumberFormatException("has invalid format"); throw new NumberFormatException("has invalid format");
......
...@@ -32,8 +32,8 @@ import java.util.List; ...@@ -32,8 +32,8 @@ import java.util.List;
private final long[] cueTimesUs; private final long[] cueTimesUs;
/** /**
* @param cues The cues in the subtitle. * @param cues The cues in the subtitle. Null entries may be used to represent empty cues.
* @param cueTimesUs Interleaved cue start and end times, in microseconds. * @param cueTimesUs The cue times, in microseconds.
*/ */
public SubripSubtitle(Cue[] cues, long[] cueTimesUs) { public SubripSubtitle(Cue[] cues, long[] cueTimesUs) {
this.cues = cues; this.cues = cues;
...@@ -69,11 +69,11 @@ import java.util.List; ...@@ -69,11 +69,11 @@ import java.util.List;
@Override @Override
public List<Cue> getCues(long timeUs) { public List<Cue> getCues(long timeUs) {
int index = Util.binarySearchFloor(cueTimesUs, timeUs, true, false); int index = Util.binarySearchFloor(cueTimesUs, timeUs, true, false);
if (index == -1 || index % 2 == 1) { if (index == -1 || cues[index] == null) {
// timeUs is earlier than the start of the first cue, or corresponds to a gap between cues. // timeUs is earlier than the start of the first cue, or we have an empty cue.
return Collections.<Cue>emptyList(); return Collections.<Cue>emptyList();
} else { } else {
return Collections.singletonList(cues[index / 2]); return Collections.singletonList(cues[index]);
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment