Commit ddb70d96 by ibaker Committed by Oliver Woodman

Require an end timecode in SSA and Subrip subtitles

SSA spec allows the lines in any order, so they must all have an end time:
http://moodub.free.fr/video/ass-specs.doc

The Matroska write-up of SubRip assumes the end time is present:
https://matroska.org/technical/specs/subtitles/srt.html

This will massively simplify merging issue:#6595

PiperOrigin-RevId: 279926730
parent b43db3bc
...@@ -111,6 +111,9 @@ ...@@ -111,6 +111,9 @@
([#5523](https://github.com/google/ExoPlayer/issues/5523)). ([#5523](https://github.com/google/ExoPlayer/issues/5523)).
* Handle new signaling for E-AC3 JOC audio in DASH * Handle new signaling for E-AC3 JOC audio in DASH
([#6636](https://github.com/google/ExoPlayer/issues/6636)). ([#6636](https://github.com/google/ExoPlayer/issues/6636)).
* Require an end time or duration for SubRip (SRT) and SubStation Alpha
(SSA/ASS) subtitles. This applies to both sidecar files & subtitles
[embedded in Matroska streams](https://matroska.org/technical/specs/subtitles/index.html).
### 2.10.7 (2019-11-12) ### ### 2.10.7 (2019-11-12) ###
......
...@@ -256,14 +256,6 @@ public class MatroskaExtractor implements Extractor { ...@@ -256,14 +256,6 @@ public class MatroskaExtractor implements Extractor {
*/ */
private static final int SUBRIP_PREFIX_END_TIMECODE_OFFSET = 19; private static final int SUBRIP_PREFIX_END_TIMECODE_OFFSET = 19;
/** /**
* A special end timecode indicating that a subrip subtitle should be displayed until the next
* subtitle, or until the end of the media in the case of the last subtitle.
* <p>
* Equivalent to the UTF-8 string: " ".
*/
private static final byte[] SUBRIP_TIMECODE_EMPTY =
new byte[] {32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32};
/**
* The value by which to divide a time in microseconds to convert it to the unit of the last value * The value by which to divide a time in microseconds to convert it to the unit of the last value
* in a subrip timecode (milliseconds). * in a subrip timecode (milliseconds).
*/ */
...@@ -304,14 +296,6 @@ public class MatroskaExtractor implements Extractor { ...@@ -304,14 +296,6 @@ public class MatroskaExtractor implements Extractor {
*/ */
private static final long SSA_TIMECODE_LAST_VALUE_SCALING_FACTOR = 10000; private static final long SSA_TIMECODE_LAST_VALUE_SCALING_FACTOR = 10000;
/** /**
* A special end timecode indicating that an SSA subtitle should be displayed until the next
* subtitle, or until the end of the media in the case of the last subtitle.
* <p>
* Equivalent to the UTF-8 string: " ".
*/
private static final byte[] SSA_TIMECODE_EMPTY =
new byte[] {32, 32, 32, 32, 32, 32, 32, 32, 32, 32};
/**
* The format of an SSA timecode. * The format of an SSA timecode.
*/ */
private static final String SSA_TIMECODE_FORMAT = "%01d:%02d:%02d:%02d"; private static final String SSA_TIMECODE_FORMAT = "%01d:%02d:%02d:%02d";
...@@ -1238,21 +1222,18 @@ public class MatroskaExtractor implements Extractor { ...@@ -1238,21 +1222,18 @@ public class MatroskaExtractor implements Extractor {
if (track.trueHdSampleRechunker != null) { if (track.trueHdSampleRechunker != null) {
track.trueHdSampleRechunker.sampleMetadata(track, timeUs); track.trueHdSampleRechunker.sampleMetadata(track, timeUs);
} else { } else {
if (CODEC_ID_SUBRIP.equals(track.codecId)) { if (CODEC_ID_SUBRIP.equals(track.codecId) || CODEC_ID_ASS.equals(track.codecId)) {
commitSubtitleSample( if (durationUs == C.TIME_UNSET) {
track, Log.w(TAG, "Skipping subtitle sample with no duration.");
SUBRIP_TIMECODE_FORMAT, } else {
SUBRIP_PREFIX_END_TIMECODE_OFFSET, setSubtitleEndTime(track.codecId, durationUs, subtitleSample.data);
SUBRIP_TIMECODE_LAST_VALUE_SCALING_FACTOR, // Note: If we ever want to support DRM protected subtitles then we'll need to output the
SUBRIP_TIMECODE_EMPTY); // appropriate encryption data here.
} else if (CODEC_ID_ASS.equals(track.codecId)) { track.output.sampleData(subtitleSample, subtitleSample.limit());
commitSubtitleSample( sampleBytesWritten += subtitleSample.limit();
track, }
SSA_TIMECODE_FORMAT,
SSA_PREFIX_END_TIMECODE_OFFSET,
SSA_TIMECODE_LAST_VALUE_SCALING_FACTOR,
SSA_TIMECODE_EMPTY);
} }
if ((blockFlags & C.BUFFER_FLAG_HAS_SUPPLEMENTAL_DATA) != 0) { if ((blockFlags & C.BUFFER_FLAG_HAS_SUPPLEMENTAL_DATA) != 0) {
// Append supplemental data. // Append supplemental data.
int size = blockAddData.limit(); int size = blockAddData.limit();
...@@ -1480,51 +1461,58 @@ public class MatroskaExtractor implements Extractor { ...@@ -1480,51 +1461,58 @@ public class MatroskaExtractor implements Extractor {
// the correct end timecode, which we might not have yet. // the correct end timecode, which we might not have yet.
} }
private void commitSubtitleSample(Track track, String timecodeFormat, int endTimecodeOffset,
long lastTimecodeValueScalingFactor, byte[] emptyTimecode) {
setSubtitleSampleDuration(
subtitleSample.data,
blockDurationUs,
timecodeFormat,
endTimecodeOffset,
lastTimecodeValueScalingFactor,
emptyTimecode);
// Note: If we ever want to support DRM protected subtitles then we'll need to output the
// appropriate encryption data here.
track.output.sampleData(subtitleSample, subtitleSample.limit());
sampleBytesWritten += subtitleSample.limit();
}
/** /**
* Formats {@code durationUs} using {@code timecodeFormat}, and sets it as the end timecode in * Overwrites the end timecode in {@code subtitleData} with the correctly formatted time derived
* {@code subtitleSampleData}. * from {@code durationUs}.
* *
* <p>See documentation on {@link #SSA_DIALOGUE_FORMAT} and {@link #SUBRIP_PREFIX} for why we use * <p>See documentation on {@link #SSA_DIALOGUE_FORMAT} and {@link #SUBRIP_PREFIX} for why we use
* the duration as the end timecode. * the duration as the end timecode.
*
* @param codecId The subtitle codec; must be {@link #CODEC_ID_SUBRIP} or {@link #CODEC_ID_ASS}.
* @param durationUs The duration of the sample, in microseconds.
* @param subtitleData The subtitle sample in which to overwrite the end timecode (output
* parameter).
*/
private static void setSubtitleEndTime(String codecId, long durationUs, byte[] subtitleData) {
byte[] endTimecode;
int endTimecodeOffset;
switch (codecId) {
case CODEC_ID_SUBRIP:
endTimecode =
formatSubtitleTimecode(
durationUs, SUBRIP_TIMECODE_FORMAT, SUBRIP_TIMECODE_LAST_VALUE_SCALING_FACTOR);
endTimecodeOffset = SUBRIP_PREFIX_END_TIMECODE_OFFSET;
break;
case CODEC_ID_ASS:
endTimecode =
formatSubtitleTimecode(
durationUs, SSA_TIMECODE_FORMAT, SSA_TIMECODE_LAST_VALUE_SCALING_FACTOR);
endTimecodeOffset = SSA_PREFIX_END_TIMECODE_OFFSET;
break;
default:
throw new IllegalArgumentException();
}
System.arraycopy(endTimecode, 0, subtitleData, endTimecodeOffset, endTimecode.length);
}
/**
* Formats {@code timeUs} using {@code timecodeFormat}, and sets it as the end timecode in {@code
* subtitleSampleData}.
*/ */
private static void setSubtitleSampleDuration( private static byte[] formatSubtitleTimecode(
byte[] subtitleSampleData, long timeUs, String timecodeFormat, long lastTimecodeValueScalingFactor) {
long durationUs, Assertions.checkArgument(timeUs != C.TIME_UNSET);
String timecodeFormat,
int endTimecodeOffset,
long lastTimecodeValueScalingFactor,
byte[] emptyTimecode) {
byte[] timeCodeData; byte[] timeCodeData;
if (durationUs == C.TIME_UNSET) { int hours = (int) (timeUs / (3600 * C.MICROS_PER_SECOND));
timeCodeData = emptyTimecode; timeUs -= (hours * 3600 * C.MICROS_PER_SECOND);
} else { int minutes = (int) (timeUs / (60 * C.MICROS_PER_SECOND));
int hours = (int) (durationUs / (3600 * C.MICROS_PER_SECOND)); timeUs -= (minutes * 60 * C.MICROS_PER_SECOND);
durationUs -= (hours * 3600 * C.MICROS_PER_SECOND); int seconds = (int) (timeUs / C.MICROS_PER_SECOND);
int minutes = (int) (durationUs / (60 * C.MICROS_PER_SECOND)); timeUs -= (seconds * C.MICROS_PER_SECOND);
durationUs -= (minutes * 60 * C.MICROS_PER_SECOND); int lastValue = (int) (timeUs / lastTimecodeValueScalingFactor);
int seconds = (int) (durationUs / C.MICROS_PER_SECOND);
durationUs -= (seconds * C.MICROS_PER_SECOND);
int lastValue = (int) (durationUs / lastTimecodeValueScalingFactor);
timeCodeData = Util.getUtf8Bytes(String.format(Locale.US, timecodeFormat, hours, minutes, timeCodeData = Util.getUtf8Bytes(String.format(Locale.US, timecodeFormat, hours, minutes,
seconds, lastValue)); seconds, lastValue));
} return timeCodeData;
Assertions.checkState(timeCodeData.length == emptyTimecode.length);
System.arraycopy(timeCodeData, 0, subtitleSampleData, endTimecodeOffset, timeCodeData.length);
} }
/** /**
......
...@@ -180,20 +180,16 @@ public final class SsaDecoder extends SimpleSubtitleDecoder { ...@@ -180,20 +180,16 @@ public final class SsaDecoder extends SimpleSubtitleDecoder {
return; return;
} }
long startTimeUs = SsaDecoder.parseTimecodeUs(lineValues[formatStartIndex]); long startTimeUs = parseTimecodeUs(lineValues[formatStartIndex]);
if (startTimeUs == C.TIME_UNSET) { if (startTimeUs == C.TIME_UNSET) {
Log.w(TAG, "Skipping invalid timing: " + dialogueLine); Log.w(TAG, "Skipping invalid timing: " + dialogueLine);
return; return;
} }
long endTimeUs = C.TIME_UNSET; long endTimeUs = parseTimecodeUs(lineValues[formatEndIndex]);
String endTimeString = lineValues[formatEndIndex]; if (endTimeUs == C.TIME_UNSET) {
if (!endTimeString.trim().isEmpty()) { Log.w(TAG, "Skipping invalid timing: " + dialogueLine);
endTimeUs = SsaDecoder.parseTimecodeUs(endTimeString); return;
if (endTimeUs == C.TIME_UNSET) {
Log.w(TAG, "Skipping invalid timing: " + dialogueLine);
return;
}
} }
String text = String text =
...@@ -203,10 +199,8 @@ public final class SsaDecoder extends SimpleSubtitleDecoder { ...@@ -203,10 +199,8 @@ public final class SsaDecoder extends SimpleSubtitleDecoder {
.replaceAll("\\\\n", "\n"); .replaceAll("\\\\n", "\n");
cues.add(new Cue(text)); cues.add(new Cue(text));
cueTimesUs.add(startTimeUs); cueTimesUs.add(startTimeUs);
if (endTimeUs != C.TIME_UNSET) { cues.add(Cue.EMPTY);
cues.add(Cue.EMPTY); cueTimesUs.add(endTimeUs);
cueTimesUs.add(endTimeUs);
}
} }
/** /**
......
...@@ -43,7 +43,7 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { ...@@ -43,7 +43,7 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
private static final String SUBRIP_TIMECODE = "(?:(\\d+):)?(\\d+):(\\d+),(\\d+)"; private static final String SUBRIP_TIMECODE = "(?:(\\d+):)?(\\d+):(\\d+),(\\d+)";
private static final Pattern SUBRIP_TIMING_LINE = private static final Pattern SUBRIP_TIMING_LINE =
Pattern.compile("\\s*(" + SUBRIP_TIMECODE + ")\\s*-->\\s*(" + SUBRIP_TIMECODE + ")?\\s*"); Pattern.compile("\\s*(" + SUBRIP_TIMECODE + ")\\s*-->\\s*(" + SUBRIP_TIMECODE + ")\\s*");
private static final Pattern SUBRIP_TAG_PATTERN = Pattern.compile("\\{\\\\.*?\\}"); private static final Pattern SUBRIP_TAG_PATTERN = Pattern.compile("\\{\\\\.*?\\}");
private static final String SUBRIP_ALIGNMENT_TAG = "\\{\\\\an[1-9]\\}"; private static final String SUBRIP_ALIGNMENT_TAG = "\\{\\\\an[1-9]\\}";
...@@ -90,7 +90,6 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { ...@@ -90,7 +90,6 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
} }
// Read and parse the timing line. // Read and parse the timing line.
boolean haveEndTimecode = false;
currentLine = subripData.readLine(); currentLine = subripData.readLine();
if (currentLine == null) { if (currentLine == null) {
Log.w(TAG, "Unexpected end"); Log.w(TAG, "Unexpected end");
...@@ -99,11 +98,8 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { ...@@ -99,11 +98,8 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
Matcher matcher = SUBRIP_TIMING_LINE.matcher(currentLine); Matcher matcher = SUBRIP_TIMING_LINE.matcher(currentLine);
if (matcher.matches()) { if (matcher.matches()) {
cueTimesUs.add(parseTimecode(matcher, 1)); cueTimesUs.add(parseTimecode(matcher, /* groupOffset= */ 1));
if (!TextUtils.isEmpty(matcher.group(6))) { cueTimesUs.add(parseTimecode(matcher, /* groupOffset= */ 6));
haveEndTimecode = true;
cueTimesUs.add(parseTimecode(matcher, 6));
}
} else { } else {
Log.w(TAG, "Skipping invalid timing: " + currentLine); Log.w(TAG, "Skipping invalid timing: " + currentLine);
continue; continue;
...@@ -133,10 +129,7 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { ...@@ -133,10 +129,7 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
} }
} }
cues.add(buildCue(text, alignmentTag)); cues.add(buildCue(text, alignmentTag));
cues.add(Cue.EMPTY);
if (haveEndTimecode) {
cues.add(Cue.EMPTY);
}
} }
Cue[] cuesArray = new Cue[cues.size()]; Cue[] cuesArray = new Cue[cues.size()];
......
...@@ -10,3 +10,5 @@ Format: Layer, Start, End, Style, Name, Text ...@@ -10,3 +10,5 @@ Format: Layer, Start, End, Style, Name, Text
Dialogue: 0,Invalid,0:00:01.23,Default,Olly,This is the first subtitle{ignored}. Dialogue: 0,Invalid,0:00:01.23,Default,Olly,This is the first subtitle{ignored}.
Dialogue: 0,0:00:02.34,Invalid,Default,Olly,This is the second subtitle \nwith a newline \Nand another. Dialogue: 0,0:00:02.34,Invalid,Default,Olly,This is the second subtitle \nwith a newline \Nand another.
Dialogue: 0,0:00:04:56,0:00:08:90,Default,Olly,This is the third subtitle, with a comma. Dialogue: 0,0:00:04:56,0:00:08:90,Default,Olly,This is the third subtitle, with a comma.
Dialogue: 0, ,0:00:10:90,Default,Olly,This is the fourth subtitle.
Dialogue: 0,0:00:12:90, ,Default,Olly,This is the fifth subtitle.
[Script Info]
Title: SomeTitle
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Open Sans Semibold,36,&H00FFFFFF,&H000000FF,&H00020713,&H00000000,-1,0,0,0,100,100,0,0,1,1.7,0,2,0,0,28,1
[Events]
Format: Layer, Start, End, Style, Name, Text
Dialogue: 0,0:00:00.00, ,Default,Olly,This is the first subtitle.
Dialogue: 0,0:00:02.34, ,Default,Olly,This is the second subtitle \nwith a newline \Nand another.
Dialogue: 0,0:00:04.56, ,Default,Olly,This is the third subtitle, with a comma.
1
00:00:00,000 -->
SubRip doesn't technically allow missing end timecodes.
2
00:00:02,345 -->
We interpret it to mean that a subtitle extends to the start of the next one.
3
00:00:03,456 -->
Or to the end of the media.
...@@ -9,3 +9,11 @@ Second subtitle with second line. ...@@ -9,3 +9,11 @@ Second subtitle with second line.
3 3
00:00:04,567 --> 00:00:08,901 00:00:04,567 --> 00:00:08,901
This is the third subtitle. This is the third subtitle.
4
--> 00:00:10,901
This is the fourth subtitle.
5
00:00:12,901 -->
This is the fifth subtitle.
...@@ -36,7 +36,6 @@ public final class SsaDecoderTest { ...@@ -36,7 +36,6 @@ public final class SsaDecoderTest {
private static final String TYPICAL_DIALOGUE_ONLY = "ssa/typical_dialogue"; private static final String TYPICAL_DIALOGUE_ONLY = "ssa/typical_dialogue";
private static final String TYPICAL_FORMAT_ONLY = "ssa/typical_format"; private static final String TYPICAL_FORMAT_ONLY = "ssa/typical_format";
private static final String INVALID_TIMECODES = "ssa/invalid_timecodes"; private static final String INVALID_TIMECODES = "ssa/invalid_timecodes";
private static final String NO_END_TIMECODES = "ssa/no_end_timecodes";
@Test @Test
public void testDecodeEmpty() throws IOException { public void testDecodeEmpty() throws IOException {
...@@ -92,28 +91,6 @@ public final class SsaDecoderTest { ...@@ -92,28 +91,6 @@ public final class SsaDecoderTest {
assertTypicalCue3(subtitle, 0); assertTypicalCue3(subtitle, 0);
} }
@Test
public void testDecodeNoEndTimecodes() throws IOException {
SsaDecoder decoder = new SsaDecoder();
byte[] bytes =
TestUtil.getByteArray(ApplicationProvider.getApplicationContext(), NO_END_TIMECODES);
Subtitle subtitle = decoder.decode(bytes, bytes.length, false);
assertThat(subtitle.getEventTimeCount()).isEqualTo(3);
assertThat(subtitle.getEventTime(0)).isEqualTo(0);
assertThat(subtitle.getCues(subtitle.getEventTime(0)).get(0).text.toString())
.isEqualTo("This is the first subtitle.");
assertThat(subtitle.getEventTime(1)).isEqualTo(2340000);
assertThat(subtitle.getCues(subtitle.getEventTime(1)).get(0).text.toString())
.isEqualTo("This is the second subtitle \nwith a newline \nand another.");
assertThat(subtitle.getEventTime(2)).isEqualTo(4560000);
assertThat(subtitle.getCues(subtitle.getEventTime(2)).get(0).text.toString())
.isEqualTo("This is the third subtitle, with a comma.");
}
private static void assertTypicalCue1(Subtitle subtitle, int eventIndex) { private static void assertTypicalCue1(Subtitle subtitle, int eventIndex) {
assertThat(subtitle.getEventTime(eventIndex)).isEqualTo(0); assertThat(subtitle.getEventTime(eventIndex)).isEqualTo(0);
assertThat(subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).text.toString()) assertThat(subtitle.getCues(subtitle.getEventTime(eventIndex)).get(0).text.toString())
......
...@@ -39,7 +39,6 @@ public final class SubripDecoderTest { ...@@ -39,7 +39,6 @@ public final class SubripDecoderTest {
private static final String TYPICAL_NEGATIVE_TIMESTAMPS = "subrip/typical_negative_timestamps"; private static final String TYPICAL_NEGATIVE_TIMESTAMPS = "subrip/typical_negative_timestamps";
private static final String TYPICAL_UNEXPECTED_END = "subrip/typical_unexpected_end"; private static final String TYPICAL_UNEXPECTED_END = "subrip/typical_unexpected_end";
private static final String TYPICAL_WITH_TAGS = "subrip/typical_with_tags"; private static final String TYPICAL_WITH_TAGS = "subrip/typical_with_tags";
private static final String NO_END_TIMECODES_FILE = "subrip/no_end_timecodes";
@Test @Test
public void testDecodeEmpty() throws IOException { public void testDecodeEmpty() throws IOException {
...@@ -146,28 +145,6 @@ public final class SubripDecoderTest { ...@@ -146,28 +145,6 @@ public final class SubripDecoderTest {
} }
@Test @Test
public void testDecodeNoEndTimecodes() throws IOException {
SubripDecoder decoder = new SubripDecoder();
byte[] bytes =
TestUtil.getByteArray(ApplicationProvider.getApplicationContext(), NO_END_TIMECODES_FILE);
Subtitle subtitle = decoder.decode(bytes, bytes.length, false);
assertThat(subtitle.getEventTimeCount()).isEqualTo(3);
assertThat(subtitle.getEventTime(0)).isEqualTo(0);
assertThat(subtitle.getCues(subtitle.getEventTime(0)).get(0).text.toString())
.isEqualTo("SubRip doesn't technically allow missing end timecodes.");
assertThat(subtitle.getEventTime(1)).isEqualTo(2345000);
assertThat(subtitle.getCues(subtitle.getEventTime(1)).get(0).text.toString())
.isEqualTo("We interpret it to mean that a subtitle extends to the start of the next one.");
assertThat(subtitle.getEventTime(2)).isEqualTo(3456000);
assertThat(subtitle.getCues(subtitle.getEventTime(2)).get(0).text.toString())
.isEqualTo("Or to the end of the media.");
}
@Test
public void testDecodeCueWithTag() throws IOException { public void testDecodeCueWithTag() throws IOException {
SubripDecoder decoder = new SubripDecoder(); SubripDecoder decoder = new SubripDecoder();
byte[] bytes = byte[] bytes =
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment