Commit 5609efd0 by Lev

Added UTF-16 (LE) and UTF-16 (BE) support for subrip subtitles.

parent ab4d37f4
...@@ -532,6 +532,54 @@ public final class ParsableByteArray { ...@@ -532,6 +532,54 @@ public final class ParsableByteArray {
} }
/** /**
* Reads a line of text.
*
* <p>A line is considered to be terminated by any one of a carriage return ('\r'), a line feed
* ('\n'), or a carriage return followed immediately by a line feed ('\r\n'). The UTF-16 charset
* is used. This method discards leading UTF-16 byte order marks (BOM), if present.
*
* @param isLittleEndian Whether the data is encoded as UTF-16 (LE). If false, UTF-16 (BE) is
* assumed.
* @return The line not including any line-termination characters, or null if the end of the data
* has already been reached.
*/
@Nullable
public String readLineUtf16(boolean isLittleEndian) {
  // A UTF-16 code unit occupies two bytes. If fewer than two bytes remain there is nothing left
  // to decode: consume any dangling trailing byte and report end of data. Without this, a
  // stray byte would leave position stuck just short of limit, so callers that loop until null
  // would never terminate and the terminator checks below would read past limit.
  if (limit - position < 2) {
    position = limit;
    return null;
  }
  int lineLimit = calculateLineLimitForUtf16(isLittleEndian);
  if (lineLimit - position >= 2 && isUtf16BOM(data[position], data[position + 1])) {
    // There's a UTF-16 byte order mark at the start of the line. Discard it.
    position += 2;
  }
  int lineLengthBytes = lineLimit - position;
  String line =
      isLittleEndian
          ? Util.fromUtf16LEBytes(data, position, lineLengthBytes)
          : Util.fromUtf16BEBytes(data, position, lineLengthBytes);
  position = lineLimit;
  // Skip the line terminator: '\r', '\n' or '\r\n'. Each check first verifies that a complete
  // two-byte code unit is available, so data[position + 1] is never read beyond limit.
  if (limit - position >= 2
      && isEqualsInUtf16(data[position], data[position + 1], '\r', isLittleEndian)) {
    position += 2;
  }
  if (limit - position >= 2
      && isEqualsInUtf16(data[position], data[position + 1], '\n', isLittleEndian)) {
    position += 2;
  }
  return line;
}
/**
* Reads a long value encoded by UTF-8 encoding * Reads a long value encoded by UTF-8 encoding
* *
* @throws NumberFormatException if there is a problem with decoding * @throws NumberFormatException if there is a problem with decoding
...@@ -565,4 +613,29 @@ public final class ParsableByteArray { ...@@ -565,4 +613,29 @@ public final class ParsableByteArray {
position += length; position += length;
return value; return value;
} }
/**
 * Returns whether two consecutive bytes form the UTF-16 code unit {@code value}.
 *
 * @param first The byte that comes first in the data.
 * @param second The byte that comes second in the data.
 * @param value The code unit to compare against.
 * @param isLittleEndian Whether {@code first} is the low-order byte (UTF-16 LE). If false,
 *     {@code first} is the high-order byte (UTF-16 BE).
 * @return Whether the combined bytes equal {@code value}.
 */
private boolean isEqualsInUtf16(byte first, byte second, char value, boolean isLittleEndian) {
  // Mask to 0..255 before combining: a byte is sign-extended when promoted to int, so without
  // the mask any byte >= 0x80 would turn the combined value negative and never match a char.
  int lowByte = (isLittleEndian ? first : second) & 0xFF;
  int highByte = (isLittleEndian ? second : first) & 0xFF;
  return ((highByte << 8) | lowByte) == value;
}
/**
 * Returns whether two consecutive bytes are a UTF-16 byte order mark in either byte order,
 * i.e. {@code FF FE} or {@code FE FF}.
 *
 * @param first The byte that comes first in the data.
 * @param second The byte that comes second in the data.
 * @return Whether the pair is a UTF-16 byte order mark.
 */
private boolean isUtf16BOM(byte first, byte second) {
  // Combine the two unsigned byte values so that both marks can be matched as plain integers.
  int pair = ((first & 0xFF) << 8) | (second & 0xFF);
  return pair == 0xFFFE || pair == 0xFEFF;
}
/**
 * Scans forward from {@code position} in two-byte steps and returns the index of the first code
 * unit that {@link Util#isLinebreak} accepts. If no such code unit is found, returns the index
 * at which fewer than two bytes remain before {@code limit}.
 *
 * @param isLittleEndian Whether each byte pair is combined low byte first (UTF-16 LE). If false,
 *     the pair is combined high byte first (UTF-16 BE).
 * @return The index at which the current line ends.
 */
private int calculateLineLimitForUtf16(boolean isLittleEndian) {
  int cursor = position;
  for (; cursor < limit - 1; cursor += 2) {
    // NOTE(review): the bytes are combined without masking, so a byte >= 0x80 yields a negative
    // value. This presumably never matches a line break in Util.isLinebreak - confirm.
    int codeUnit =
        isLittleEndian
            ? (data[cursor] | data[cursor + 1] << 8)
            : (data[cursor] << 8 | data[cursor + 1]);
    if (Util.isLinebreak(codeUnit)) {
      break;
    }
  }
  return cursor;
}
} }
...@@ -683,6 +683,30 @@ public final class Util { ...@@ -683,6 +683,30 @@ public final class Util {
} }
/** /**
* Returns a new {@link String} constructed by decoding UTF-16 (LE) encoded bytes in a subarray.
*
* @param bytes The UTF-16 encoded bytes to decode.
* @param offset The index of the first byte to decode.
* @param length The number of bytes to decode.
* @return The string.
*/
public static String fromUtf16LEBytes(byte[] bytes, int offset, int length) {
// The byte order is fixed by the explicit little-endian charset rather than detected from the
// data, so callers that want a byte order mark removed must skip it before calling.
return new String(bytes, offset, length, Charsets.UTF_16LE);
}
/**
* Returns a new {@link String} constructed by decoding UTF-16 (BE) encoded bytes in a subarray.
*
* @param bytes The UTF-16 encoded bytes to decode.
* @param offset The index of the first byte to decode.
* @param length The number of bytes to decode.
* @return The string.
*/
public static String fromUtf16BEBytes(byte[] bytes, int offset, int length) {
// The byte order is fixed by the explicit big-endian charset rather than detected from the
// data, so callers that want a byte order mark removed must skip it before calling.
return new String(bytes, offset, length, Charsets.UTF_16BE);
}
/**
* Returns a new byte array containing the code points of a {@link String} encoded using UTF-8. * Returns a new byte array containing the code points of a {@link String} encoded using UTF-8.
* *
* @param value The {@link String} whose bytes should be obtained. * @param value The {@link String} whose bytes should be obtained.
......
...@@ -26,6 +26,8 @@ import com.google.android.exoplayer2.util.Assertions; ...@@ -26,6 +26,8 @@ import com.google.android.exoplayer2.util.Assertions;
import com.google.android.exoplayer2.util.Log; import com.google.android.exoplayer2.util.Log;
import com.google.android.exoplayer2.util.LongArray; import com.google.android.exoplayer2.util.LongArray;
import com.google.android.exoplayer2.util.ParsableByteArray; import com.google.android.exoplayer2.util.ParsableByteArray;
import com.google.common.base.Charsets;
import java.nio.charset.Charset;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
...@@ -75,8 +77,25 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { ...@@ -75,8 +77,25 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
LongArray cueTimesUs = new LongArray(); LongArray cueTimesUs = new LongArray();
ParsableByteArray subripData = new ParsableByteArray(bytes, length); ParsableByteArray subripData = new ParsableByteArray(bytes, length);
@Nullable Charset utf16Charset;
if (bytes.length >= 2) {
utf16Charset = getUtf16Charset(bytes[0], bytes[1]);
} else {
utf16Charset = null;
}
@Nullable String currentLine; @Nullable String currentLine;
while ((currentLine = subripData.readLine()) != null) { while (true) {
if (utf16Charset != null) {
currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE));
} else {
currentLine = subripData.readLine();
}
if (currentLine == null) {
break;
}
if (currentLine.length() == 0) { if (currentLine.length() == 0) {
// Skip blank lines. // Skip blank lines.
continue; continue;
...@@ -91,7 +110,11 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { ...@@ -91,7 +110,11 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
} }
// Read and parse the timing line. // Read and parse the timing line.
if (utf16Charset != null) {
currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE));
} else {
currentLine = subripData.readLine(); currentLine = subripData.readLine();
}
if (currentLine == null) { if (currentLine == null) {
Log.w(TAG, "Unexpected end"); Log.w(TAG, "Unexpected end");
break; break;
...@@ -109,14 +132,22 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { ...@@ -109,14 +132,22 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
// Read and parse the text and tags. // Read and parse the text and tags.
textBuilder.setLength(0); textBuilder.setLength(0);
tags.clear(); tags.clear();
if (utf16Charset != null) {
currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE));
} else {
currentLine = subripData.readLine(); currentLine = subripData.readLine();
}
while (!TextUtils.isEmpty(currentLine)) { while (!TextUtils.isEmpty(currentLine)) {
if (textBuilder.length() > 0) { if (textBuilder.length() > 0) {
textBuilder.append("<br>"); textBuilder.append("<br>");
} }
textBuilder.append(processLine(currentLine, tags)); textBuilder.append(processLine(currentLine, tags));
if (utf16Charset != null) {
currentLine = subripData.readLineUtf16(utf16Charset.equals(Charsets.UTF_16LE));
} else {
currentLine = subripData.readLine(); currentLine = subripData.readLine();
} }
}
Spanned text = Html.fromHtml(textBuilder.toString()); Spanned text = Html.fromHtml(textBuilder.toString());
...@@ -138,6 +169,21 @@ public final class SubripDecoder extends SimpleSubtitleDecoder { ...@@ -138,6 +169,21 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
return new SubripSubtitle(cuesArray, cueTimesUsArray); return new SubripSubtitle(cuesArray, cueTimesUsArray);
} }
/**
 * Maps a two-byte prefix to the UTF-16 charset whose byte order mark it is.
 *
 * @param first The first byte of the data.
 * @param second The second byte of the data.
 * @return {@link Charsets#UTF_16BE} for {@code FE FF}, {@link Charsets#UTF_16LE} for
 *     {@code FF FE}, or null if the bytes are neither mark.
 */
@Nullable
private Charset getUtf16Charset(byte first, byte second) {
  // Treat both bytes as unsigned and match the pair as a single 16-bit value.
  int prefix = ((first & 0xFF) << 8) | (second & 0xFF);
  switch (prefix) {
    case 0xFEFF:
      // UTF-16 (BE)
      return Charsets.UTF_16BE;
    case 0xFFFE:
      // UTF-16 (LE)
      return Charsets.UTF_16LE;
    default:
      return null;
  }
}
/** /**
* Trims and removes tags from the given line. The removed tags are added to {@code tags}. * Trims and removes tags from the given line. The removed tags are added to {@code tags}.
* *
......
...@@ -40,6 +40,8 @@ public final class SubripDecoderTest { ...@@ -40,6 +40,8 @@ public final class SubripDecoderTest {
private static final String TYPICAL_NEGATIVE_TIMESTAMPS = private static final String TYPICAL_NEGATIVE_TIMESTAMPS =
"media/subrip/typical_negative_timestamps"; "media/subrip/typical_negative_timestamps";
private static final String TYPICAL_UNEXPECTED_END = "media/subrip/typical_unexpected_end"; private static final String TYPICAL_UNEXPECTED_END = "media/subrip/typical_unexpected_end";
private static final String TYPICAL_UTF16BE = "media/subrip/typical_utf16be";
private static final String TYPICAL_UTF16LE = "media/subrip/typical_utf16le";
private static final String TYPICAL_WITH_TAGS = "media/subrip/typical_with_tags"; private static final String TYPICAL_WITH_TAGS = "media/subrip/typical_with_tags";
private static final String TYPICAL_NO_HOURS_AND_MILLIS = private static final String TYPICAL_NO_HOURS_AND_MILLIS =
"media/subrip/typical_no_hours_and_millis"; "media/subrip/typical_no_hours_and_millis";
...@@ -81,6 +83,34 @@ public final class SubripDecoderTest { ...@@ -81,6 +83,34 @@ public final class SubripDecoderTest {
} }
@Test @Test
public void decodeTypicalUtf16LE() throws IOException {
  // Decode the UTF-16 (LE) fixture and check the three cues of the "typical" sample.
  byte[] subripBytes =
      TestUtil.getByteArray(ApplicationProvider.getApplicationContext(), TYPICAL_UTF16LE);
  Subtitle decoded = new SubripDecoder().decode(subripBytes, subripBytes.length, false);

  // Six event times are expected; the cue assertions are made at event indices 0, 2 and 4.
  assertThat(decoded.getEventTimeCount()).isEqualTo(6);
  assertTypicalCue1(decoded, 0);
  assertTypicalCue2(decoded, 2);
  assertTypicalCue3(decoded, 4);
}
@Test
public void decodeTypicalUtf16BE() throws IOException {
  // Decode the UTF-16 (BE) fixture and check the three cues of the "typical" sample.
  byte[] subripBytes =
      TestUtil.getByteArray(ApplicationProvider.getApplicationContext(), TYPICAL_UTF16BE);
  Subtitle decoded = new SubripDecoder().decode(subripBytes, subripBytes.length, false);

  // Six event times are expected; the cue assertions are made at event indices 0, 2 and 4.
  assertThat(decoded.getEventTimeCount()).isEqualTo(6);
  assertTypicalCue1(decoded, 0);
  assertTypicalCue2(decoded, 2);
  assertTypicalCue3(decoded, 4);
}
@Test
public void decodeTypicalExtraBlankLine() throws IOException { public void decodeTypicalExtraBlankLine() throws IOException {
SubripDecoder decoder = new SubripDecoder(); SubripDecoder decoder = new SubripDecoder();
byte[] bytes = byte[] bytes =
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment