Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
SDK
/
exoplayer
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
5609efd0
authored
Nov 05, 2022
by
Lev
Browse files
Options
_('Browse Files')
Download
Email Patches
Plain Diff
Added UTF-16 (LE) and UTF-16 (BE) support for subrip subtitles.
parent
ab4d37f4
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
174 additions
and
1 deletions
library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java
library/common/src/main/java/com/google/android/exoplayer2/util/Util.java
library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java
library/extractor/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java
testdata/src/test/assets/media/subrip/typical_utf16be
testdata/src/test/assets/media/subrip/typical_utf16le
library/common/src/main/java/com/google/android/exoplayer2/util/ParsableByteArray.java
View file @
5609efd0
...
@@ -532,6 +532,54 @@ public final class ParsableByteArray {
...
@@ -532,6 +532,54 @@ public final class ParsableByteArray {
}
}
/**
/**
 * Reads a line of UTF-16 encoded text.
 *
 * <p>A line is considered to be terminated by any one of a carriage return ('\r'), a line feed
 * ('\n'), or a carriage return followed immediately by a line feed ('\r\n'). This method discards
 * a leading UTF-16 byte order mark (BOM), if present at the start of the line.
 *
 * <p>If the remaining data ends with a single dangling byte that cannot form a complete UTF-16
 * code unit, that byte is skipped. This guarantees that the method never reads beyond the limit
 * and that repeated calls always make progress and eventually return {@code null}.
 *
 * @param isLittleEndian Whether the data is encoded as UTF-16 (LE). If {@code false}, UTF-16 (BE)
 *     is assumed.
 * @return The line not including any line-termination characters, or null if the end of the data
 *     has already been reached.
 */
@Nullable
public String readLineUtf16(boolean isLittleEndian) {
  if (bytesLeft() == 0) {
    return null;
  }

  int lineLimit = calculateLineLimitForUtf16(isLittleEndian);
  if (lineLimit - position >= 2 && isUtf16BOM(data[position], data[position + 1])) {
    // There's a UTF-16 byte order mark at the start of the line. Discard it.
    position += 2;
  }

  int lineLength = lineLimit - position;
  String line =
      isLittleEndian
          ? Util.fromUtf16LEBytes(data, position, lineLength)
          : Util.fromUtf16BEBytes(data, position, lineLength);
  position = lineLimit;

  if (limit - position < 2) {
    // Either the end of the data was reached, or only one dangling byte is left. A lone byte can
    // be neither a line terminator nor a character, so consume it instead of reading data[limit].
    position = limit;
    return line;
  }
  if (isEqualsInUtf16(data[position], data[position + 1], '\r', isLittleEndian)) {
    position += 2;
    if (limit - position < 2) {
      // Same reasoning as above: nothing decodable follows the carriage return.
      position = limit;
      return line;
    }
  }
  if (isEqualsInUtf16(data[position], data[position + 1], '\n', isLittleEndian)) {
    position += 2;
  }
  return line;
}
/**
* Reads a long value encoded by UTF-8 encoding
* Reads a long value encoded by UTF-8 encoding
*
*
* @throws NumberFormatException if there is a problem with decoding
* @throws NumberFormatException if there is a problem with decoding
...
@@ -565,4 +613,29 @@ public final class ParsableByteArray {
...
@@ -565,4 +613,29 @@ public final class ParsableByteArray {
position
+=
length
;
position
+=
length
;
return
value
;
return
value
;
}
}
/**
 * Returns whether two consecutive bytes, interpreted as a single UTF-16 code unit, are equal to
 * the given character.
 *
 * @param first The byte at the lower index.
 * @param second The byte at the higher index.
 * @param value The character to compare the decoded code unit against.
 * @param isLittleEndian Whether {@code first} is the low-order byte (UTF-16 (LE)). If {@code
 *     false}, {@code first} is the high-order byte (UTF-16 (BE)).
 * @return Whether the decoded code unit equals {@code value}.
 */
private boolean isEqualsInUtf16(byte first, byte second, char value, boolean isLittleEndian) {
  // Bytes are signed in Java. Mask them to their unsigned value before combining, otherwise any
  // byte >= 0x80 is sign-extended and the comparison against the (unsigned) char can never match.
  int lowByte = (isLittleEndian ? first : second) & 0xFF;
  int highByte = (isLittleEndian ? second : first) & 0xFF;
  return (highByte << 8 | lowByte) == value;
}
/**
 * Returns whether the two bytes form a UTF-16 byte order mark in either byte order, i.e. the
 * sequence {@code FF FE} or the sequence {@code FE FF}.
 *
 * @param first The byte at the lower index.
 * @param second The byte at the higher index.
 * @return Whether the pair is a UTF-16 byte order mark.
 */
private boolean isUtf16BOM(byte first, byte second) {
  // Combine both bytes as unsigned values so the pair can be compared against the two markers.
  int marker = (first & 0xFF) << 8 | (second & 0xFF);
  return marker == 0xFFFE || marker == 0xFEFF;
}
/**
 * Finds the index at which the current UTF-16 line ends.
 *
 * <p>Starting at {@code position}, the data is scanned two bytes at a time. Each pair is combined
 * into one value according to the byte order and passed to {@link Util#isLinebreak}. Scanning
 * stops at the first pair reported as a line break, or when fewer than two bytes remain before
 * {@code limit}.
 *
 * <p>Note that the returned index is {@code limit - 1} rather than {@code limit} when an odd
 * number of bytes is scanned without finding a line break. The bytes are combined without
 * masking, so bytes with the high bit set are sign-extended before being tested.
 *
 * @param isLittleEndian Whether the first byte of each pair is the low-order byte.
 * @return The index of the first byte of the line break, or the index where scanning ran out of
 *     complete byte pairs.
 */
private int calculateLineLimitForUtf16(boolean isLittleEndian) {
  int cursor = position;
  for (; cursor < limit - 1; cursor += 2) {
    int codeUnit =
        isLittleEndian
            ? data[cursor] | data[cursor + 1] << 8
            : data[cursor] << 8 | data[cursor + 1];
    if (Util.isLinebreak(codeUnit)) {
      return cursor;
    }
  }
  return cursor;
}
}
}
library/common/src/main/java/com/google/android/exoplayer2/util/Util.java
View file @
5609efd0
...
@@ -683,6 +683,30 @@ public final class Util {
...
@@ -683,6 +683,30 @@ public final class Util {
}
}
/**
/**
 * Decodes part of a byte array as UTF-16 (LE) text.
 *
 * @param bytes The array holding the UTF-16 (LE) encoded bytes.
 * @param offset The index in {@code bytes} of the first byte to decode.
 * @param length The number of bytes to decode, starting at {@code offset}.
 * @return A new {@link String} holding the decoded text.
 */
public static String fromUtf16LEBytes(byte[] bytes, int offset, int length) {
  String decoded = new String(bytes, offset, length, Charsets.UTF_16LE);
  return decoded;
}
/**
 * Decodes part of a byte array as UTF-16 (BE) text.
 *
 * @param bytes The array holding the UTF-16 (BE) encoded bytes.
 * @param offset The index in {@code bytes} of the first byte to decode.
 * @param length The number of bytes to decode, starting at {@code offset}.
 * @return A new {@link String} holding the decoded text.
 */
public static String fromUtf16BEBytes(byte[] bytes, int offset, int length) {
  String decoded = new String(bytes, offset, length, Charsets.UTF_16BE);
  return decoded;
}
/**
* Returns a new byte array containing the code points of a {@link String} encoded using UTF-8.
* Returns a new byte array containing the code points of a {@link String} encoded using UTF-8.
*
*
* @param value The {@link String} whose bytes should be obtained.
* @param value The {@link String} whose bytes should be obtained.
...
...
library/extractor/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java
View file @
5609efd0
...
@@ -26,6 +26,8 @@ import com.google.android.exoplayer2.util.Assertions;
...
@@ -26,6 +26,8 @@ import com.google.android.exoplayer2.util.Assertions;
import
com.google.android.exoplayer2.util.Log
;
import
com.google.android.exoplayer2.util.Log
;
import
com.google.android.exoplayer2.util.LongArray
;
import
com.google.android.exoplayer2.util.LongArray
;
import
com.google.android.exoplayer2.util.ParsableByteArray
;
import
com.google.android.exoplayer2.util.ParsableByteArray
;
import
com.google.common.base.Charsets
;
import
java.nio.charset.Charset
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.regex.Matcher
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
import
java.util.regex.Pattern
;
...
@@ -75,8 +77,25 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
...
@@ -75,8 +77,25 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
LongArray
cueTimesUs
=
new
LongArray
();
LongArray
cueTimesUs
=
new
LongArray
();
ParsableByteArray
subripData
=
new
ParsableByteArray
(
bytes
,
length
);
ParsableByteArray
subripData
=
new
ParsableByteArray
(
bytes
,
length
);
@Nullable
Charset
utf16Charset
;
if
(
bytes
.
length
>=
2
)
{
utf16Charset
=
getUtf16Charset
(
bytes
[
0
],
bytes
[
1
]);
}
else
{
utf16Charset
=
null
;
}
@Nullable
String
currentLine
;
@Nullable
String
currentLine
;
while
((
currentLine
=
subripData
.
readLine
())
!=
null
)
{
while
(
true
)
{
if
(
utf16Charset
!=
null
)
{
currentLine
=
subripData
.
readLineUtf16
(
utf16Charset
.
equals
(
Charsets
.
UTF_16LE
));
}
else
{
currentLine
=
subripData
.
readLine
();
}
if
(
currentLine
==
null
)
{
break
;
}
if
(
currentLine
.
length
()
==
0
)
{
if
(
currentLine
.
length
()
==
0
)
{
// Skip blank lines.
// Skip blank lines.
continue
;
continue
;
...
@@ -91,7 +110,11 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
...
@@ -91,7 +110,11 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
}
}
// Read and parse the timing line.
// Read and parse the timing line.
if
(
utf16Charset
!=
null
)
{
currentLine
=
subripData
.
readLineUtf16
(
utf16Charset
.
equals
(
Charsets
.
UTF_16LE
));
}
else
{
currentLine
=
subripData
.
readLine
();
currentLine
=
subripData
.
readLine
();
}
if
(
currentLine
==
null
)
{
if
(
currentLine
==
null
)
{
Log
.
w
(
TAG
,
"Unexpected end"
);
Log
.
w
(
TAG
,
"Unexpected end"
);
break
;
break
;
...
@@ -109,14 +132,22 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
...
@@ -109,14 +132,22 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
// Read and parse the text and tags.
// Read and parse the text and tags.
textBuilder
.
setLength
(
0
);
textBuilder
.
setLength
(
0
);
tags
.
clear
();
tags
.
clear
();
if
(
utf16Charset
!=
null
)
{
currentLine
=
subripData
.
readLineUtf16
(
utf16Charset
.
equals
(
Charsets
.
UTF_16LE
));
}
else
{
currentLine
=
subripData
.
readLine
();
currentLine
=
subripData
.
readLine
();
}
while
(!
TextUtils
.
isEmpty
(
currentLine
))
{
while
(!
TextUtils
.
isEmpty
(
currentLine
))
{
if
(
textBuilder
.
length
()
>
0
)
{
if
(
textBuilder
.
length
()
>
0
)
{
textBuilder
.
append
(
"<br>"
);
textBuilder
.
append
(
"<br>"
);
}
}
textBuilder
.
append
(
processLine
(
currentLine
,
tags
));
textBuilder
.
append
(
processLine
(
currentLine
,
tags
));
if
(
utf16Charset
!=
null
)
{
currentLine
=
subripData
.
readLineUtf16
(
utf16Charset
.
equals
(
Charsets
.
UTF_16LE
));
}
else
{
currentLine
=
subripData
.
readLine
();
currentLine
=
subripData
.
readLine
();
}
}
}
Spanned
text
=
Html
.
fromHtml
(
textBuilder
.
toString
());
Spanned
text
=
Html
.
fromHtml
(
textBuilder
.
toString
());
...
@@ -138,6 +169,21 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
...
@@ -138,6 +169,21 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
return
new
SubripSubtitle
(
cuesArray
,
cueTimesUsArray
);
return
new
SubripSubtitle
(
cuesArray
,
cueTimesUsArray
);
}
}
/**
 * Maps the first two bytes of the data to a UTF-16 charset based on the byte order mark.
 *
 * @param first The first byte of the data.
 * @param second The second byte of the data.
 * @return {@link Charsets#UTF_16BE} for the sequence {@code FE FF}, {@link Charsets#UTF_16LE} for
 *     the sequence {@code FF FE}, or null if the bytes are not a UTF-16 byte order mark.
 */
@Nullable
private Charset getUtf16Charset(byte first, byte second) {
  // Treat both bytes as unsigned and look at them as a single 16-bit marker.
  int marker = (first & 0xFF) << 8 | (second & 0xFF);
  switch (marker) {
    case 0xFEFF:
      // UTF-16 (BE)
      return Charsets.UTF_16BE;
    case 0xFFFE:
      // UTF-16 (LE)
      return Charsets.UTF_16LE;
    default:
      return null;
  }
}
/**
/**
* Trims and removes tags from the given line. The removed tags are added to {@code tags}.
* Trims and removes tags from the given line. The removed tags are added to {@code tags}.
*
*
...
...
library/extractor/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java
View file @
5609efd0
...
@@ -40,6 +40,8 @@ public final class SubripDecoderTest {
...
@@ -40,6 +40,8 @@ public final class SubripDecoderTest {
private
static
final
String
TYPICAL_NEGATIVE_TIMESTAMPS
=
private
static
final
String
TYPICAL_NEGATIVE_TIMESTAMPS
=
"media/subrip/typical_negative_timestamps"
;
"media/subrip/typical_negative_timestamps"
;
private
static
final
String
TYPICAL_UNEXPECTED_END
=
"media/subrip/typical_unexpected_end"
;
private
static
final
String
TYPICAL_UNEXPECTED_END
=
"media/subrip/typical_unexpected_end"
;
private
static
final
String
TYPICAL_UTF16BE
=
"media/subrip/typical_utf16be"
;
private
static
final
String
TYPICAL_UTF16LE
=
"media/subrip/typical_utf16le"
;
private
static
final
String
TYPICAL_WITH_TAGS
=
"media/subrip/typical_with_tags"
;
private
static
final
String
TYPICAL_WITH_TAGS
=
"media/subrip/typical_with_tags"
;
private
static
final
String
TYPICAL_NO_HOURS_AND_MILLIS
=
private
static
final
String
TYPICAL_NO_HOURS_AND_MILLIS
=
"media/subrip/typical_no_hours_and_millis"
;
"media/subrip/typical_no_hours_and_millis"
;
...
@@ -81,6 +83,34 @@ public final class SubripDecoderTest {
...
@@ -81,6 +83,34 @@ public final class SubripDecoderTest {
}
}
@Test
@Test
public void decodeTypicalUtf16LE() throws IOException {
  // Load the UTF-16 (LE) variant of the typical SubRip sample and decode it.
  byte[] utf16LeData =
      TestUtil.getByteArray(ApplicationProvider.getApplicationContext(), TYPICAL_UTF16LE);
  Subtitle decoded = new SubripDecoder().decode(utf16LeData, utf16LeData.length, false);

  // The cues checked below start at event time indices 0, 2 and 4.
  assertThat(decoded.getEventTimeCount()).isEqualTo(6);
  assertTypicalCue1(decoded, 0);
  assertTypicalCue2(decoded, 2);
  assertTypicalCue3(decoded, 4);
}
@Test
public void decodeTypicalUtf16BE() throws IOException {
  // Load the UTF-16 (BE) variant of the typical SubRip sample and decode it.
  byte[] utf16BeData =
      TestUtil.getByteArray(ApplicationProvider.getApplicationContext(), TYPICAL_UTF16BE);
  Subtitle decoded = new SubripDecoder().decode(utf16BeData, utf16BeData.length, false);

  // The cues checked below start at event time indices 0, 2 and 4.
  assertThat(decoded.getEventTimeCount()).isEqualTo(6);
  assertTypicalCue1(decoded, 0);
  assertTypicalCue2(decoded, 2);
  assertTypicalCue3(decoded, 4);
}
@Test
public
void
decodeTypicalExtraBlankLine
()
throws
IOException
{
public
void
decodeTypicalExtraBlankLine
()
throws
IOException
{
SubripDecoder
decoder
=
new
SubripDecoder
();
SubripDecoder
decoder
=
new
SubripDecoder
();
byte
[]
bytes
=
byte
[]
bytes
=
...
...
testdata/src/test/assets/media/subrip/typical_utf16be
0 → 100644
View file @
5609efd0
No preview for this file type
testdata/src/test/assets/media/subrip/typical_utf16le
0 → 100644
View file @
5609efd0
No preview for this file type
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment