Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
SDK
/
exoplayer
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
ae520a8c
authored
Jul 25, 2018
by
Arnold Szabo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
#4306 - Extract tags from SubRip subtitles, add support for alignment tags based on SSA v4+
parent
05a31dfd
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
241 additions
and
2 deletions
library/core/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java
library/core/src/test/assets/subrip/typical_with_tags
library/core/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java
library/core/src/main/java/com/google/android/exoplayer2/text/subrip/SubripDecoder.java
View file @
ae520a8c
...
...
@@ -15,7 +15,9 @@
*/
package
com
.
google
.
android
.
exoplayer2
.
text
.
subrip
;
import
android.support.annotation.StringDef
;
import
android.text.Html
;
import
android.text.Layout
;
import
android.text.Spanned
;
import
android.text.TextUtils
;
import
android.util.Log
;
...
...
@@ -23,7 +25,11 @@ import com.google.android.exoplayer2.text.Cue;
import
com.google.android.exoplayer2.text.SimpleSubtitleDecoder
;
import
com.google.android.exoplayer2.util.LongArray
;
import
com.google.android.exoplayer2.util.ParsableByteArray
;
import
java.lang.annotation.Retention
;
import
java.lang.annotation.RetentionPolicy
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
...
...
@@ -38,6 +44,33 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
// Matches a SubRip timing line: optional whitespace, a start timecode, the "-->" separator
// and an optional end timecode. Each timecode is wrapped in its own capturing group.
private
static
final
Pattern
SUBRIP_TIMING_LINE
=
Pattern
.
compile
(
"\\s*("
+
SUBRIP_TIMECODE
+
")\\s*-->\\s*("
+
SUBRIP_TIMECODE
+
")?\\s*"
);
// Matches one override tag: an opening brace immediately followed by a backslash, then
// (reluctantly) everything up to the nearest closing brace.
private
static
final
Pattern
SUBRIP_TAG_PATTERN
=
Pattern
.
compile
(
"\\{\\\\.*?\\}"
);
// Regular expression (kept as a String, not a compiled Pattern) that a tag must match in full
// to count as an alignment tag: an "an" override followed by a single digit from 1 to 9.
// The decode loop applies it with String.matches.
private
static
final
String
SUBRIP_ALIGNMENT_TAG
=
"\\{\\\\an[1-9]\\}"
;
// Fractions used by getAlignmentValues for both the line and the position of an aligned cue:
// START for top/left, MID for the centre, END for bottom/right.
private
static
final
float
DEFAULT_START_FRACTION
=
0.08f
;
// Mirror of the start fraction, i.e. the same margin measured from the opposite edge.
private
static
final
float
DEFAULT_END_FRACTION
=
1
-
DEFAULT_START_FRACTION
;
private
static
final
float
DEFAULT_MID_FRACTION
=
0.5f
;
// Source-retained StringDef enumerating the nine supported alignment tag literals.
// NOTE(review): no use of this annotation is visible in this view - confirm it is applied
// somewhere or remove it.
@Retention
(
RetentionPolicy
.
SOURCE
)
@StringDef
({
ALIGN_BOTTOM_LEFT
,
ALIGN_BOTTOM_MID
,
ALIGN_BOTTOM_RIGHT
,
ALIGN_MID_LEFT
,
ALIGN_MID_MID
,
ALIGN_MID_RIGHT
,
ALIGN_TOP_LEFT
,
ALIGN_TOP_MID
,
ALIGN_TOP_RIGHT
})
private
@interface
SubRipTag
{}
// Possible valid alignment tags based on SSA v4+ specs
// The digit follows the numeric keypad layout: 1-3 is the bottom row, 4-6 the middle row and
// 7-9 the top row, each running left to right. These literals are the case labels of the
// switch in getAlignmentValues.
private
static
final
String
ALIGN_BOTTOM_LEFT
=
"{\\an1}"
;
private
static
final
String
ALIGN_BOTTOM_MID
=
"{\\an2}"
;
private
static
final
String
ALIGN_BOTTOM_RIGHT
=
"{\\an3}"
;
private
static
final
String
ALIGN_MID_LEFT
=
"{\\an4}"
;
private
static
final
String
ALIGN_MID_MID
=
"{\\an5}"
;
private
static
final
String
ALIGN_MID_RIGHT
=
"{\\an6}"
;
private
static
final
String
ALIGN_TOP_LEFT
=
"{\\an7}"
;
private
static
final
String
ALIGN_TOP_MID
=
"{\\an8}"
;
private
static
final
String
ALIGN_TOP_RIGHT
=
"{\\an9}"
;
private
final
StringBuilder
textBuilder
;
public
SubripDecoder
()
{
...
...
@@ -95,8 +128,36 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
textBuilder
.
append
(
currentLine
.
trim
());
}
Spanned
text
=
Html
.
fromHtml
(
textBuilder
.
toString
());
cues
.
add
(
new
Cue
(
text
));
// Extract tags
SubtitleTagResult
tagResult
=
extractTags
(
textBuilder
);
Spanned
text
=
Html
.
fromHtml
(
tagResult
.
cue
);
Cue
cue
=
null
;
// Check if tags are present
if
(
tagResult
.
tags
.
length
>
0
)
{
boolean
alignTagFound
=
false
;
// At the end of this loop the cue must be created with the applied tags
for
(
String
tag
:
tagResult
.
tags
)
{
// Check if the tag is an alignment tag
if
(
tag
.
matches
(
SUBRIP_ALIGNMENT_TAG
))
{
// Based on the specs, in case of the alignment tags only the first appearance counts
if
(
alignTagFound
)
continue
;
alignTagFound
=
true
;
AlignmentResult
alignmentResult
=
getAlignmentValues
(
tag
);
cue
=
new
Cue
(
text
,
Layout
.
Alignment
.
ALIGN_NORMAL
,
alignmentResult
.
line
,
Cue
.
LINE_TYPE_FRACTION
,
alignmentResult
.
lineAnchor
,
alignmentResult
.
position
,
alignmentResult
.
positionAnchor
,
Cue
.
DIMEN_UNSET
);
}
}
}
cues
.
add
(
cue
==
null
?
new
Cue
(
text
)
:
cue
);
if
(
haveEndTimecode
)
{
cues
.
add
(
null
);
}
...
...
@@ -108,6 +169,111 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
return
new
SubripSubtitle
(
cuesArray
,
cueTimesUsArray
);
}
/**
 * Separates the override tags from the displayable text of a cue.
 *
 * <p>A tag is any span matched by {@code SUBRIP_TAG_PATTERN}: an opening brace directly followed
 * by a backslash, running up to the nearest closing brace. This follows the SSA v4+ rule that
 * override codes appear within braces and are always preceded by a backslash. A brace that is
 * not immediately followed by a backslash is left in the text untouched.
 *
 * @param cue Cue text. It is only read, never modified.
 * @return A {@link SubtitleTagResult} holding the tags in order of appearance and the cue text
 *     with every tag removed.
 */
private SubtitleTagResult extractTags(StringBuilder cue) {
  String rawCue = cue.toString();
  List<String> foundTags = new ArrayList<>();
  StringBuilder strippedCue = new StringBuilder();

  // Walk the tags left to right, copying only the text that lies between them.
  int copyFrom = 0;
  Matcher tagMatcher = SUBRIP_TAG_PATTERN.matcher(rawCue);
  while (tagMatcher.find()) {
    foundTags.add(tagMatcher.group());
    strippedCue.append(rawCue, copyFrom, tagMatcher.start());
    copyFrom = tagMatcher.end();
  }
  // Whatever follows the last tag (or the whole cue when there was no tag at all).
  strippedCue.append(rawCue, copyFrom, rawCue.length());

  String[] tagArray = foundTags.toArray(new String[foundTags.size()]);
  return new SubtitleTagResult(tagArray, strippedCue.toString());
}
/**
 * Translates an alignment tag into the line, position and anchor values used to build a cue.
 *
 * <p>Per the SSA v4+ specs an alignment tag has the form {\an[1-9]}, where the digit picks one
 * of nine screen regions laid out like a numeric keypad: 1-3 bottom, 4-6 middle, 7-9 top, each
 * row running left to right. The horizontal and the vertical placement are independent, so they
 * are resolved separately below.
 *
 * <p>Any string that is not one of the nine {@code ALIGN_*} literals (including the older,
 * deprecated {\a...} alignment tags, which follow different direction rules) yields the default
 * placement: horizontally centred, anchored at the bottom.
 *
 * @param tag Alignment tag
 * @return {@link AlignmentResult} that holds the line, position and both anchor values
 */
private AlignmentResult getAlignmentValues(String tag) {
  // Horizontal placement: decided by the keypad column of the tag.
  float position;
  @Cue.AnchorType int positionAnchor;
  switch (tag) {
    case ALIGN_BOTTOM_LEFT:
    case ALIGN_MID_LEFT:
    case ALIGN_TOP_LEFT:
      position = DEFAULT_START_FRACTION;
      positionAnchor = Cue.ANCHOR_TYPE_START;
      break;
    case ALIGN_BOTTOM_RIGHT:
    case ALIGN_MID_RIGHT:
    case ALIGN_TOP_RIGHT:
      position = DEFAULT_END_FRACTION;
      positionAnchor = Cue.ANCHOR_TYPE_END;
      break;
    default:
      // Centre column, and the fallback for unrecognised tags.
      position = DEFAULT_MID_FRACTION;
      positionAnchor = Cue.ANCHOR_TYPE_MIDDLE;
      break;
  }

  // Vertical placement: decided by the keypad row of the tag.
  float line;
  @Cue.AnchorType int lineAnchor;
  switch (tag) {
    case ALIGN_TOP_LEFT:
    case ALIGN_TOP_MID:
    case ALIGN_TOP_RIGHT:
      line = DEFAULT_START_FRACTION;
      lineAnchor = Cue.ANCHOR_TYPE_START;
      break;
    case ALIGN_MID_LEFT:
    case ALIGN_MID_MID:
    case ALIGN_MID_RIGHT:
      line = DEFAULT_MID_FRACTION;
      lineAnchor = Cue.ANCHOR_TYPE_MIDDLE;
      break;
    default:
      // Bottom row, and the fallback for unrecognised tags.
      line = DEFAULT_END_FRACTION;
      lineAnchor = Cue.ANCHOR_TYPE_END;
      break;
  }

  return new AlignmentResult(positionAnchor, position, lineAnchor, line);
}
private
static
long
parseTimecode
(
Matcher
matcher
,
int
groupOffset
)
{
long
timestampMs
=
Long
.
parseLong
(
matcher
.
group
(
groupOffset
+
1
))
*
60
*
60
*
1000
;
timestampMs
+=
Long
.
parseLong
(
matcher
.
group
(
groupOffset
+
2
))
*
60
*
1000
;
...
...
@@ -116,4 +282,35 @@ public final class SubripDecoder extends SimpleSubtitleDecoder {
return
timestampMs
*
1000
;
}
/**
 * Immutable pair produced by tag extraction: the tags that were found and the cue text that
 * remains once they have been removed.
 */
private static final class SubtitleTagResult {

  /** The extracted tags, in the order they appeared in the cue. The array is not copied. */
  public final String[] tags;

  /** The cue text with every extracted tag removed. */
  public final String cue;

  /**
   * @param extractedTags Tags pulled out of the cue text.
   * @param strippedCue Cue text left over after removing the tags.
   */
  public SubtitleTagResult(String[] extractedTags, String strippedCue) {
    tags = extractedTags;
    cue = strippedCue;
  }
}
/**
 * Immutable holder for the placement values derived from an alignment tag: the line and
 * position fractions together with the anchor type that applies to each of them.
 *
 * <p>The fields are final, matching {@link SubtitleTagResult}; an instance is fully described
 * by its constructor arguments and is never modified afterwards.
 */
private static final class AlignmentResult {

  /** Anchor type applied to {@link #position}. */
  public final @Cue.AnchorType int positionAnchor;

  /** Anchor type applied to {@link #line}. */
  public final @Cue.AnchorType int lineAnchor;

  /** Horizontal position of the cue, as a fraction. */
  public final float position;

  /** Vertical position (line) of the cue, as a fraction. */
  public final float line;

  /**
   * @param positionAnchor Anchor type for the position.
   * @param position Position fraction.
   * @param lineAnchor Anchor type for the line.
   * @param line Line fraction.
   */
  public AlignmentResult(
      @Cue.AnchorType int positionAnchor,
      float position,
      @Cue.AnchorType int lineAnchor,
      float line) {
    this.positionAnchor = positionAnchor;
    this.position = position;
    this.lineAnchor = lineAnchor;
    this.line = line;
  }
}
}
library/core/src/test/assets/subrip/typical_with_tags
0 → 100644
View file @
ae520a8c
1
00:00:00,000 --> 00:00:01,234
This is {\an1} the first subtitle.
2
00:00:02,345 --> 00:00:03,456
This is the second subtitle.
Second {\ an 2} subtitle with second line.
3
00:00:04,567 --> 00:00:08,901
This {\an2} is the third {\ tag} subtitle.
4
00:00:09,567 --> 00:00:12,901
This { \an2} is the fourth subtitle.
5
00:00:013,567 --> 00:00:14,901
This {\an2} is the fifth subtitle with multiple {\xyz} valid {\qwe} tags.
library/core/src/test/java/com/google/android/exoplayer2/text/subrip/SubripDecoderTest.java
View file @
ae520a8c
...
...
@@ -36,6 +36,7 @@ public final class SubripDecoderTest {
private
static
final
String
TYPICAL_MISSING_SEQUENCE
=
"subrip/typical_missing_sequence"
;
private
static
final
String
TYPICAL_NEGATIVE_TIMESTAMPS
=
"subrip/typical_negative_timestamps"
;
private
static
final
String
TYPICAL_UNEXPECTED_END
=
"subrip/typical_unexpected_end"
;
private
static
final
String
TYPICAL_WITH_TAGS
=
"subrip/typical_with_tags"
;
private
static
final
String
NO_END_TIMECODES_FILE
=
"subrip/no_end_timecodes"
;
@Test
...
...
@@ -154,6 +155,27 @@ public final class SubripDecoderTest {
.
isEqualTo
(
"Or to the end of the media."
);
}
/**
 * Decodes the {@code typical_with_tags} asset and checks that well-formed override tags are
 * stripped from the cue text while malformed ones are left in place.
 */
@Test
public void testDecodeCueWithTag() throws IOException {
  byte[] data = TestUtil.getByteArray(RuntimeEnvironment.application, TYPICAL_WITH_TAGS);
  SubripSubtitle parsed = new SubripDecoder().decode(data, data.length, false);

  // Expected text of the five cues, read from event indices 0, 2, 4, 6 and 8.
  String[] expectedTexts = {
    "This is the first subtitle.",
    "This is the second subtitle.\nSecond subtitle with second line.",
    "This is the third subtitle.",
    // Based on the SSA v4+ specs the curly bracket must be followed by a backslash, so this is
    // not a valid tag (won't be parsed / replaced)
    "This { \\an2} is the fourth subtitle.",
    "This is the fifth subtitle with multiple valid tags."
  };

  for (int cueNumber = 0; cueNumber < expectedTexts.length; cueNumber++) {
    int eventIndex = cueNumber * 2;
    String actualText =
        parsed.getCues(parsed.getEventTime(eventIndex)).get(0).text.toString();
    assertThat(actualText).isEqualTo(expectedTexts[cueNumber]);
  }
}
private
static
void
assertTypicalCue1
(
SubripSubtitle
subtitle
,
int
eventIndex
)
{
assertThat
(
subtitle
.
getEventTime
(
eventIndex
)).
isEqualTo
(
0
);
assertThat
(
subtitle
.
getCues
(
subtitle
.
getEventTime
(
eventIndex
)).
get
(
0
).
text
.
toString
())
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment