Commit e0316d1c by Oliver Woodman

Refine TTML parsing logic + setup for adding styling information.

1. Refine the way TtmlNode handles whitespace collapsing when constructing
   the output text. Can of worms.
2. Start using SpannableStringBuilder. This will allow spans to be attached
   in getText as nodes are encountered, which is how we'll incorporate
   styling information into the result.
parent efa92ac1
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
*/ */
package com.google.android.exoplayer.text.ttml; package com.google.android.exoplayer.text.ttml;
import android.text.SpannableStringBuilder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
...@@ -52,7 +54,7 @@ import java.util.TreeSet; ...@@ -52,7 +54,7 @@ import java.util.TreeSet;
private List<TtmlNode> children; private List<TtmlNode> children;
// Builds a text node from raw text. In the post-change (right-hand) text of the line below, the
// text is first normalized by applyTextElementSpacePolicy; the pre-change (left-hand) text called
// applySpacePolicy(text, true) instead. Both time arguments are UNDEFINED_TIME.
// NOTE(review): the leading null is presumably the tag; confirm against the TtmlNode constructor,
// which is not visible in this hunk.
public static TtmlNode buildTextNode(String text) { public static TtmlNode buildTextNode(String text) {
return new TtmlNode(null, applySpacePolicy(text, true), UNDEFINED_TIME, UNDEFINED_TIME); return new TtmlNode(null, applyTextElementSpacePolicy(text), UNDEFINED_TIME, UNDEFINED_TIME);
} }
public static TtmlNode buildNode(String tag, long startTimeUs, long endTimeUs) { public static TtmlNode buildNode(String tag, long startTimeUs, long endTimeUs) {
...@@ -123,47 +125,104 @@ import java.util.TreeSet; ...@@ -123,47 +125,104 @@ import java.util.TreeSet;
} }
} }
/**
 * Builds the text of this node tree for the given time and normalizes its whitespace.
 * <p>
 * The post-change code collects the raw text through the recursive
 * {@code getText(long, SpannableStringBuilder, boolean)} overload and then edits the builder in
 * place: runs of spaces are collapsed to one space, spaces at the start and at the end of every
 * line are removed, and a single trailing newline is trimmed.
 * <p>
 * NOTE(review): lines that show two statements carry the pre-change text followed by the
 * post-change text of the commit; the comments here describe the post-change code.
 *
 * @param timeUs The time forwarded to the recursive overload (presumably microseconds, going by
 *     the name; confirm against callers).
 * @return The cleaned-up text, as a subsequence of the builder.
 */
public String getText(long timeUs) { public CharSequence getText(long timeUs) {
StringBuilder builder = new StringBuilder(); SpannableStringBuilder builder = getText(timeUs, new SpannableStringBuilder(), false);
getText(timeUs, builder, false); // Having joined the text elements, we need to do some final cleanup on the result.
return applySpacePolicy(builder.toString().replaceAll("\n$", ""), false); // 1. Collapse multiple consecutive spaces into a single space.
// builderLength mirrors builder.length(): every delete below is paired with an equal decrement.
int builderLength = builder.length();
for (int i = 0; i < builderLength; i++) {
if (builder.charAt(i) == ' ') {
// Scan forward so that j ends up one past the last space of the run starting at i.
int j = i + 1;
while (j < builder.length() && builder.charAt(j) == ' ') {
j++;
}
// Delete all but one space of the run; the surviving space ends up at index i.
int spacesToDelete = j - (i + 1);
if (spacesToDelete > 0) {
builder.delete(i, i + spacesToDelete);
builderLength -= spacesToDelete;
}
}
}
// 2. Remove any spaces from the start of each line.
// Step 1 left at most one space in a row, so deleting a single character per line is enough.
if (builderLength > 0 && builder.charAt(0) == ' ') {
builder.delete(0, 1);
builderLength--;
}
for (int i = 0; i < builderLength - 1; i++) {
if (builder.charAt(i) == '\n' && builder.charAt(i + 1) == ' ') {
builder.delete(i + 1, i + 2);
builderLength--;
}
}
// 3. Remove any spaces from the end of each line.
if (builderLength > 0 && builder.charAt(builderLength - 1) == ' ') {
builder.delete(builderLength - 1, builderLength);
builderLength--;
}
for (int i = 0; i < builderLength - 1; i++) {
if (builder.charAt(i) == ' ' && builder.charAt(i + 1) == '\n') {
// After this delete the newline sits at index i, so the loop simply moves on to i + 1.
builder.delete(i, i + 1);
builderLength--;
}
}
// 4. Trim a trailing newline, if there is one.
// Only one newline is removed here; any earlier newlines are kept.
if (builderLength > 0 && builder.charAt(builderLength - 1) == '\n') {
builder.delete(builderLength - 1, builderLength);
builderLength--;
}
// builderLength equals builder.length() at this point, so this spans the whole builder.
return builder.subSequence(0, builderLength);
} }
/**
 * Appends the text contributed by this node and its descendants to {@code builder}.
 * <p>
 * A text node inside a p node appends its text, and a br node inside a p node appends a newline.
 * A metadata node contributes nothing. Any other node for which {@code isActive(timeUs)} holds
 * recurses into its children, and a p node then finishes via {@code endParagraph} (post-change);
 * the pre-change code appended a newline only if the children had added characters.
 *
 * @param timeUs The time passed to {@code isActive} and forwarded to the children.
 * @param builder The builder to append to.
 * @param descendsPNode Whether this node is a descendant of a p node.
 * @return The same {@code builder} instance that was passed in (post-change; the pre-change
 *     method returned nothing).
 */
private void getText(long timeUs, StringBuilder builder, boolean descendsPNode) { private SpannableStringBuilder getText(long timeUs, SpannableStringBuilder builder,
boolean descendsPNode) {
if (isTextNode && descendsPNode) { if (isTextNode && descendsPNode) {
builder.append(text); builder.append(text);
} else if (TAG_BR.equals(tag) && descendsPNode) { } else if (TAG_BR.equals(tag) && descendsPNode) {
builder.append("\n"); builder.append('\n');
} else if (TAG_METADATA.equals(tag)) { } else if (TAG_METADATA.equals(tag)) {
// Do nothing. // Do nothing.
} else if (isActive(timeUs)) { } else if (isActive(timeUs)) {
boolean isPNode = TAG_P.equals(tag); boolean isPNode = TAG_P.equals(tag);
// Pre-change only: remembers the length so the old code could tell whether children added text.
int length = builder.length();
for (int i = 0; i < getChildCount(); ++i) { for (int i = 0; i < getChildCount(); ++i) {
// Children are told they descend from a p node if this node is one or itself descends from one.
getChild(i).getText(timeUs, builder, descendsPNode || isPNode); getChild(i).getText(timeUs, builder, descendsPNode || isPNode);
} }
if (isPNode && length != builder.length()) { if (isPNode) {
builder.append("\n"); endParagraph(builder);
} }
} }
return builder;
}
/**
 * Invoked when the end of a paragraph is encountered. Appends a newline to the builder if the
 * current line (the text after the previous newline, or from the start of the builder when there
 * is none) contains at least one non-space character. Nothing is appended when the builder is
 * empty, holds only spaces, or its last non-space character is already a newline.
 *
 * @param builder The builder to which the newline may be appended.
 */
private static void endParagraph(SpannableStringBuilder builder) {
// Walk backwards over any trailing spaces to find the last non-space character.
int position = builder.length() - 1;
while (position >= 0 && builder.charAt(position) == ' ') {
position--;
}
// position < 0 means there is no non-space character at all; a '\n' at position means the
// current line has no content yet. In both cases no newline is added.
if (position >= 0 && builder.charAt(position) != '\n') {
builder.append('\n');
}
} }
/** /**
* Applies the space policy to the given string. See: * Applies the appropriate space policy to the given text element.
* <a href="http://www.w3.org/TR/ttaf1-dfxp/#content-attribute-space">The default space
* policy</a>
* *
* @param in A string to apply the policy. * @param in The text element to which the policy should be applied.
* @param treatLineFeedAsSpace Whether to convert line feeds to spaces. * @return The result of applying the policy to the text element.
*/ */
private static String applySpacePolicy(String in, boolean treatLineFeedAsSpace) { private static String applyTextElementSpacePolicy(String in) {
// Removes carriage return followed by line feed. See: http://www.w3.org/TR/xml/#sec-line-ends // Removes carriage return followed by line feed. See: http://www.w3.org/TR/xml/#sec-line-ends
String out = in.replaceAll("\r\n", "\n"); String out = in.replaceAll("\r\n", "\n");
// Apply suppress-at-line-break="auto" and // Apply suppress-at-line-break="auto" and
// white-space-treatment="ignore-if-surrounding-linefeed" // white-space-treatment="ignore-if-surrounding-linefeed"
out = out.replaceAll(" *\n *", "\n"); out = out.replaceAll(" *\n *", "\n");
// Apply linefeed-treatment="treat-as-space" // Apply linefeed-treatment="treat-as-space"
out = treatLineFeedAsSpace ? out.replaceAll("\n", " ") : out; out = out.replaceAll("\n", " ");
// Apply white-space-collapse="true" // Apply white-space-collapse="true"
out = out.replaceAll("[ \t\\x0B\f\r]+", " "); out = out.replaceAll("[ \t\\x0B\f\r]+", " ");
return out; return out;
......
...@@ -65,7 +65,7 @@ public final class TtmlSubtitle implements Subtitle { ...@@ -65,7 +65,7 @@ public final class TtmlSubtitle implements Subtitle {
@Override @Override
public List<Cue> getCues(long timeUs) { public List<Cue> getCues(long timeUs) {
String cueText = root.getText(timeUs - startTimeUs); CharSequence cueText = root.getText(timeUs - startTimeUs);
if (cueText == null) { if (cueText == null) {
return Collections.<Cue>emptyList(); return Collections.<Cue>emptyList();
} else { } else {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment