Commit f273c731 by Oliver Woodman

Support NALUnitLengthFieldLength != 4.

It seems a little hacky to define the NAL unit length field length in Track,
but there doesn't seem to be a particularly nice alternative.

Issue: #406
parent afb1a930
...@@ -68,10 +68,9 @@ import java.util.List; ...@@ -68,10 +68,9 @@ import java.util.List;
.getContainerAtomOfType(Atom.TYPE_stbl); .getContainerAtomOfType(Atom.TYPE_stbl);
long mediaTimescale = parseMdhd(mdia.getLeafAtomOfType(Atom.TYPE_mdhd).data); long mediaTimescale = parseMdhd(mdia.getLeafAtomOfType(Atom.TYPE_mdhd).data);
Pair<MediaFormat, TrackEncryptionBox[]> sampleDescriptions = StsdDataHolder stsdData = parseStsd(stbl.getLeafAtomOfType(Atom.TYPE_stsd).data, durationUs);
parseStsd(stbl.getLeafAtomOfType(Atom.TYPE_stsd).data, durationUs); return new Track(id, trackType, mediaTimescale, durationUs, stsdData.mediaFormat,
return new Track(id, trackType, mediaTimescale, durationUs, sampleDescriptions.first, stsdData.trackEncryptionBoxes, stsdData.nalUnitLengthFieldLength);
sampleDescriptions.second);
} }
/** /**
...@@ -327,12 +326,10 @@ import java.util.List; ...@@ -327,12 +326,10 @@ import java.util.List;
return mdhd.readUnsignedInt(); return mdhd.readUnsignedInt();
} }
private static Pair<MediaFormat, TrackEncryptionBox[]> parseStsd( private static StsdDataHolder parseStsd(ParsableByteArray stsd, long durationUs) {
ParsableByteArray stsd, long durationUs) {
stsd.setPosition(Atom.FULL_HEADER_SIZE); stsd.setPosition(Atom.FULL_HEADER_SIZE);
int numberOfEntries = stsd.readInt(); int numberOfEntries = stsd.readInt();
MediaFormat mediaFormat = null; StsdDataHolder holder = new StsdDataHolder(numberOfEntries);
TrackEncryptionBox[] trackEncryptionBoxes = new TrackEncryptionBox[numberOfEntries];
for (int i = 0; i < numberOfEntries; i++) { for (int i = 0; i < numberOfEntries; i++) {
int childStartPosition = stsd.getPosition(); int childStartPosition = stsd.getPosition();
int childAtomSize = stsd.readInt(); int childAtomSize = stsd.readInt();
...@@ -340,29 +337,25 @@ import java.util.List; ...@@ -340,29 +337,25 @@ import java.util.List;
int childAtomType = stsd.readInt(); int childAtomType = stsd.readInt();
if (childAtomType == Atom.TYPE_avc1 || childAtomType == Atom.TYPE_avc3 if (childAtomType == Atom.TYPE_avc1 || childAtomType == Atom.TYPE_avc3
|| childAtomType == Atom.TYPE_encv) { || childAtomType == Atom.TYPE_encv) {
Pair<MediaFormat, TrackEncryptionBox> avc = parseAvcFromParent(stsd, childStartPosition, childAtomSize, durationUs, holder, i);
parseAvcFromParent(stsd, childStartPosition, childAtomSize, durationUs);
mediaFormat = avc.first;
trackEncryptionBoxes[i] = avc.second;
} else if (childAtomType == Atom.TYPE_mp4a || childAtomType == Atom.TYPE_enca } else if (childAtomType == Atom.TYPE_mp4a || childAtomType == Atom.TYPE_enca
|| childAtomType == Atom.TYPE_ac_3) { || childAtomType == Atom.TYPE_ac_3) {
Pair<MediaFormat, TrackEncryptionBox> audioSampleEntry = parseAudioSampleEntry(stsd, parseAudioSampleEntry(stsd, childAtomType, childStartPosition, childAtomSize, durationUs,
childAtomType, childStartPosition, childAtomSize, durationUs); holder, i);
mediaFormat = audioSampleEntry.first;
trackEncryptionBoxes[i] = audioSampleEntry.second;
} else if (childAtomType == Atom.TYPE_TTML) { } else if (childAtomType == Atom.TYPE_TTML) {
mediaFormat = MediaFormat.createTtmlFormat(); holder.mediaFormat = MediaFormat.createTtmlFormat();
} else if (childAtomType == Atom.TYPE_mp4v) { } else if (childAtomType == Atom.TYPE_mp4v) {
mediaFormat = parseMp4vFromParent(stsd, childStartPosition, childAtomSize, durationUs); holder.mediaFormat = parseMp4vFromParent(stsd, childStartPosition, childAtomSize,
durationUs);
} }
stsd.setPosition(childStartPosition + childAtomSize); stsd.setPosition(childStartPosition + childAtomSize);
} }
return Pair.create(mediaFormat, trackEncryptionBoxes); return holder;
} }
/** Returns the media format for an avc1 box. */ /** Returns the media format for an avc1 box. */
private static Pair<MediaFormat, TrackEncryptionBox> parseAvcFromParent(ParsableByteArray parent, private static void parseAvcFromParent(ParsableByteArray parent, int position, int size,
int position, int size, long durationUs) { long durationUs, StsdDataHolder out, int entryIndex) {
parent.setPosition(position + Atom.HEADER_SIZE); parent.setPosition(position + Atom.HEADER_SIZE);
parent.skipBytes(24); parent.skipBytes(24);
...@@ -372,7 +365,6 @@ import java.util.List; ...@@ -372,7 +365,6 @@ import java.util.List;
parent.skipBytes(50); parent.skipBytes(50);
List<byte[]> initializationData = null; List<byte[]> initializationData = null;
TrackEncryptionBox trackEncryptionBox = null;
int childPosition = parent.getPosition(); int childPosition = parent.getPosition();
while (childPosition - position < size) { while (childPosition - position < size) {
parent.setPosition(childPosition); parent.setPosition(childPosition);
...@@ -385,27 +377,28 @@ import java.util.List; ...@@ -385,27 +377,28 @@ import java.util.List;
Assertions.checkArgument(childAtomSize > 0, "childAtomSize should be positive"); Assertions.checkArgument(childAtomSize > 0, "childAtomSize should be positive");
int childAtomType = parent.readInt(); int childAtomType = parent.readInt();
if (childAtomType == Atom.TYPE_avcC) { if (childAtomType == Atom.TYPE_avcC) {
initializationData = parseAvcCFromParent(parent, childStartPosition); Pair<List<byte[]>, Integer> avcCData = parseAvcCFromParent(parent, childStartPosition);
initializationData = avcCData.first;
out.nalUnitLengthFieldLength = avcCData.second;
} else if (childAtomType == Atom.TYPE_sinf) { } else if (childAtomType == Atom.TYPE_sinf) {
trackEncryptionBox = parseSinfFromParent(parent, childStartPosition, childAtomSize); out.trackEncryptionBoxes[entryIndex] =
parseSinfFromParent(parent, childStartPosition, childAtomSize);
} else if (childAtomType == Atom.TYPE_pasp) { } else if (childAtomType == Atom.TYPE_pasp) {
pixelWidthHeightRatio = parsePaspFromParent(parent, childStartPosition); pixelWidthHeightRatio = parsePaspFromParent(parent, childStartPosition);
} }
childPosition += childAtomSize; childPosition += childAtomSize;
} }
MediaFormat format = MediaFormat.createVideoFormat(MimeTypes.VIDEO_H264, MediaFormat.NO_VALUE, out.mediaFormat = MediaFormat.createVideoFormat(MimeTypes.VIDEO_H264, MediaFormat.NO_VALUE,
durationUs, width, height, pixelWidthHeightRatio, initializationData); durationUs, width, height, pixelWidthHeightRatio, initializationData);
return Pair.create(format, trackEncryptionBox);
} }
private static List<byte[]> parseAvcCFromParent(ParsableByteArray parent, int position) { private static Pair<List<byte[]>, Integer> parseAvcCFromParent(ParsableByteArray parent,
int position) {
parent.setPosition(position + Atom.HEADER_SIZE + 4); parent.setPosition(position + Atom.HEADER_SIZE + 4);
// Start of the AVCDecoderConfigurationRecord (defined in 14496-15) // Start of the AVCDecoderConfigurationRecord (defined in 14496-15)
int nalUnitLength = (parent.readUnsignedByte() & 0x3) + 1; int nalUnitLengthFieldLength = (parent.readUnsignedByte() & 0x3) + 1;
if (nalUnitLength != 4) { if (nalUnitLengthFieldLength == 3) {
// readSample currently relies on a nalUnitLength of 4.
// TODO: Consider handling the case where it isn't.
throw new IllegalStateException(); throw new IllegalStateException();
} }
List<byte[]> initializationData = new ArrayList<byte[]>(); List<byte[]> initializationData = new ArrayList<byte[]>();
...@@ -419,7 +412,7 @@ import java.util.List; ...@@ -419,7 +412,7 @@ import java.util.List;
for (int j = 0; j < numPictureParameterSets; j++) { for (int j = 0; j < numPictureParameterSets; j++) {
initializationData.add(H264Util.parseChildNalUnit(parent)); initializationData.add(H264Util.parseChildNalUnit(parent));
} }
return initializationData; return Pair.create(initializationData, nalUnitLengthFieldLength);
} }
private static TrackEncryptionBox parseSinfFromParent(ParsableByteArray parent, int position, private static TrackEncryptionBox parseSinfFromParent(ParsableByteArray parent, int position,
...@@ -502,8 +495,8 @@ import java.util.List; ...@@ -502,8 +495,8 @@ import java.util.List;
MimeTypes.VIDEO_MP4V, MediaFormat.NO_VALUE, durationUs, width, height, initializationData); MimeTypes.VIDEO_MP4V, MediaFormat.NO_VALUE, durationUs, width, height, initializationData);
} }
private static Pair<MediaFormat, TrackEncryptionBox> parseAudioSampleEntry( private static void parseAudioSampleEntry(ParsableByteArray parent, int atomType, int position,
ParsableByteArray parent, int atomType, int position, int size, long durationUs) { int size, long durationUs, StsdDataHolder out, int entryIndex) {
parent.setPosition(position + Atom.HEADER_SIZE); parent.setPosition(position + Atom.HEADER_SIZE);
parent.skipBytes(16); parent.skipBytes(16);
int channelCount = parent.readUnsignedShort(); int channelCount = parent.readUnsignedShort();
...@@ -513,7 +506,6 @@ import java.util.List; ...@@ -513,7 +506,6 @@ import java.util.List;
int bitrate = MediaFormat.NO_VALUE; int bitrate = MediaFormat.NO_VALUE;
byte[] initializationData = null; byte[] initializationData = null;
TrackEncryptionBox trackEncryptionBox = null;
int childPosition = parent.getPosition(); int childPosition = parent.getPosition();
while (childPosition - position < size) { while (childPosition - position < size) {
parent.setPosition(childPosition); parent.setPosition(childPosition);
...@@ -531,7 +523,8 @@ import java.util.List; ...@@ -531,7 +523,8 @@ import java.util.List;
sampleRate = audioSpecificConfig.first; sampleRate = audioSpecificConfig.first;
channelCount = audioSpecificConfig.second; channelCount = audioSpecificConfig.second;
} else if (childAtomType == Atom.TYPE_sinf) { } else if (childAtomType == Atom.TYPE_sinf) {
trackEncryptionBox = parseSinfFromParent(parent, childStartPosition, childAtomSize); out.trackEncryptionBoxes[entryIndex] = parseSinfFromParent(parent, childStartPosition,
childAtomSize);
} }
} else if (atomType == Atom.TYPE_ac_3 && childAtomType == Atom.TYPE_dac3) { } else if (atomType == Atom.TYPE_ac_3 && childAtomType == Atom.TYPE_dac3) {
// TODO: Choose the right AC-3 track based on the contents of dac3/dec3. // TODO: Choose the right AC-3 track based on the contents of dac3/dec3.
...@@ -542,12 +535,10 @@ import java.util.List; ...@@ -542,12 +535,10 @@ import java.util.List;
channelCount = ac3Format.channelCount; channelCount = ac3Format.channelCount;
bitrate = ac3Format.bitrate; bitrate = ac3Format.bitrate;
} }
// TODO: Add support for encryption (by setting out.trackEncryptionBoxes).
// TODO: Add support for encrypted AC-3.
trackEncryptionBox = null;
} else if (atomType == Atom.TYPE_ec_3 && childAtomType == Atom.TYPE_dec3) { } else if (atomType == Atom.TYPE_ec_3 && childAtomType == Atom.TYPE_dec3) {
sampleRate = parseEc3SpecificBoxFromParent(parent, childStartPosition); sampleRate = parseEc3SpecificBoxFromParent(parent, childStartPosition);
trackEncryptionBox = null; // TODO: Add support for encryption (by setting out.trackEncryptionBoxes).
} }
childPosition += childAtomSize; childPosition += childAtomSize;
} }
...@@ -561,10 +552,9 @@ import java.util.List; ...@@ -561,10 +552,9 @@ import java.util.List;
mimeType = MimeTypes.AUDIO_AAC; mimeType = MimeTypes.AUDIO_AAC;
} }
MediaFormat format = MediaFormat.createAudioFormat( out.mediaFormat = MediaFormat.createAudioFormat(
mimeType, sampleSize, durationUs, channelCount, sampleRate, bitrate, mimeType, sampleSize, durationUs, channelCount, sampleRate, bitrate,
initializationData == null ? null : Collections.singletonList(initializationData)); initializationData == null ? null : Collections.singletonList(initializationData));
return Pair.create(format, trackEncryptionBox);
} }
/** Returns codec-specific initialization data contained in an esds box. */ /** Returns codec-specific initialization data contained in an esds box. */
...@@ -675,4 +665,21 @@ import java.util.List; ...@@ -675,4 +665,21 @@ import java.util.List;
} }
/**
 * Mutable accumulator for the values extracted while parsing an stsd atom and its children.
 */
private static final class StsdDataHolder {

  /**
   * One slot per stsd entry. A slot is populated by the sample entry parsers when they
   * encounter a sinf child atom for that entry; otherwise it is left as null.
   */
  public final TrackEncryptionBox[] trackEncryptionBoxes;

  /** The media format assigned by the sample entry parsers, or null if none was assigned. */
  public MediaFormat mediaFormat;

  /**
   * The length in bytes of the NAL unit length field, as read from an avcC atom. Remains -1
   * if no avcC atom was parsed.
   */
  public int nalUnitLengthFieldLength = -1;

  /**
   * @param numberOfEntries The number of sample entries declared by the stsd atom, used to size
   *     {@link #trackEncryptionBoxes}.
   */
  public StsdDataHolder(int numberOfEntries) {
    this.trackEncryptionBoxes = new TrackEncryptionBox[numberOfEntries];
  }

}
} }
...@@ -614,18 +614,29 @@ public final class FragmentedMp4Extractor implements Extractor { ...@@ -614,18 +614,29 @@ public final class FragmentedMp4Extractor implements Extractor {
parserState = STATE_READING_SAMPLE_CONTINUE; parserState = STATE_READING_SAMPLE_CONTINUE;
} }
if (track.type == Track.TYPE_VIDEO) { if (track.nalUnitLengthFieldLength != -1) {
// Zero the top three bytes of the array that we'll use to parse nal unit lengths, in case
// they're only 1 or 2 bytes long.
byte[] nalLengthData = nalLength.data;
nalLengthData[0] = 0;
nalLengthData[1] = 0;
nalLengthData[2] = 0;
int nalUnitLengthFieldLength = track.nalUnitLengthFieldLength;
int nalUnitLengthFieldLengthDiff = 4 - track.nalUnitLengthFieldLength;
// NAL units are length delimited, but the decoder requires start code delimited units.
// Loop until we've written the sample to the track output, replacing length delimiters with
// start codes as we encounter them.
while (sampleBytesWritten < sampleSize) { while (sampleBytesWritten < sampleSize) {
// NAL units are length delimited, but the decoder requires start code delimited units.
if (sampleCurrentNalBytesRemaining == 0) { if (sampleCurrentNalBytesRemaining == 0) {
// Read the NAL length so that we know where we find the next NAL unit. // Read the NAL length so that we know where we find the next one.
input.readFully(nalLength.data, 0, 4); input.readFully(nalLength.data, nalUnitLengthFieldLengthDiff, nalUnitLengthFieldLength);
nalLength.setPosition(0); nalLength.setPosition(0);
sampleCurrentNalBytesRemaining = nalLength.readUnsignedIntToInt(); sampleCurrentNalBytesRemaining = nalLength.readUnsignedIntToInt();
// Write a start code for the current NAL unit. // Write a start code for the current NAL unit.
nalStartCode.setPosition(0); nalStartCode.setPosition(0);
trackOutput.sampleData(nalStartCode, 4); trackOutput.sampleData(nalStartCode, 4);
sampleBytesWritten += 4; sampleBytesWritten += 4;
sampleSize += nalUnitLengthFieldLengthDiff;
} else { } else {
// Write the payload of the NAL unit. // Write the payload of the NAL unit.
int writtenBytes = trackOutput.sampleData(input, sampleCurrentNalBytesRemaining); int writtenBytes = trackOutput.sampleData(input, sampleCurrentNalBytesRemaining);
......
...@@ -24,7 +24,6 @@ import com.google.android.exoplayer.extractor.TrackOutput; ...@@ -24,7 +24,6 @@ import com.google.android.exoplayer.extractor.TrackOutput;
import com.google.android.exoplayer.extractor.mp4.Atom.ContainerAtom; import com.google.android.exoplayer.extractor.mp4.Atom.ContainerAtom;
import com.google.android.exoplayer.util.Assertions; import com.google.android.exoplayer.util.Assertions;
import com.google.android.exoplayer.util.H264Util; import com.google.android.exoplayer.util.H264Util;
import com.google.android.exoplayer.util.MimeTypes;
import com.google.android.exoplayer.util.ParsableByteArray; import com.google.android.exoplayer.util.ParsableByteArray;
import java.io.IOException; import java.io.IOException;
...@@ -284,19 +283,29 @@ public final class Mp4Extractor implements Extractor, SeekMap { ...@@ -284,19 +283,29 @@ public final class Mp4Extractor implements Extractor, SeekMap {
} }
input.skipFully((int) skipAmount); input.skipFully((int) skipAmount);
sampleSize = track.sampleTable.sizes[sampleIndex]; sampleSize = track.sampleTable.sizes[sampleIndex];
if (track.track.type == Track.TYPE_VIDEO if (track.track.nalUnitLengthFieldLength != -1) {
&& MimeTypes.VIDEO_H264.equals(track.track.mediaFormat.mimeType)) { // Zero the top three bytes of the array that we'll use to parse nal unit lengths, in case
// they're only 1 or 2 bytes long.
byte[] nalLengthData = nalLength.data;
nalLengthData[0] = 0;
nalLengthData[1] = 0;
nalLengthData[2] = 0;
int nalUnitLengthFieldLength = track.track.nalUnitLengthFieldLength;
int nalUnitLengthFieldLengthDiff = 4 - track.track.nalUnitLengthFieldLength;
// NAL units are length delimited, but the decoder requires start code delimited units.
// Loop until we've written the sample to the track output, replacing length delimiters with
// start codes as we encounter them.
while (sampleBytesWritten < sampleSize) { while (sampleBytesWritten < sampleSize) {
// NAL units are length delimited, but the decoder requires start code delimited units.
if (sampleCurrentNalBytesRemaining == 0) { if (sampleCurrentNalBytesRemaining == 0) {
// Read the NAL length so that we know where we find the next NAL unit. // Read the NAL length so that we know where we find the next one.
input.readFully(nalLength.data, 0, 4); input.readFully(nalLength.data, nalUnitLengthFieldLengthDiff, nalUnitLengthFieldLength);
nalLength.setPosition(0); nalLength.setPosition(0);
sampleCurrentNalBytesRemaining = nalLength.readUnsignedIntToInt(); sampleCurrentNalBytesRemaining = nalLength.readUnsignedIntToInt();
// Write a start code for the current NAL unit. // Write a start code for the current NAL unit.
nalStartCode.setPosition(0); nalStartCode.setPosition(0);
track.trackOutput.sampleData(nalStartCode, 4); track.trackOutput.sampleData(nalStartCode, 4);
sampleBytesWritten += 4; sampleBytesWritten += 4;
sampleSize += nalUnitLengthFieldLengthDiff;
} else { } else {
// Write the payload of the NAL unit. // Write the payload of the NAL unit.
int writtenBytes = track.trackOutput.sampleData(input, sampleCurrentNalBytesRemaining); int writtenBytes = track.trackOutput.sampleData(input, sampleCurrentNalBytesRemaining);
......
...@@ -79,14 +79,21 @@ public final class Track { ...@@ -79,14 +79,21 @@ public final class Track {
*/ */
public final TrackEncryptionBox[] sampleDescriptionEncryptionBoxes; public final TrackEncryptionBox[] sampleDescriptionEncryptionBoxes;
/**
* For H264 video tracks, the length in bytes of the NALUnitLength field in each sample. -1 for
* other track types.
*/
public final int nalUnitLengthFieldLength;
public Track(int id, int type, long timescale, long durationUs, MediaFormat mediaFormat, public Track(int id, int type, long timescale, long durationUs, MediaFormat mediaFormat,
TrackEncryptionBox[] sampleDescriptionEncryptionBoxes) { TrackEncryptionBox[] sampleDescriptionEncryptionBoxes, int nalUnitLengthFieldLength) {
this.id = id; this.id = id;
this.type = type; this.type = type;
this.timescale = timescale; this.timescale = timescale;
this.durationUs = durationUs; this.durationUs = durationUs;
this.mediaFormat = mediaFormat; this.mediaFormat = mediaFormat;
this.sampleDescriptionEncryptionBoxes = sampleDescriptionEncryptionBoxes; this.sampleDescriptionEncryptionBoxes = sampleDescriptionEncryptionBoxes;
this.nalUnitLengthFieldLength = nalUnitLengthFieldLength;
} }
} }
...@@ -172,7 +172,8 @@ public class SmoothStreamingChunkSource implements ChunkSource { ...@@ -172,7 +172,8 @@ public class SmoothStreamingChunkSource implements ChunkSource {
FragmentedMp4Extractor extractor = new FragmentedMp4Extractor( FragmentedMp4Extractor extractor = new FragmentedMp4Extractor(
FragmentedMp4Extractor.WORKAROUND_EVERY_VIDEO_FRAME_IS_SYNC_FRAME); FragmentedMp4Extractor.WORKAROUND_EVERY_VIDEO_FRAME_IS_SYNC_FRAME);
extractor.setTrack(new Track(trackIndex, trackType, streamElement.timescale, extractor.setTrack(new Track(trackIndex, trackType, streamElement.timescale,
initialManifest.durationUs, mediaFormat, trackEncryptionBoxes)); initialManifest.durationUs, mediaFormat, trackEncryptionBoxes,
trackType == Track.TYPE_VIDEO ? 4 : -1));
extractorWrappers.put(trackIndex, new ChunkExtractorWrapper(extractor)); extractorWrappers.put(trackIndex, new ChunkExtractorWrapper(extractor));
mediaFormats.put(trackIndex, mediaFormat); mediaFormats.put(trackIndex, mediaFormat);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment