scuffle_flv/video/header/
enhanced.rs

1//! Enhanced video header types and functions.
2
3use std::io::{self, Read};
4
5use byteorder::{BigEndian, ReadBytesExt};
6use bytes::Bytes;
7use nutype_enum::nutype_enum;
8use scuffle_bytes_util::BytesCursorExt;
9
10use super::VideoFrameType;
11use crate::common::AvMultitrackType;
12use crate::error::FlvError;
13use crate::video::header::VideoCommand;
14
nutype_enum! {
    /// Different types of video packets.
    ///
    /// The value is carried in the low 4 bits of the header byte
    /// (see [`ExVideoTagHeader::demux`]).
    ///
    /// Defined by:
    /// - Enhanced RTMP spec, page 27-28, Enhanced Video
    pub enum VideoPacketType(u8) {
        /// Sequence start.
        SequenceStart = 0,
        /// Coded frames.
        CodedFrames = 1,
        /// Sequence end.
        SequenceEnd = 2,
        /// Coded frames without extra data.
        CodedFramesX = 3,
        /// Metadata.
        Metadata = 4,
        /// MPEG-2 TS sequence start.
        Mpeg2TsSequenceStart = 5,
        /// Turns on video multitrack mode.
        Multitrack = 6,
        /// Modifier extension.
        ///
        /// While the packet type is `ModEx`, [`ExVideoTagHeader::demux`] keeps
        /// reading modifier extensions; each one carries the next packet type.
        ModEx = 7,
    }
}
39
nutype_enum! {
    /// Different types of video packet modifier extensions.
    ///
    /// The value is carried in the high 4 bits of the byte that follows the
    /// modifier extension data (see [`VideoPacketModEx::demux`]).
    pub enum VideoPacketModExType(u8) {
        /// Timestamp offset in nanoseconds.
        TimestampOffsetNano = 0,
    }
}
47
/// This is a helper enum to represent the different types of video packet modifier extensions.
///
/// Values are produced by [`VideoPacketModEx::demux`].
#[derive(Debug, Clone, PartialEq)]
pub enum VideoPacketModEx {
    /// Timestamp offset in nanoseconds.
    TimestampOffsetNano {
        /// The timestamp offset in nanoseconds.
        ///
        /// Read as a 24-bit big-endian unsigned integer from the start of the
        /// modifier extension data, so only the low 24 bits are ever set.
        video_timestamp_nano_offset: u32,
    },
    /// Any other modifier extension.
    ///
    /// The data is kept as raw bytes without further interpretation.
    Other {
        /// The type of the modifier extension.
        video_packet_mod_ex_type: VideoPacketModExType,
        /// The data of the modifier extension.
        mod_ex_data: Bytes,
    },
}
64
65impl VideoPacketModEx {
66    /// Demux a [`VideoPacketModEx`] from the given reader.
67    ///
68    /// Returns the demuxed [`VideoPacketModEx`] and the next [`VideoPacketType`], if successful.
69    pub fn demux(reader: &mut io::Cursor<Bytes>) -> Result<(Self, VideoPacketType), FlvError> {
70        let mut mod_ex_data_size = reader.read_u8()? as usize + 1;
71        if mod_ex_data_size == 256 {
72            mod_ex_data_size = reader.read_u16::<BigEndian>()? as usize + 1;
73        }
74
75        let mod_ex_data = reader.extract_bytes(mod_ex_data_size)?;
76
77        let next_byte = reader.read_u8()?;
78        let video_packet_mod_ex_type = VideoPacketModExType::from(next_byte >> 4); // 0b1111_0000
79        let video_packet_type = VideoPacketType::from(next_byte & 0b0000_1111);
80
81        if video_packet_mod_ex_type == VideoPacketModExType::TimestampOffsetNano {
82            if mod_ex_data_size < 3 {
83                // too few data bytes for the timestamp offset
84                return Err(FlvError::InvalidModExData { expected_bytes: 3 });
85            }
86
87            let mod_ex_data = &mut io::Cursor::new(mod_ex_data);
88
89            Ok((
90                VideoPacketModEx::TimestampOffsetNano {
91                    video_timestamp_nano_offset: mod_ex_data.read_u24::<BigEndian>()?,
92                },
93                video_packet_type,
94            ))
95        } else {
96            Ok((
97                VideoPacketModEx::Other {
98                    video_packet_mod_ex_type,
99                    mod_ex_data,
100                },
101                video_packet_type,
102            ))
103        }
104    }
105}
106
nutype_enum! {
    /// Valid FOURCC values for signaling support of video codecs
    /// in the enhanced FourCC pipeline.
    ///
    /// Each value is the four ASCII bytes of the code, in the order they are
    /// read from the header.
    ///
    /// Defined by:
    /// - Enhanced RTMP spec, page 28, Enhanced Video
    pub enum VideoFourCc([u8; 4]) {
        /// VP8
        Vp8 = *b"vp08",
        /// VP9
        Vp9 = *b"vp09",
        /// AV1
        Av1 = *b"av01",
        /// AVC (H.264)
        Avc = *b"avc1",
        /// HEVC (H.265)
        Hevc = *b"hvc1",
    }
}
126
/// This is a helper enum to represent the different types of enhanced video headers.
///
/// Values are produced by [`ExVideoTagHeader::demux`].
#[derive(Debug, Clone, PartialEq)]
pub enum ExVideoTagHeaderContent {
    /// Video command.
    ///
    /// Produced when the frame type is a command and the packet type is not metadata.
    VideoCommand(VideoCommand),
    /// Not multitrack.
    NoMultiTrack(VideoFourCc),
    /// Multitrack with one track.
    OneTrack(VideoFourCc),
    /// Multitrack with many tracks of the same codec.
    ManyTracks(VideoFourCc),
    /// Multitrack with many tracks of different codecs.
    ///
    /// No FOURCC is read from the header in this case.
    ManyTracksManyCodecs,
    /// Unknown multitrack type.
    ///
    /// The FOURCC is still read from the header for unrecognized multitrack types.
    Unknown {
        /// The type of the multitrack video.
        video_multitrack_type: AvMultitrackType,
        /// The FOURCC of the video codec.
        video_four_cc: VideoFourCc,
    },
}
148
/// `ExVideoTagHeader`
///
/// The parsed form of an enhanced video tag header, as produced by
/// [`ExVideoTagHeader::demux`].
///
/// Defined by:
/// - Enhanced RTMP spec, page 27-28, Enhanced Video
#[derive(Debug, Clone, PartialEq)]
pub struct ExVideoTagHeader {
    /// The modifier extensions of the video packet.
    ///
    /// This can be empty if there are no modifier extensions.
    /// Extensions are stored in the order they were read.
    pub video_packet_mod_exs: Vec<VideoPacketModEx>,
    /// The type of the video packet.
    ///
    /// This is the packet type left after all modifier extensions have been
    /// read. When the multitrack byte is parsed, it is replaced by the packet
    /// type carried in that byte.
    pub video_packet_type: VideoPacketType,
    /// The content of the video packet which contains more information about the multitrack configuration.
    pub content: ExVideoTagHeaderContent,
}
164
165impl ExVideoTagHeader {
166    /// Demux an [`ExVideoTagHeader`] from the given reader.
167    ///
168    /// This is implemented as per Enhanced RTMP spec, page 27-28, ExVideoTagHeader.
169    #[allow(clippy::unusual_byte_groupings)]
170    pub fn demux(reader: &mut io::Cursor<Bytes>) -> Result<Self, FlvError> {
171        let byte = reader.read_u8()?;
172        let video_frame_type = VideoFrameType::from((byte & 0b0_111_0000) >> 4);
173        let mut video_packet_type = VideoPacketType::from(byte & 0b0000_1111);
174
175        let mut video_packet_mod_exs = Vec::new();
176
177        // Read all modifier extensions
178        while video_packet_type == VideoPacketType::ModEx {
179            let (mod_ex, next_video_packet_type) = VideoPacketModEx::demux(reader)?;
180            video_packet_mod_exs.push(mod_ex);
181            video_packet_type = next_video_packet_type;
182        }
183
184        let content = if video_packet_type != VideoPacketType::Metadata && video_frame_type == VideoFrameType::Command {
185            let video_command = VideoCommand::from(reader.read_u8()?);
186            ExVideoTagHeaderContent::VideoCommand(video_command)
187        } else if video_packet_type == VideoPacketType::Multitrack {
188            let next_byte = reader.read_u8()?;
189            let video_multitrack_type = AvMultitrackType::from(next_byte >> 4); // 0b1111_0000
190            video_packet_type = VideoPacketType::from(next_byte & 0b0000_1111);
191
192            if video_packet_type == VideoPacketType::Multitrack {
193                // nested multitracks are not allowed
194                return Err(FlvError::NestedMultitracks);
195            }
196
197            let mut video_four_cc = [0; 4];
198            // Only read the FOURCC if it's not ManyTracksManyCodecs
199            if video_multitrack_type != AvMultitrackType::ManyTracksManyCodecs {
200                reader.read_exact(&mut video_four_cc)?;
201            }
202
203            match video_multitrack_type {
204                AvMultitrackType::OneTrack => ExVideoTagHeaderContent::OneTrack(VideoFourCc::from(video_four_cc)),
205                AvMultitrackType::ManyTracks => ExVideoTagHeaderContent::ManyTracks(VideoFourCc::from(video_four_cc)),
206                AvMultitrackType::ManyTracksManyCodecs => ExVideoTagHeaderContent::ManyTracksManyCodecs,
207                _ => ExVideoTagHeaderContent::Unknown {
208                    video_multitrack_type,
209                    video_four_cc: VideoFourCc::from(video_four_cc),
210                },
211            }
212        } else {
213            let mut video_four_cc = [0; 4];
214            reader.read_exact(&mut video_four_cc)?;
215
216            ExVideoTagHeaderContent::NoMultiTrack(VideoFourCc::from(video_four_cc))
217        };
218
219        Ok(Self {
220            video_packet_type,
221            video_packet_mod_exs,
222            content,
223        })
224    }
225}
226
// Unit tests for the modifier extension and enhanced video header demuxers.
#[cfg(test)]
#[cfg_attr(all(test, coverage_nightly), coverage(off))]
mod tests {
    use bytes::Bytes;

    use crate::common::AvMultitrackType;
    use crate::error::FlvError;
    use crate::video::header::VideoCommand;
    use crate::video::header::enhanced::{
        ExVideoTagHeader, ExVideoTagHeaderContent, VideoFourCc, VideoPacketModEx, VideoPacketModExType, VideoPacketType,
    };

    // A modifier extension using the one-byte size form and an unrecognized type.
    #[test]
    fn small_mod_ex_demux() {
        let data = &[
            1,  // size 2
            42, // data
            42,
            0b0001_0001, // type 1, next packet 1
        ];

        let (mod_ex, next_packet) = VideoPacketModEx::demux(&mut std::io::Cursor::new(Bytes::from_static(data))).unwrap();

        assert_eq!(
            mod_ex,
            VideoPacketModEx::Other {
                video_packet_mod_ex_type: VideoPacketModExType(1),
                mod_ex_data: Bytes::from_static(&[42, 42])
            }
        );
        assert_eq!(next_packet, VideoPacketType::CodedFrames);
    }

    // A timestamp offset extension with exactly three data bytes.
    #[test]
    fn timestamp_offset_mod_ex_demux() {
        let data = &[
            2, // size 3
            0, // data
            0,
            1,
            0b0000_0000, // type 0, next packet 0
        ];

        let (mod_ex, next_packet) = VideoPacketModEx::demux(&mut std::io::Cursor::new(Bytes::from_static(data))).unwrap();

        assert_eq!(
            mod_ex,
            VideoPacketModEx::TimestampOffsetNano {
                video_timestamp_nano_offset: 1
            },
        );
        assert_eq!(next_packet, VideoPacketType::SequenceStart);
    }

    // A modifier extension using the escaped (u16) size form.
    #[test]
    fn big_mod_ex_demux() {
        let data = &[
            255, // escape marker: the real size follows as a big-endian u16
            0,   // u16 value 1, i.e. size 2
            1,
            42, // data
            42,
            0b0001_0001, // type 1, next packet 1
        ];

        let (mod_ex, next_packet) = VideoPacketModEx::demux(&mut std::io::Cursor::new(Bytes::from_static(data))).unwrap();

        assert_eq!(
            mod_ex,
            VideoPacketModEx::Other {
                video_packet_mod_ex_type: VideoPacketModExType(1),
                mod_ex_data: Bytes::from_static(&[42, 42])
            }
        );
        assert_eq!(next_packet, VideoPacketType::CodedFrames);
    }

    // A timestamp offset extension with too little data must be rejected.
    #[test]
    fn mod_ex_demux_error() {
        let data = &[
            0, // size 1
            42,
            0b0000_0010, // type 0, next packet 2
        ];

        let err = VideoPacketModEx::demux(&mut std::io::Cursor::new(Bytes::from_static(data))).unwrap_err();

        assert!(matches!(err, FlvError::InvalidModExData { expected_bytes: 3 },));
    }

    // The smallest valid header: one type byte followed by a FOURCC.
    #[test]
    fn minimal_header() {
        let data = &[
            0b0000_0000, // type 0
            b'a',        // four cc
            b'v',
            b'c',
            b'1',
        ];

        let header = ExVideoTagHeader::demux(&mut std::io::Cursor::new(Bytes::from_static(data))).unwrap();

        assert_eq!(header.video_packet_mod_exs.len(), 0);
        assert_eq!(header.video_packet_type, VideoPacketType::SequenceStart);
        assert_eq!(header.content, ExVideoTagHeaderContent::NoMultiTrack(VideoFourCc::Avc));
    }

    // A header with one modifier extension before the FOURCC.
    #[test]
    fn header_small_mod_ex() {
        let data = &[
            0b0000_0111, // type 7
            1,           // modex size 2
            42,          // modex data
            42,
            0b0001_0001, // type 1, next packet 1
            b'a',        // four cc
            b'v',
            b'c',
            b'1',
        ];

        let header = ExVideoTagHeader::demux(&mut std::io::Cursor::new(Bytes::from_static(data))).unwrap();

        assert_eq!(header.video_packet_mod_exs.len(), 1);
        assert_eq!(
            header.video_packet_mod_exs[0],
            VideoPacketModEx::Other {
                video_packet_mod_ex_type: VideoPacketModExType(1),
                mod_ex_data: Bytes::from_static(&[42, 42])
            }
        );
        assert_eq!(header.video_packet_type, VideoPacketType::CodedFrames);
        assert_eq!(header.content, ExVideoTagHeaderContent::NoMultiTrack(VideoFourCc::Avc));
    }

    // A multitrack header with the "one track" multitrack type.
    #[test]
    fn header_multitrack_one_track() {
        let data = &[
            0b0000_0110, // type 6
            0b0000_0000, // one track, type 0
            b'a',        // four cc
            b'v',
            b'c',
            b'1',
        ];

        let header = ExVideoTagHeader::demux(&mut std::io::Cursor::new(Bytes::from_static(data))).unwrap();

        assert_eq!(header.video_packet_mod_exs.len(), 0);
        assert_eq!(header.video_packet_type, VideoPacketType::SequenceStart);
        assert_eq!(header.content, ExVideoTagHeaderContent::OneTrack(VideoFourCc::Avc));
    }

    // A multitrack header with the "many tracks" multitrack type.
    #[test]
    fn header_multitrack_many_tracks() {
        let data = &[
            0b0000_0110, // type 6
            0b0001_0000, // many tracks, type 0
            b'a',        // four cc
            b'v',
            b'c',
            b'1',
        ];

        let header = ExVideoTagHeader::demux(&mut std::io::Cursor::new(Bytes::from_static(data))).unwrap();

        assert_eq!(header.video_packet_mod_exs.len(), 0);
        assert_eq!(header.video_packet_type, VideoPacketType::SequenceStart);
        assert_eq!(header.content, ExVideoTagHeaderContent::ManyTracks(VideoFourCc::Avc));
    }

    // A multitrack header with the "many tracks, many codecs" type; no FOURCC is read.
    #[test]
    fn header_multitrack_many_tracks_many_codecs() {
        let data = &[
            0b0000_0110, // type 6
            0b0010_0000, // many tracks many codecs, type 0
            b'a',        // four cc, should be ignored
            b'v',
            b'c',
            b'1',
        ];

        let header = ExVideoTagHeader::demux(&mut std::io::Cursor::new(Bytes::from_static(data))).unwrap();

        assert_eq!(header.video_packet_mod_exs.len(), 0);
        assert_eq!(header.video_packet_type, VideoPacketType::SequenceStart);
        assert_eq!(header.content, ExVideoTagHeaderContent::ManyTracksManyCodecs);
    }

    // A multitrack header with an unrecognized multitrack type; the FOURCC is still read.
    #[test]
    fn header_multitrack_unknown() {
        let data = &[
            0b0000_0110, // type 6
            0b0011_0000, // unknown, type 0
            b'a',        // four cc
            b'v',
            b'c',
            b'1',
        ];

        let header = ExVideoTagHeader::demux(&mut std::io::Cursor::new(Bytes::from_static(data))).unwrap();

        assert_eq!(header.video_packet_mod_exs.len(), 0);
        assert_eq!(header.video_packet_type, VideoPacketType::SequenceStart);
        assert_eq!(
            header.content,
            ExVideoTagHeaderContent::Unknown {
                video_multitrack_type: AvMultitrackType(3),
                video_four_cc: VideoFourCc::Avc,
            }
        );
    }

    // A multitrack byte that announces multitrack again must be rejected.
    #[test]
    fn nested_multitrack_error() {
        let data = &[
            0b0000_0110, // type 6
            0b0000_0110, // one track, type 6 (multitrack again)
        ];

        let err = ExVideoTagHeader::demux(&mut std::io::Cursor::new(Bytes::from_static(data))).unwrap_err();
        assert!(matches!(err, FlvError::NestedMultitracks));
    }

    // A command frame carries a single video command byte instead of a FOURCC.
    #[test]
    fn video_command() {
        let data = &[
            0b0101_0000, // frame type 5, type 0
            0,           // video command 0
            42,          // should be ignored
        ];

        let header = ExVideoTagHeader::demux(&mut std::io::Cursor::new(Bytes::from_static(data))).unwrap();

        assert_eq!(header.video_packet_mod_exs.len(), 0);
        assert_eq!(header.video_packet_type, VideoPacketType::SequenceStart);
        assert_eq!(header.content, ExVideoTagHeaderContent::VideoCommand(VideoCommand::StartSeek));
    }
}