@@ -32,27 +32,59 @@ class StreamMetadata:
3232 duration_seconds_from_header : Optional [float ]
3333 """Duration of the stream, in seconds, obtained from the header (float or
3434 None). This could be inaccurate."""
35+ begin_stream_seconds_from_header : Optional [float ]
36+ """Beginning of the stream, in seconds, obtained from the header (float or
37+ None). Usually, this is equal to 0."""
3538 bit_rate : Optional [float ]
3639 """Bit rate of the stream, in bits per second (float or None)."""
40+ codec : Optional [str ]
41+ """Codec (str or None)."""
42+ stream_index : int
43+ """Index of the stream within the video (int)."""
44+
45+ def __repr__ (self ):
46+ s = self .__class__ .__name__ + ":\n "
47+ for field in dataclasses .fields (self ):
48+ s += f"{ SPACES } { field .name } : { getattr (self , field .name )} \n "
49+ return s
50+
51+
52+ @dataclass
53+ class VideoStreamMetadata (StreamMetadata ):
54+ """Metadata of a single video stream."""
55+
3756 begin_stream_seconds_from_content : Optional [float ]
3857 """Beginning of the stream, in seconds (float or None).
39- Conceptually, this corresponds to the first frame's :term:`pts`. It is
40- computed as min(frame.pts) across all frames in the stream. Usually, this is
41- equal to 0."""
58+ Conceptually, this corresponds to the first frame's :term:`pts`. It is only
59+ computed when a :term:`scan` is done, as min(frame.pts) across all frames in
60+ the stream. Usually, this is equal to 0."""
4261 end_stream_seconds_from_content : Optional [float ]
4362 """End of the stream, in seconds (float or None).
4463 Conceptually, this corresponds to last_frame.pts + last_frame.duration. It
45- is computed as max(frame.pts + frame.duration) across all frames in the
46- stream. Note that no frame is played at this time value, so calling
47- :meth:`~torchcodec.decoders.VideoDecoder.get_frame_played_at` with
48- this value would result in an error. Retrieving the last frame is best done
49- by simply indexing the :class:`~torchcodec.decoders.VideoDecoder`
50- object with ``[-1]``.
64+ is only computed when a :term:`scan` is done, as max(frame.pts +
65+ frame.duration) across all frames in the stream. Note that no frame is
66+ played at this time value, so calling
67+ :meth:`~torchcodec.decoders.VideoDecoder.get_frame_played_at` with this
68+ value would result in an error. Retrieving the last frame is best done by
69+ simply indexing the :class:`~torchcodec.decoders.VideoDecoder` object with
70+ ``[-1]``.
5171 """
52- codec : Optional [str ]
53- """Codec (str or None)."""
54- stream_index : int
55- """Index of the stream within the video (int)."""
72+ width : Optional [int ]
73+ """Width of the frames (int or None)."""
74+ height : Optional [int ]
75+ """Height of the frames (int or None)."""
76+ num_frames_from_header : Optional [int ]
77+ """Number of frames, from the stream's metadata. This is potentially
78+ inaccurate. We recommend using the ``num_frames`` attribute instead.
79+ (int or None)."""
80+ num_frames_from_content : Optional [int ]
81+ """Number of frames computed by TorchCodec by scanning the stream's
82+ content (the scan doesn't involve decoding). This is more accurate
83+ than ``num_frames_from_header``. We recommend using the
84+ ``num_frames`` attribute instead. (int or None)."""
85+ average_fps_from_header : Optional [float ]
86+ """Average fps of the stream, obtained from the header (float or None).
87+ We recommend using the ``average_fps`` attribute instead."""
5688
5789 @property
5890 def duration_seconds (self ) -> Optional [float ]:
@@ -94,36 +126,6 @@ def end_stream_seconds(self) -> Optional[float]:
94126 else :
95127 return self .end_stream_seconds_from_content
96128
97- def __repr__ (self ):
98- # Overridden because properites are not printed by default.
99- s = self .__class__ .__name__ + ":\n "
100- s += f"{ SPACES } duration_seconds: { self .duration_seconds } \n "
101- for field in dataclasses .fields (self ):
102- s += f"{ SPACES } { field .name } : { getattr (self , field .name )} \n "
103- return s
104-
105-
106- @dataclass
107- class VideoStreamMetadata (StreamMetadata ):
108- """Metadata of a single video stream."""
109-
110- width : Optional [int ]
111- """Width of the frames (int or None)."""
112- height : Optional [int ]
113- """Height of the frames (int or None)."""
114- num_frames_from_header : Optional [int ]
115- """Number of frames, from the stream's metadata. This is potentially
116- inaccurate. We recommend using the ``num_frames`` attribute instead.
117- (int or None)."""
118- num_frames_from_content : Optional [int ]
119- """Number of frames computed by TorchCodec by scanning the stream's
120- content (the scan doesn't involve decoding). This is more accurate
121- than ``num_frames_from_header``. We recommend using the
122- ``num_frames`` attribute instead. (int or None)."""
123- average_fps_from_header : Optional [float ]
124- """Averate fps of the stream, obtained from the header (float or None).
125- We recommend using the ``average_fps`` attribute instead."""
126-
127129 @property
128130 def num_frames (self ) -> Optional [int ]:
129131 """Number of frames in the stream. This corresponds to
@@ -154,6 +156,9 @@ def average_fps(self) -> Optional[float]:
154156
155157 def __repr__ (self ):
156158 s = super ().__repr__ ()
159+ s += f"{ SPACES } duration_seconds: { self .duration_seconds } \n "
160+ s += f"{ SPACES } begin_stream_seconds: { self .begin_stream_seconds } \n "
161+ s += f"{ SPACES } end_stream_seconds: { self .end_stream_seconds } \n "
157162 s += f"{ SPACES } num_frames: { self .num_frames } \n "
158163 s += f"{ SPACES } average_fps: { self .average_fps } \n "
159164 return s
@@ -224,14 +229,19 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
224229 common_meta = dict (
225230 duration_seconds_from_header = stream_dict .get ("durationSeconds" ),
226231 bit_rate = stream_dict .get ("bitRate" ),
227- begin_stream_seconds_from_content = stream_dict .get ("minPtsSecondsFromScan" ),
228- end_stream_seconds_from_content = stream_dict .get ("maxPtsSecondsFromScan" ),
232+ begin_stream_seconds_from_header = stream_dict .get ("beginStreamFromHeader" ),
229233 codec = stream_dict .get ("codec" ),
230234 stream_index = stream_index ,
231235 )
232236 if stream_dict ["mediaType" ] == "video" :
233237 streams_metadata .append (
234238 VideoStreamMetadata (
239+ begin_stream_seconds_from_content = stream_dict .get (
240+ "minPtsSecondsFromScan"
241+ ),
242+ end_stream_seconds_from_content = stream_dict .get (
243+ "maxPtsSecondsFromScan"
244+ ),
235245 width = stream_dict .get ("width" ),
236246 height = stream_dict .get ("height" ),
237247 num_frames_from_header = stream_dict .get ("numFrames" ),
0 commit comments