From 5873d4ccdd8f132e37c285665a1c5e72a81ecfe6 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Thu, 22 Apr 2021 21:16:29 +0200
Subject: [PATCH 01/31] [utils] Improve bug_report_message

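Add an optional `before` argument (default ';') specifying the text that
should go before the message. The message is capitalised when `before`
is empty or ends with '.', '!' or '?'.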
---
 yt_dlp/utils.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 40d956808..9ddd6453f 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -2340,15 +2340,20 @@ def make_HTTPS_handler(params, **kwargs):
         return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
 
 
-def bug_reports_message():
+def bug_reports_message(before=';'):
     if ytdl_is_updateable():
         update_cmd = 'type  yt-dlp -U  to update'
     else:
         update_cmd = 'see  https://github.com/yt-dlp/yt-dlp  on how to update'
-    msg = '; please report this issue on https://github.com/yt-dlp/yt-dlp .'
+    msg = 'please report this issue on  https://github.com/yt-dlp/yt-dlp .'
     msg += ' Make sure you are using the latest version; %s.' % update_cmd
     msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
-    return msg
+
+    before = before.rstrip()
+    if not before or before.endswith(('.', '!', '?')):
+        msg = msg[0].title() + msg[1:]
+
+    return (before + ' ' if before else '') + msg
 
 
 class YoutubeDLError(Exception):

From 4d49884c58035f892eed949713ea2931c4d9c6aa Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Fri, 23 Apr 2021 09:11:28 +0200
Subject: [PATCH 02/31] [downloader/fragment] Allow persisting extra state when
 a download is interrupted

---
 yt_dlp/downloader/fragment.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py
index a0c1d13ac..fadd0dfc5 100644
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@@ -77,7 +77,10 @@ class FragmentFD(FileDownloader):
         assert 'ytdl_corrupt' not in ctx
         stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
         try:
-            ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
+            ytdl_data = json.loads(stream.read())
+            ctx['fragment_index'] = ytdl_data['downloader']['current_fragment']['index']
+            if 'extra_state' in ytdl_data['downloader']:
+                ctx['extra_state'] = ytdl_data['downloader']['extra_state']
         except Exception:
             ctx['ytdl_corrupt'] = True
         finally:
@@ -90,6 +93,8 @@ class FragmentFD(FileDownloader):
                 'index': ctx['fragment_index'],
             },
         }
+        if 'extra_state' in ctx:
+            downloader['extra_state'] = ctx['extra_state']
         if ctx.get('fragment_count') is not None:
             downloader['fragment_count'] = ctx['fragment_count']
         frag_index_stream.write(json.dumps({'downloader': downloader}))

From d4553567d2f38809df2adaaf3a2257bc01d518f4 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Fri, 23 Apr 2021 09:13:33 +0200
Subject: [PATCH 03/31] [downloader/ism] Prevent writing the header again when
 resuming an interrupted download

---
 yt_dlp/downloader/ism.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py
index 1ca666b4a..b3e22793b 100644
--- a/yt_dlp/downloader/ism.py
+++ b/yt_dlp/downloader/ism.py
@@ -221,10 +221,13 @@ class IsmFD(FragmentFD):
 
         self._prepare_and_start_frag_download(ctx)
 
+        extra_state = ctx.setdefault('extra_state', {
+            'ism_track_written': False,
+        })
+
         fragment_retries = self.params.get('fragment_retries', 0)
         skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
 
-        track_written = False
         frag_index = 0
         for i, segment in enumerate(segments):
             frag_index += 1
@@ -236,11 +239,11 @@ class IsmFD(FragmentFD):
                     success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
                     if not success:
                         return False
-                    if not track_written:
+                    if not extra_state['ism_track_written']:
                         tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
                         info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
                         write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
-                        track_written = True
+                        extra_state['ism_track_written'] = True
                     self._append_fragment(ctx, frag_content)
                     break
                 except compat_urllib_error.HTTPError as err:

From 19bb39202d10e171378fc407b1e0590bbb9df96b Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Mon, 19 Apr 2021 19:25:54 +0200
Subject: [PATCH 04/31] [extractor/common] Generalise _merge_subtitles

This allows modifying a subtitles dictionary in-place.
---
 yt_dlp/extractor/common.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 4487c5375..ee8a54b66 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -3319,12 +3319,22 @@ class InfoExtractor(object):
         return ret
 
     @classmethod
-    def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
-        """ Merge two subtitle dictionaries, language by language. """
-        ret = dict(subtitle_dict1)
-        for lang in subtitle_dict2:
-            ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
-        return ret
+    def _merge_subtitles(cls, *dicts, **kwargs):
+        """ Merge subtitle dictionaries, language by language. """
+
+        target = (lambda target=None: target)(**kwargs)
+        # The above lambda extracts the keyword argument 'target' from kwargs
+        # while ensuring there are no stray ones. When Python 2 support
+        # is dropped, remove it and change the function signature to:
+        #
+        #     def _merge_subtitles(cls, *dicts, target=None):
+
+        if target is None:
+            target = {}
+        for d in dicts:
+            for lang, subs in d.items():
+                target[lang] = cls._merge_subtitle_items(target.get(lang, []), subs)
+        return target
 
     def extract_automatic_captions(self, *args, **kwargs):
         if (self._downloader.params.get('writeautomaticsub', False)

From a0c3b2d5cf4fe374a2b0119ea53b71c9a06aaee9 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Mon, 7 Nov 2016 15:45:42 +0100
Subject: [PATCH 05/31] [extractor/common] Extract HLS subtitle tracks

_extract_m3u8_formats is renamed to _extract_m3u8_formats_and_subtitles
and extended to handle subtitle tracks instead of skipping them;
a wrapper with the old name is provided for compatibility.

_parse_m3u8_formats is likewise renamed and extended, but without adding
a compatibility wrapper; the test suite is adjusted to test the enhanced
method instead. Both new methods return a (formats, subtitles) tuple.
---
 test/test_InfoExtractor.py |  8 +++---
 yt_dlp/extractor/common.py | 53 +++++++++++++++++++++++++++-----------
 2 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index a08616694..9e059723f 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -684,17 +684,19 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'width': 1920,
                     'height': 1080,
                     'vcodec': 'avc1.64002a',
-                }]
+                }],
+                {}
             ),
         ]
 
-        for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
+        for m3u8_file, m3u8_url, expected_formats, expected_subs in _TEST_CASES:
             with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
                          mode='r', encoding='utf-8') as f:
-                formats = self.ie._parse_m3u8_formats(
+                formats, subs = self.ie._parse_m3u8_formats_and_subtitles(
                     f.read(), m3u8_url, ext='mp4')
                 self.ie._sort_formats(formats)
                 expect_value(self, formats, expected_formats, None)
+                expect_value(self, subs, expected_subs, None)
 
     def test_parse_mpd_formats(self):
         _TEST_CASES = [
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index ee8a54b66..c67fb7bbf 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1879,11 +1879,21 @@ class InfoExtractor(object):
             'format_note': 'Quality selection URL',
         }
 
-    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
-                              entry_protocol='m3u8', preference=None, quality=None,
-                              m3u8_id=None, note=None, errnote=None,
-                              fatal=True, live=False, data=None, headers={},
-                              query={}):
+    def _extract_m3u8_formats(self, *args, **kwargs):
+        fmts, subs = self._extract_m3u8_formats_and_subtitles(*args, **kwargs)
+        if subs:
+            self.report_warning(bug_reports_message(
+                "Ignoring subtitle tracks found in the HLS manifest; "
+                "if any subtitle tracks are missing,"
+            ))
+        return fmts
+
+    def _extract_m3u8_formats_and_subtitles(
+            self, m3u8_url, video_id, ext=None, entry_protocol='m3u8',
+            preference=None, quality=None, m3u8_id=None, note=None,
+            errnote=None, fatal=True, live=False, data=None, headers={},
+            query={}):
+
         res = self._download_webpage_handle(
             m3u8_url, video_id,
             note=note or 'Downloading m3u8 information',
@@ -1891,30 +1901,34 @@ class InfoExtractor(object):
             fatal=fatal, data=data, headers=headers, query=query)
 
         if res is False:
-            return []
+            return [], {}
 
         m3u8_doc, urlh = res
         m3u8_url = urlh.geturl()
 
-        return self._parse_m3u8_formats(
+        return self._parse_m3u8_formats_and_subtitles(
             m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
             preference=preference, quality=quality, m3u8_id=m3u8_id,
             note=note, errnote=errnote, fatal=fatal, live=live, data=data,
             headers=headers, query=query, video_id=video_id)
 
-    def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
-                            entry_protocol='m3u8', preference=None, quality=None,
-                            m3u8_id=None, live=False, note=None, errnote=None,
-                            fatal=True, data=None, headers={}, query={}, video_id=None):
+    def _parse_m3u8_formats_and_subtitles(
+            self, m3u8_doc, m3u8_url, ext=None, entry_protocol='m3u8',
+            preference=None, quality=None, m3u8_id=None, live=False, note=None,
+            errnote=None, fatal=True, data=None, headers={}, query={},
+            video_id=None):
+
         if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
-            return []
+            return [], {}
 
         if (not self._downloader.params.get('allow_unplayable_formats')
                 and re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc)):  # Apple FairPlay
-            return []
+            return [], {}
 
         formats = []
 
+        subtitles = {}
+
         format_url = lambda u: (
             u
             if re.match(r'^https?://', u)
@@ -2001,7 +2015,7 @@ class InfoExtractor(object):
                 }
                 formats.append(f)
 
-            return formats
+            return formats, subtitles
 
         groups = {}
         last_stream_inf = {}
@@ -2013,6 +2027,15 @@ class InfoExtractor(object):
             if not (media_type and group_id and name):
                 return
             groups.setdefault(group_id, []).append(media)
+            # <https://tools.ietf.org/html/rfc8216#section-4.3.4.1>
+            if media_type == 'SUBTITLES':
+                lang = media['LANGUAGE']  # XXX: normalise?
+                url = format_url(media['URI'])
+                sub_info = {
+                    'url': url,
+                    'ext': determine_ext(url),
+                }
+                subtitles.setdefault(lang, []).append(sub_info)
             if media_type not in ('VIDEO', 'AUDIO'):
                 return
             media_url = media.get('URI')
@@ -2160,7 +2183,7 @@ class InfoExtractor(object):
                         formats.append(http_f)
 
                 last_stream_inf = {}
-        return formats
+        return formats, subtitles
 
     @staticmethod
     def _xpath_ns(path, namespace=None):

From 171e59edd465f6c295bf8264f7024e243ff464d6 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Sun, 18 Apr 2021 01:49:22 +0200
Subject: [PATCH 06/31] [extractor/common] Extract DASH subtitle tracks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_extract_mpd_formats and _parse_mpd_formats were extended into
_extract_mpd_formats_and_subtitles and _parse_mpd_formats_and_subtitles;
wrappers with the old names are provided for compatibility.
---
 yt_dlp/extractor/common.py | 81 +++++++++++++++++++++++++-------------
 1 file changed, 54 insertions(+), 27 deletions(-)

diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index c67fb7bbf..0c56a9015 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -2426,23 +2426,44 @@ class InfoExtractor(object):
             })
         return entries
 
-    def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
+    def _extract_mpd_formats(self, *args, **kwargs):
+        fmts, subs = self._extract_mpd_formats_and_subtitles(*args, **kwargs)
+        if subs:
+            self.report_warning(bug_reports_message(
+                "Ignoring subtitle tracks found in the DASH manifest; "
+                "if any subtitle tracks are missing,"
+            ))
+        return fmts
+
+    def _extract_mpd_formats_and_subtitles(
+            self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
+            fatal=True, data=None, headers={}, query={}):
         res = self._download_xml_handle(
             mpd_url, video_id,
             note=note or 'Downloading MPD manifest',
             errnote=errnote or 'Failed to download MPD manifest',
             fatal=fatal, data=data, headers=headers, query=query)
         if res is False:
-            return []
+            return [], {}
         mpd_doc, urlh = res
         if mpd_doc is None:
-            return []
+            return [], {}
         mpd_base_url = base_url(urlh.geturl())
 
-        return self._parse_mpd_formats(
+        return self._parse_mpd_formats_and_subtitles(
             mpd_doc, mpd_id, mpd_base_url, mpd_url)
 
-    def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
+    def _parse_mpd_formats(self, *args, **kwargs):
+        fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
+        if subs:
+            self.report_warning(bug_reports_message(
+                "Ignoring subtitle tracks found in the DASH manifest; "
+                "if any subtitle tracks are missing,"
+            ))
+        return fmts
+
+    def _parse_mpd_formats_and_subtitles(
+            self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
         """
         Parse formats from MPD manifest.
         References:
@@ -2452,7 +2473,7 @@ class InfoExtractor(object):
         """
         if not self._downloader.params.get('dynamic_mpd', True):
             if mpd_doc.get('type') == 'dynamic':
-                return []
+                return [], {}
 
         namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
 
@@ -2524,6 +2545,7 @@ class InfoExtractor(object):
 
         mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
         formats = []
+        subtitles = {}
         for period in mpd_doc.findall(_add_ns('Period')):
             period_duration = parse_duration(period.get('duration')) or mpd_duration
             period_ms_info = extract_multisegment_info(period, {
@@ -2541,11 +2563,9 @@ class InfoExtractor(object):
                     representation_attrib.update(representation.attrib)
                     # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
                     mime_type = representation_attrib['mimeType']
-                    content_type = mime_type.split('/')[0]
-                    if content_type == 'text':
-                        # TODO implement WebVTT downloading
-                        pass
-                    elif content_type in ('video', 'audio'):
+                    content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
+
+                    if content_type in ('video', 'audio', 'text'):
                         base_url = ''
                         for element in (representation, adaptation_set, period, mpd_doc):
                             base_url_e = element.find(_add_ns('BaseURL'))
@@ -2562,21 +2582,28 @@ class InfoExtractor(object):
                         url_el = representation.find(_add_ns('BaseURL'))
                         filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
                         bandwidth = int_or_none(representation_attrib.get('bandwidth'))
-                        f = {
-                            'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
-                            'manifest_url': mpd_url,
-                            'ext': mimetype2ext(mime_type),
-                            'width': int_or_none(representation_attrib.get('width')),
-                            'height': int_or_none(representation_attrib.get('height')),
-                            'tbr': float_or_none(bandwidth, 1000),
-                            'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
-                            'fps': int_or_none(representation_attrib.get('frameRate')),
-                            'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
-                            'format_note': 'DASH %s' % content_type,
-                            'filesize': filesize,
-                            'container': mimetype2ext(mime_type) + '_dash',
-                        }
-                        f.update(parse_codecs(representation_attrib.get('codecs')))
+                        if content_type in ('video', 'audio'):
+                            f = {
+                                'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
+                                'manifest_url': mpd_url,
+                                'ext': mimetype2ext(mime_type),
+                                'width': int_or_none(representation_attrib.get('width')),
+                                'height': int_or_none(representation_attrib.get('height')),
+                                'tbr': float_or_none(bandwidth, 1000),
+                                'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
+                                'fps': int_or_none(representation_attrib.get('frameRate')),
+                                'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
+                                'format_note': 'DASH %s' % content_type,
+                                'filesize': filesize,
+                                'container': mimetype2ext(mime_type) + '_dash',
+                            }
+                            f.update(parse_codecs(representation_attrib.get('codecs')))
+                        elif content_type == 'text':
+                            f = {
+                                'ext': mimetype2ext(mime_type),
+                                'manifest_url': mpd_url,
+                                'filesize': filesize,
+                            }
                         representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
 
                         def prepare_template(template_name, identifiers):
@@ -2726,7 +2753,7 @@ class InfoExtractor(object):
                         formats.append(f)
                     else:
                         self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
-        return formats
+        return formats, subtitles
 
     def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
         res = self._download_xml_handle(

From fd76a14259ed4f9685e0cddae5a111ac8b4aa300 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Sun, 18 Apr 2021 12:47:42 +0200
Subject: [PATCH 07/31] [extractor/common, downloader/ism] Extract SSTR
 subtitle tracks

_parse_ism_formats was extended into _parse_ism_formats_and_subtitles
and all direct users were updated; _extract_ism_formats was likewise
extended into _extract_ism_formats_and_subtitles, with the old name left
as a compatibility wrapper.

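The SSTR downloader was also modified in order to prepare for muxing
subtitle streams, although no support for any subtitle codecs was
added in this commit. write_piff_header now requires a 'stream_type'
entry in its params instead of inferring audio from a zero width and
height.

DASH text representations are now also stored in the subtitles
dictionary instead of being appended to the formats list.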
---
 yt_dlp/downloader/ism.py   |  38 ++++++++++----
 yt_dlp/extractor/common.py | 105 ++++++++++++++++++++++++-------------
 2 files changed, 98 insertions(+), 45 deletions(-)

diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py
index b3e22793b..103064df1 100644
--- a/yt_dlp/downloader/ism.py
+++ b/yt_dlp/downloader/ism.py
@@ -48,7 +48,7 @@ def write_piff_header(stream, params):
     language = params.get('language', 'und')
     height = params.get('height', 0)
     width = params.get('width', 0)
-    is_audio = width == 0 and height == 0
+    stream_type = params['stream_type']
     creation_time = modification_time = int(time.time())
 
     ftyp_payload = b'isml'  # major brand
@@ -77,7 +77,7 @@ def write_piff_header(stream, params):
     tkhd_payload += u32.pack(0) * 2  # reserved
     tkhd_payload += s16.pack(0)  # layer
     tkhd_payload += s16.pack(0)  # alternate group
-    tkhd_payload += s88.pack(1 if is_audio else 0)  # volume
+    tkhd_payload += s88.pack(1 if stream_type == 'audio' else 0)  # volume
     tkhd_payload += u16.pack(0)  # reserved
     tkhd_payload += unity_matrix
     tkhd_payload += u1616.pack(width)
@@ -93,19 +93,35 @@ def write_piff_header(stream, params):
     mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload)  # Media Header Box
 
     hdlr_payload = u32.pack(0)  # pre defined
-    hdlr_payload += b'soun' if is_audio else b'vide'  # handler type
-    hdlr_payload += u32.pack(0) * 3  # reserved
-    hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0'  # name
+    if stream_type == 'audio':  # handler type
+        hdlr_payload += b'soun'
+        hdlr_payload += u32.pack(0) * 3  # reserved
+        hdlr_payload += b'SoundHandler\0'  # name
+    elif stream_type == 'video':
+        hdlr_payload += b'vide'
+        hdlr_payload += u32.pack(0) * 3  # reserved
+        hdlr_payload += b'VideoHandler\0'  # name
+    elif stream_type == 'text':
+        hdlr_payload += b'subt'
+        hdlr_payload += u32.pack(0) * 3  # reserved
+        hdlr_payload += b'SubtitleHandler\0'  # name
+    else:
+        assert False
     mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload)  # Handler Reference Box
 
-    if is_audio:
+    if stream_type == 'audio':
         smhd_payload = s88.pack(0)  # balance
         smhd_payload += u16.pack(0)  # reserved
         media_header_box = full_box(b'smhd', 0, 0, smhd_payload)  # Sound Media Header
-    else:
+    elif stream_type == 'video':
         vmhd_payload = u16.pack(0)  # graphics mode
         vmhd_payload += u16.pack(0) * 3  # opcolor
         media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload)  # Video Media Header
+    elif stream_type == 'text':
+        sthd_payload = u16.pack(0) * 2
+        media_header_box = full_box(b'sthd', 0, 1, sthd_payload)  # Subtitle Media Header
+    else:
+        assert False
     minf_payload = media_header_box
 
     dref_payload = u32.pack(1)  # entry count
@@ -117,7 +133,7 @@ def write_piff_header(stream, params):
 
     sample_entry_payload = u8.pack(0) * 6  # reserved
     sample_entry_payload += u16.pack(1)  # data reference index
-    if is_audio:
+    if stream_type == 'audio':
         sample_entry_payload += u32.pack(0) * 2  # reserved
         sample_entry_payload += u16.pack(params.get('channels', 2))
         sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
@@ -127,7 +143,7 @@ def write_piff_header(stream, params):
 
         if fourcc == 'AACL':
             sample_entry_box = box(b'mp4a', sample_entry_payload)
-    else:
+    elif stream_type == 'video':
         sample_entry_payload += u16.pack(0)  # pre defined
         sample_entry_payload += u16.pack(0)  # reserved
         sample_entry_payload += u32.pack(0) * 3  # pre defined
@@ -155,6 +171,10 @@ def write_piff_header(stream, params):
             avcc_payload += pps
             sample_entry_payload += box(b'avcC', avcc_payload)  # AVC Decoder Configuration Record
             sample_entry_box = box(b'avc1', sample_entry_payload)  # AVC Simple Entry
+        else:
+            assert False
+    else:
+        assert False
     stsd_payload += sample_entry_box
 
     stbl_payload = full_box(b'stsd', 0, 0, stsd_payload)  # Sample Description Box
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 0c56a9015..045d463d6 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -2750,26 +2750,38 @@ class InfoExtractor(object):
                         else:
                             # Assuming direct URL to unfragmented media.
                             f['url'] = base_url
-                        formats.append(f)
+                        if content_type in ('video', 'audio'):
+                            formats.append(f)
+                        elif content_type == 'text':
+                            subtitles.setdefault(lang or 'und', []).append(f)
                     else:
                         self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
         return formats, subtitles
 
-    def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
+    def _extract_ism_formats(self, *args, **kwargs):
+        fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
+        if subs:
+            self.report_warning(bug_reports_message(
+                "Ignoring subtitle tracks found in the ISM manifest; "
+                "if any subtitle tracks are missing,"
+            ))
+        return fmts
+
+    def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
         res = self._download_xml_handle(
             ism_url, video_id,
             note=note or 'Downloading ISM manifest',
             errnote=errnote or 'Failed to download ISM manifest',
             fatal=fatal, data=data, headers=headers, query=query)
         if res is False:
-            return []
+            return [], {}
         ism_doc, urlh = res
         if ism_doc is None:
-            return []
+            return [], {}
 
-        return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
+        return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id)
 
-    def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
+    def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
         """
         Parse formats from ISM manifest.
         References:
@@ -2777,22 +2789,24 @@ class InfoExtractor(object):
             https://msdn.microsoft.com/en-us/library/ff469518.aspx
         """
         if ism_doc.get('IsLive') == 'TRUE':
-            return []
+            return [], {}
         if (not self._downloader.params.get('allow_unplayable_formats')
                 and ism_doc.find('Protection') is not None):
-            return []
+            return [], {}
 
         duration = int(ism_doc.attrib['Duration'])
         timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
 
         formats = []
+        subtitles = {}
         for stream in ism_doc.findall('StreamIndex'):
             stream_type = stream.get('Type')
-            if stream_type not in ('video', 'audio'):
+            if stream_type not in ('video', 'audio', 'text'):
                 continue
             url_pattern = stream.attrib['Url']
             stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
             stream_name = stream.get('Name')
+            stream_language = stream.get('Language', 'und')
             for track in stream.findall('QualityLevel'):
                 fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
                 # TODO: add support for WVC1 and WMAP
@@ -2839,33 +2853,52 @@ class InfoExtractor(object):
                     format_id.append(stream_name)
                 format_id.append(compat_str(tbr))
 
-                formats.append({
-                    'format_id': '-'.join(format_id),
-                    'url': ism_url,
-                    'manifest_url': ism_url,
-                    'ext': 'ismv' if stream_type == 'video' else 'isma',
-                    'width': width,
-                    'height': height,
-                    'tbr': tbr,
-                    'asr': sampling_rate,
-                    'vcodec': 'none' if stream_type == 'audio' else fourcc,
-                    'acodec': 'none' if stream_type == 'video' else fourcc,
-                    'protocol': 'ism',
-                    'fragments': fragments,
-                    '_download_params': {
-                        'duration': duration,
-                        'timescale': stream_timescale,
-                        'width': width or 0,
-                        'height': height or 0,
-                        'fourcc': fourcc,
-                        'codec_private_data': track.get('CodecPrivateData'),
-                        'sampling_rate': sampling_rate,
-                        'channels': int_or_none(track.get('Channels', 2)),
-                        'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
-                        'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
-                    },
-                })
-        return formats
+                if stream_type == 'text':
+                    subtitles.setdefault(stream_language, []).append({
+                        'ext': 'ismt',
+                        'protocol': 'ism',
+                        'url': ism_url,
+                        'manifest_url': ism_url,
+                        'fragments': fragments,
+                        '_download_params': {
+                            'stream_type': stream_type,
+                            'duration': duration,
+                            'timescale': stream_timescale,
+                            'fourcc': fourcc,
+                            'language': stream_language,
+                            'codec_private_data': track.get('CodecPrivateData'),
+                        }
+                    })
+                elif stream_type in ('video', 'audio'):
+                    formats.append({
+                        'format_id': '-'.join(format_id),
+                        'url': ism_url,
+                        'manifest_url': ism_url,
+                        'ext': 'ismv' if stream_type == 'video' else 'isma',
+                        'width': width,
+                        'height': height,
+                        'tbr': tbr,
+                        'asr': sampling_rate,
+                        'vcodec': 'none' if stream_type == 'audio' else fourcc,
+                        'acodec': 'none' if stream_type == 'video' else fourcc,
+                        'protocol': 'ism',
+                        'fragments': fragments,
+                        '_download_params': {
+                            'stream_type': stream_type,
+                            'duration': duration,
+                            'timescale': stream_timescale,
+                            'width': width or 0,
+                            'height': height or 0,
+                            'fourcc': fourcc,
+                            'language': stream_language,
+                            'codec_private_data': track.get('CodecPrivateData'),
+                            'sampling_rate': sampling_rate,
+                            'channels': int_or_none(track.get('Channels', 2)),
+                            'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
+                            'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
+                        },
+                    })
+        return formats, subtitles
 
     def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None, quality=None):
         def absolute_url(item_url):

From f6a1d69a878070392171d807c03b5954c61cf911 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Thu, 22 Apr 2021 17:19:26 +0200
Subject: [PATCH 08/31] [extractor/common] Extend _extract_akamai_formats to
 also extract subtitle tracks

---
 yt_dlp/extractor/common.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 045d463d6..6257c17cd 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -3023,7 +3023,16 @@ class InfoExtractor(object):
                 entries.append(media_info)
         return entries
 
-    def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
+    def _extract_akamai_formats(self, *args, **kwargs):  # formats-only wrapper; all arguments are forwarded as-is
+        fmts, subs = self._extract_akamai_formats_and_subtitles(*args, **kwargs)
+        if subs:  # this method returns formats only, so any subtitle tracks found can merely be reported
+            self.report_warning(bug_reports_message(
+                "Ignoring subtitle tracks found in the manifests; "
+                "if any subtitle tracks are missing,"  # prefix; bug_reports_message() appends "please report this issue ..."
+            ))
+        return fmts  # subs is intentionally discarded
+
+    def _extract_akamai_formats_and_subtitles(self, manifest_url, video_id, hosts={}):
         signed = 'hdnea=' in manifest_url
         if not signed:
             # https://learn.akamai.com/en-us/webhelp/media-services-on-demand/stream-packaging-user-guide/GUID-BE6C0F73-1E06-483B-B0EA-57984B91B7F9.html
@@ -3032,6 +3041,7 @@ class InfoExtractor(object):
                 '', manifest_url).strip('?')
 
         formats = []
+        subtitles = {}
 
         hdcore_sign = 'hdcore=3.7.0'
         f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
@@ -3050,10 +3060,11 @@ class InfoExtractor(object):
         hls_host = hosts.get('hls')
         if hls_host:
             m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
-        m3u8_formats = self._extract_m3u8_formats(
+        m3u8_formats, m3u8_subtitles = self._extract_m3u8_formats_and_subtitles(
             m3u8_url, video_id, 'mp4', 'm3u8_native',
             m3u8_id='hls', fatal=False)
         formats.extend(m3u8_formats)
+        subtitles = self._merge_subtitles(subtitles, m3u8_subtitles)
 
         http_host = hosts.get('http')
         if http_host and m3u8_formats and not signed:
@@ -3077,7 +3088,7 @@ class InfoExtractor(object):
                             formats.append(http_f)
                         i += 1
 
-        return formats
+        return formats, subtitles
 
     def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
         query = compat_urlparse.urlparse(url).query

From 73b9088a1c858673255e69da1ac966e83ae91a30 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Sat, 17 Apr 2021 15:49:12 +0200
Subject: [PATCH 09/31] [test] Test subtitle extraction from HLS manifests

---
 test/test_InfoExtractor.py          | 167 ++++++++++++++++++++++++++++
 test/testdata/m3u8/bipbop_16x9.m3u8 |  38 +++++++
 2 files changed, 205 insertions(+)
 create mode 100644 test/testdata/m3u8/bipbop_16x9.m3u8

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 9e059723f..b4984b5ed 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -687,6 +687,173 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                 }],
                 {}
             ),
+            (
+                'bipbop_16x9',
+                'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8',
+                [{
+                    "format_id": "bipbop_audio-BipBop Audio 2",
+                    "format_index": None,
+                    "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/alternate_audio_aac/prog_index.m3u8",
+                    "manifest_url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8",
+                    "language": "eng",
+                    "ext": "mp4",
+                    "protocol": "m3u8",
+                    "preference": None,
+                    "quality": None,
+                    "vcodec": "none",
+                    "audio_ext": "mp4",
+                    "video_ext": "none",
+                }, {
+                    "format_id": "41",
+                    "format_index": None,
+                    "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear0/prog_index.m3u8",
+                    "manifest_url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8",
+                    "tbr": 41.457,
+                    "ext": "mp4",
+                    "fps": None,
+                    "protocol": "m3u8",
+                    "preference": None,
+                    "quality": None,
+                    "vcodec": "none",
+                    "acodec": "mp4a.40.2",
+                    "audio_ext": "mp4",
+                    "video_ext": "none",
+                    "abr": 41.457,
+                }, {
+                    "format_id": "263",
+                    "format_index": None,
+                    "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear1/prog_index.m3u8",
+                    "manifest_url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8",
+                    "tbr": 263.851,
+                    "ext": "mp4",
+                    "fps": None,
+                    "protocol": "m3u8",
+                    "preference": None,
+                    "quality": None,
+                    "width": 416,
+                    "height": 234,
+                    "vcodec": "avc1.4d400d",
+                    "acodec": "mp4a.40.2",
+                    "video_ext": "mp4",
+                    "audio_ext": "none",
+                    "vbr": 263.851,
+                    "abr": 0,
+                }, {
+                    "format_id": "577",
+                    "format_index": None,
+                    "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear2/prog_index.m3u8",
+                    "manifest_url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8",
+                    "tbr": 577.61,
+                    "ext": "mp4",
+                    "fps": None,
+                    "protocol": "m3u8",
+                    "preference": None,
+                    "quality": None,
+                    "width": 640,
+                    "height": 360,
+                    "vcodec": "avc1.4d401e",
+                    "acodec": "mp4a.40.2",
+                    "video_ext": "mp4",
+                    "audio_ext": "none",
+                    "vbr": 577.61,
+                    "abr": 0,
+                }, {
+                    "format_id": "915",
+                    "format_index": None,
+                    "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear3/prog_index.m3u8",
+                    "manifest_url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8",
+                    "tbr": 915.905,
+                    "ext": "mp4",
+                    "fps": None,
+                    "protocol": "m3u8",
+                    "preference": None,
+                    "quality": None,
+                    "width": 960,
+                    "height": 540,
+                    "vcodec": "avc1.4d401f",
+                    "acodec": "mp4a.40.2",
+                    "video_ext": "mp4",
+                    "audio_ext": "none",
+                    "vbr": 915.905,
+                    "abr": 0,
+                }, {
+                    "format_id": "1030",
+                    "format_index": None,
+                    "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear4/prog_index.m3u8",
+                    "manifest_url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8",
+                    "tbr": 1030.138,
+                    "ext": "mp4",
+                    "fps": None,
+                    "protocol": "m3u8",
+                    "preference": None,
+                    "quality": None,
+                    "width": 1280,
+                    "height": 720,
+                    "vcodec": "avc1.4d401f",
+                    "acodec": "mp4a.40.2",
+                    "video_ext": "mp4",
+                    "audio_ext": "none",
+                    "vbr": 1030.138,
+                    "abr": 0,
+                }, {
+                    "format_id": "1924",
+                    "format_index": None,
+                    "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear5/prog_index.m3u8",
+                    "manifest_url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8",
+                    "tbr": 1924.009,
+                    "ext": "mp4",
+                    "fps": None,
+                    "protocol": "m3u8",
+                    "preference": None,
+                    "quality": None,
+                    "width": 1920,
+                    "height": 1080,
+                    "vcodec": "avc1.4d401f",
+                    "acodec": "mp4a.40.2",
+                    "video_ext": "mp4",
+                    "audio_ext": "none",
+                    "vbr": 1924.009,
+                    "abr": 0,
+                }],
+                {
+                    "en": [{
+                        "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/eng/prog_index.m3u8",
+                        "ext": "vtt",
+                        "protocol": "m3u8_native"
+                    }, {
+                        "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/eng_forced/prog_index.m3u8",
+                        "ext": "vtt",
+                        "protocol": "m3u8_native"
+                    }],
+                    "fr": [{
+                        "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/fra/prog_index.m3u8",
+                        "ext": "vtt",
+                        "protocol": "m3u8_native"
+                    }, {
+                        "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/fra_forced/prog_index.m3u8",
+                        "ext": "vtt",
+                        "protocol": "m3u8_native"
+                    }],
+                    "es": [{
+                        "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/spa/prog_index.m3u8",
+                        "ext": "vtt",
+                        "protocol": "m3u8_native"
+                    }, {
+                        "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/spa_forced/prog_index.m3u8",
+                        "ext": "vtt",
+                        "protocol": "m3u8_native"
+                    }],
+                    "ja": [{
+                        "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/jpn/prog_index.m3u8",
+                        "ext": "vtt",
+                        "protocol": "m3u8_native"
+                    }, {
+                        "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/jpn_forced/prog_index.m3u8",
+                        "ext": "vtt",
+                        "protocol": "m3u8_native"
+                    }],
+                }
+            ),
         ]
 
         for m3u8_file, m3u8_url, expected_formats, expected_subs in _TEST_CASES:
diff --git a/test/testdata/m3u8/bipbop_16x9.m3u8 b/test/testdata/m3u8/bipbop_16x9.m3u8
new file mode 100644
index 000000000..1ce87dd04
--- /dev/null
+++ b/test/testdata/m3u8/bipbop_16x9.m3u8
@@ -0,0 +1,38 @@
+#EXTM3U
+
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="bipbop_audio",LANGUAGE="eng",NAME="BipBop Audio 1",AUTOSELECT=YES,DEFAULT=YES
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="bipbop_audio",LANGUAGE="eng",NAME="BipBop Audio 2",AUTOSELECT=NO,DEFAULT=NO,URI="alternate_audio_aac/prog_index.m3u8"
+
+
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English",DEFAULT=YES,AUTOSELECT=YES,FORCED=NO,LANGUAGE="en",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/eng/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="en",URI="subtitles/eng_forced/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Français",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="fr",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/fra/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Français (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="fr",URI="subtitles/fra_forced/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Español",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="es",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/spa/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Español (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="es",URI="subtitles/spa_forced/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="日本語",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="ja",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/jpn/prog_index.m3u8"
+#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="日本語 (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="ja",URI="subtitles/jpn_forced/prog_index.m3u8"
+
+
+#EXT-X-STREAM-INF:BANDWIDTH=263851,CODECS="mp4a.40.2, avc1.4d400d",RESOLUTION=416x234,AUDIO="bipbop_audio",SUBTITLES="subs"
+gear1/prog_index.m3u8
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=28451,CODECS="avc1.4d400d",URI="gear1/iframe_index.m3u8"
+
+#EXT-X-STREAM-INF:BANDWIDTH=577610,CODECS="mp4a.40.2, avc1.4d401e",RESOLUTION=640x360,AUDIO="bipbop_audio",SUBTITLES="subs"
+gear2/prog_index.m3u8
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=181534,CODECS="avc1.4d401e",URI="gear2/iframe_index.m3u8"
+
+#EXT-X-STREAM-INF:BANDWIDTH=915905,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=960x540,AUDIO="bipbop_audio",SUBTITLES="subs"
+gear3/prog_index.m3u8
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=297056,CODECS="avc1.4d401f",URI="gear3/iframe_index.m3u8"
+
+#EXT-X-STREAM-INF:BANDWIDTH=1030138,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=1280x720,AUDIO="bipbop_audio",SUBTITLES="subs"
+gear4/prog_index.m3u8
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=339492,CODECS="avc1.4d401f",URI="gear4/iframe_index.m3u8"
+
+#EXT-X-STREAM-INF:BANDWIDTH=1924009,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=1920x1080,AUDIO="bipbop_audio",SUBTITLES="subs"
+gear5/prog_index.m3u8
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=669554,CODECS="avc1.4d401f",URI="gear5/iframe_index.m3u8"
+
+#EXT-X-STREAM-INF:BANDWIDTH=41457,CODECS="mp4a.40.2",AUDIO="bipbop_audio",SUBTITLES="subs"
+gear0/prog_index.m3u8

From becdc7f82cd2d9bb669aa127e80a6a94a4989ebd Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Sun, 18 Apr 2021 07:55:33 +0200
Subject: [PATCH 10/31] [test] Test subtitle extraction from DASH manifests

---
 test/test_InfoExtractor.py      | 131 +++++++++++-
 test/testdata/mpd/subtitles.mpd | 351 ++++++++++++++++++++++++++++++++
 2 files changed, 477 insertions(+), 5 deletions(-)
 create mode 100644 test/testdata/mpd/subtitles.mpd

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index b4984b5ed..5fbea0473 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -949,7 +949,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'tbr': 5997.485,
                     'width': 1920,
                     'height': 1080,
-                }]
+                }],
+                {},
             ), (
                 # https://github.com/ytdl-org/youtube-dl/pull/14844
                 'urls_only',
@@ -1032,7 +1033,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'tbr': 4400,
                     'width': 1920,
                     'height': 1080,
-                }]
+                }],
+                {},
             ), (
                 # https://github.com/ytdl-org/youtube-dl/issues/20346
                 # Media considered unfragmented even though it contains
@@ -1078,18 +1080,137 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                     'width': 360,
                     'height': 360,
                     'fps': 30,
-                }]
+                }],
+                {},
+            ), (
+                'subtitles',
+                'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
+                'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/',
+                [{
+                    "format_id": "audio=128001",
+                    "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
+                    "ext": "m4a",
+                    "tbr": 128.001,
+                    "asr": 48000,
+                    "format_note": "DASH audio",
+                    "container": "m4a_dash",
+                    "vcodec": "none",
+                    "acodec": "mp4a.40.2",
+                    "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
+                    "fragment_base_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/",
+                    "protocol": "http_dash_segments",
+                    "audio_ext": "m4a",
+                    "video_ext": "none",
+                    "abr": 128.001,
+                }, {
+                    "format_id": "video=100000",
+                    "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
+                    "ext": "mp4",
+                    "width": 336,
+                    "height": 144,
+                    "tbr": 100,
+                    "format_note": "DASH video",
+                    "container": "mp4_dash",
+                    "vcodec": "avc1.4D401F",
+                    "acodec": "none",
+                    "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
+                    "fragment_base_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/",
+                    "protocol": "http_dash_segments",
+                    "video_ext": "mp4",
+                    "audio_ext": "none",
+                    "vbr": 100,
+                }, {
+                    "format_id": "video=326000",
+                    "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
+                    "ext": "mp4",
+                    "width": 562,
+                    "height": 240,
+                    "tbr": 326,
+                    "format_note": "DASH video",
+                    "container": "mp4_dash",
+                    "vcodec": "avc1.4D401F",
+                    "acodec": "none",
+                    "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
+                    "fragment_base_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/",
+                    "protocol": "http_dash_segments",
+                    "video_ext": "mp4",
+                    "audio_ext": "none",
+                    "vbr": 326,
+                }, {
+                    "format_id": "video=698000",
+                    "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
+                    "ext": "mp4",
+                    "width": 844,
+                    "height": 360,
+                    "tbr": 698,
+                    "format_note": "DASH video",
+                    "container": "mp4_dash",
+                    "vcodec": "avc1.4D401F",
+                    "acodec": "none",
+                    "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
+                    "fragment_base_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/",
+                    "protocol": "http_dash_segments",
+                    "video_ext": "mp4",
+                    "audio_ext": "none",
+                    "vbr": 698,
+                }, {
+                    "format_id": "video=1493000",
+                    "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
+                    "ext": "mp4",
+                    "width": 1126,
+                    "height": 480,
+                    "tbr": 1493,
+                    "format_note": "DASH video",
+                    "container": "mp4_dash",
+                    "vcodec": "avc1.4D401F",
+                    "acodec": "none",
+                    "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
+                    "fragment_base_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/",
+                    "protocol": "http_dash_segments",
+                    "video_ext": "mp4",
+                    "audio_ext": "none",
+                    "vbr": 1493,
+                }, {
+                    "format_id": "video=4482000",
+                    "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
+                    "ext": "mp4",
+                    "width": 1688,
+                    "height": 720,
+                    "tbr": 4482,
+                    "format_note": "DASH video",
+                    "container": "mp4_dash",
+                    "vcodec": "avc1.4D401F",
+                    "acodec": "none",
+                    "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
+                    "fragment_base_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/",
+                    "protocol": "http_dash_segments",
+                    "video_ext": "mp4",
+                    "audio_ext": "none",
+                    "vbr": 4482,
+                }],
+                {
+                    "en": [
+                        {
+                            "ext": "mp4",
+                            "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
+                            "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd",
+                            "fragment_base_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/",
+                            "protocol": "http_dash_segments",
+                        }
+                    ]
+                },
             )
         ]
 
-        for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES:
+        for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES:
             with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
                          mode='r', encoding='utf-8') as f:
-                formats = self.ie._parse_mpd_formats(
+                formats, subtitles = self.ie._parse_mpd_formats_and_subtitles(
                     compat_etree_fromstring(f.read().encode('utf-8')),
                     mpd_base_url=mpd_base_url, mpd_url=mpd_url)
                 self.ie._sort_formats(formats)
                 expect_value(self, formats, expected_formats, None)
+                expect_value(self, subtitles, expected_subtitles, None)
 
     def test_parse_f4m_formats(self):
         _TEST_CASES = [
diff --git a/test/testdata/mpd/subtitles.mpd b/test/testdata/mpd/subtitles.mpd
new file mode 100644
index 000000000..6f948adba
--- /dev/null
+++ b/test/testdata/mpd/subtitles.mpd
@@ -0,0 +1,351 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Created with Unified Streaming Platform (version=1.10.18-20255) -->
+<MPD
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xmlns="urn:mpeg:dash:schema:mpd:2011"
+  xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-DASH_schema_files/DASH-MPD.xsd"
+  type="static"
+  mediaPresentationDuration="PT14M48S"
+  maxSegmentDuration="PT1M"
+  minBufferTime="PT10S"
+  profiles="urn:mpeg:dash:profile:isoff-live:2011">
+  <Period
+    id="1"
+    duration="PT14M48S">
+    <BaseURL>dash/</BaseURL>
+    <AdaptationSet
+      id="1"
+      group="1"
+      contentType="audio"
+      segmentAlignment="true"
+      audioSamplingRate="48000"
+      mimeType="audio/mp4"
+      codecs="mp4a.40.2"
+      startWithSAP="1">
+      <AudioChannelConfiguration
+        schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011"
+        value="2" />
+      <Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
+      <SegmentTemplate
+        timescale="48000"
+        initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
+        media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
+        <SegmentTimeline>
+          <S t="0" d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="96256" r="2" />
+          <S d="95232" />
+          <S d="3584" />
+        </SegmentTimeline>
+      </SegmentTemplate>
+      <Representation
+        id="audio=128001"
+        bandwidth="128001">
+      </Representation>
+    </AdaptationSet>
+    <AdaptationSet
+      id="2"
+      group="3"
+      contentType="text"
+      lang="en"
+      mimeType="application/mp4"
+      codecs="stpp"
+      startWithSAP="1">
+      <Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle" />
+      <SegmentTemplate
+        timescale="1000"
+        initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
+        media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
+        <SegmentTimeline>
+          <S t="0" d="60000" r="9" />
+          <S d="24000" />
+        </SegmentTimeline>
+      </SegmentTemplate>
+      <Representation
+        id="textstream_eng=1000"
+        bandwidth="1000">
+      </Representation>
+    </AdaptationSet>
+    <AdaptationSet
+      id="3"
+      group="2"
+      contentType="video"
+      par="960:409"
+      minBandwidth="100000"
+      maxBandwidth="4482000"
+      maxWidth="1689"
+      maxHeight="720"
+      segmentAlignment="true"
+      mimeType="video/mp4"
+      codecs="avc1.4D401F"
+      startWithSAP="1">
+      <Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
+      <SegmentTemplate
+        timescale="12288"
+        initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
+        media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
+        <SegmentTimeline>
+          <S t="0" d="24576" r="443" />
+        </SegmentTimeline>
+      </SegmentTemplate>
+      <Representation
+        id="video=100000"
+        bandwidth="100000"
+        width="336"
+        height="144"
+        sar="2880:2863"
+        scanType="progressive">
+      </Representation>
+      <Representation
+        id="video=326000"
+        bandwidth="326000"
+        width="562"
+        height="240"
+        sar="115200:114929"
+        scanType="progressive">
+      </Representation>
+      <Representation
+        id="video=698000"
+        bandwidth="698000"
+        width="844"
+        height="360"
+        sar="86400:86299"
+        scanType="progressive">
+      </Representation>
+      <Representation
+        id="video=1493000"
+        bandwidth="1493000"
+        width="1126"
+        height="480"
+        sar="230400:230267"
+        scanType="progressive">
+      </Representation>
+      <Representation
+        id="video=4482000"
+        bandwidth="4482000"
+        width="1688"
+        height="720"
+        sar="86400:86299"
+        scanType="progressive">
+      </Representation>
+    </AdaptationSet>
+  </Period>
+</MPD>

From 5fbcebed8c41f4406d88318203c69efdc73f4e03 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Mon, 19 Apr 2021 12:40:29 +0200
Subject: [PATCH 11/31] [test] Test SSTR manifest parsing

---
 test/test_InfoExtractor.py        | 191 ++++++
 test/testdata/ism/sintel.Manifest | 988 ++++++++++++++++++++++++++++++
 2 files changed, 1179 insertions(+)
 create mode 100644 test/testdata/ism/sintel.Manifest

diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 5fbea0473..f3578efe1 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -1212,6 +1212,197 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
                 expect_value(self, formats, expected_formats, None)
                 expect_value(self, subtitles, expected_subtitles, None)
 
+    def test_parse_ism_formats(self):  # parse stored ISM (Smooth Streaming) manifests and compare the result with the expected formats/subtitles
+        _TEST_CASES = [  # one tuple per fixture: (fixture name, manifest URL, expected formats, expected subtitles)
+            (
+                'sintel',  # fixture name; resolved to ./test/testdata/ism/sintel.Manifest by the loop below
+                'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',  # handed to the parser as ism_url
+                [{  # expected formats; compared after self.ie._sort_formats() has been applied to the parsed list
+                    "format_id": "audio-128",
+                    "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest",
+                    "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest",
+                    "ext": "isma",
+                    "tbr": 128,
+                    "asr": 48000,
+                    "vcodec": "none",
+                    "acodec": "AACL",
+                    "protocol": "ism",
+                    "_download_params": {
+                        "stream_type": "audio",
+                        "duration": 8880746666,
+                        "timescale": 10000000,
+                        "width": 0,
+                        "height": 0,
+                        "fourcc": "AACL",
+                        "codec_private_data": "1190",
+                        "sampling_rate": 48000,
+                        "channels": 2,
+                        "bits_per_sample": 16,
+                        "nal_unit_length_field": 4
+                    },
+                    "audio_ext": "isma",
+                    "video_ext": "none",
+                    "abr": 128,
+                }, {
+                    "format_id": "video-100",
+                    "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest",
+                    "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest",
+                    "ext": "ismv",
+                    "width": 336,
+                    "height": 144,
+                    "tbr": 100,
+                    "vcodec": "AVC1",
+                    "acodec": "none",
+                    "protocol": "ism",
+                    "_download_params": {
+                        "stream_type": "video",
+                        "duration": 8880746666,
+                        "timescale": 10000000,
+                        "width": 336,
+                        "height": 144,
+                        "fourcc": "AVC1",
+                        "codec_private_data": "00000001674D401FDA0544EFFC2D002CBC40000003004000000C03C60CA80000000168EF32C8",
+                        "channels": 2,
+                        "bits_per_sample": 16,
+                        "nal_unit_length_field": 4
+                    },
+                    "video_ext": "ismv",
+                    "audio_ext": "none",
+                    "vbr": 100,
+                }, {
+                    "format_id": "video-326",
+                    "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest",
+                    "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest",
+                    "ext": "ismv",
+                    "width": 562,
+                    "height": 240,
+                    "tbr": 326,
+                    "vcodec": "AVC1",
+                    "acodec": "none",
+                    "protocol": "ism",
+                    "_download_params": {
+                        "stream_type": "video",
+                        "duration": 8880746666,
+                        "timescale": 10000000,
+                        "width": 562,
+                        "height": 240,
+                        "fourcc": "AVC1",
+                        "codec_private_data": "00000001674D401FDA0241FE23FFC3BC83BA44000003000400000300C03C60CA800000000168EF32C8",
+                        "channels": 2,
+                        "bits_per_sample": 16,
+                        "nal_unit_length_field": 4
+                    },
+                    "video_ext": "ismv",
+                    "audio_ext": "none",
+                    "vbr": 326,
+                }, {
+                    "format_id": "video-698",
+                    "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest",
+                    "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest",
+                    "ext": "ismv",
+                    "width": 844,
+                    "height": 360,
+                    "tbr": 698,
+                    "vcodec": "AVC1",
+                    "acodec": "none",
+                    "protocol": "ism",
+                    "_download_params": {
+                        "stream_type": "video",
+                        "duration": 8880746666,
+                        "timescale": 10000000,
+                        "width": 844,
+                        "height": 360,
+                        "fourcc": "AVC1",
+                        "codec_private_data": "00000001674D401FDA0350BFB97FF06AF06AD1000003000100000300300F1832A00000000168EF32C8",
+                        "channels": 2,
+                        "bits_per_sample": 16,
+                        "nal_unit_length_field": 4
+                    },
+                    "video_ext": "ismv",
+                    "audio_ext": "none",
+                    "vbr": 698,
+                }, {
+                    "format_id": "video-1493",
+                    "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest",
+                    "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest",
+                    "ext": "ismv",
+                    "width": 1126,
+                    "height": 480,
+                    "tbr": 1493,
+                    "vcodec": "AVC1",
+                    "acodec": "none",
+                    "protocol": "ism",
+                    "_download_params": {
+                        "stream_type": "video",
+                        "duration": 8880746666,
+                        "timescale": 10000000,
+                        "width": 1126,
+                        "height": 480,
+                        "fourcc": "AVC1",
+                        "codec_private_data": "00000001674D401FDA011C3DE6FFF0D890D871000003000100000300300F1832A00000000168EF32C8",
+                        "channels": 2,
+                        "bits_per_sample": 16,
+                        "nal_unit_length_field": 4
+                    },
+                    "video_ext": "ismv",
+                    "audio_ext": "none",
+                    "vbr": 1493,
+                }, {
+                    "format_id": "video-4482",
+                    "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest",
+                    "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest",
+                    "ext": "ismv",
+                    "width": 1688,
+                    "height": 720,
+                    "tbr": 4482,
+                    "vcodec": "AVC1",
+                    "acodec": "none",
+                    "protocol": "ism",
+                    "_download_params": {
+                        "stream_type": "video",
+                        "duration": 8880746666,
+                        "timescale": 10000000,
+                        "width": 1688,
+                        "height": 720,
+                        "fourcc": "AVC1",
+                        "codec_private_data": "00000001674D401FDA01A816F97FFC1ABC1AB440000003004000000C03C60CA80000000168EF32C8",
+                        "channels": 2,
+                        "bits_per_sample": 16,
+                        "nal_unit_length_field": 4
+                    },
+                    "video_ext": "ismv",
+                    "audio_ext": "none",
+                    "vbr": 4482,
+                }],
+                {  # expected subtitles returned alongside the formats
+                    "eng": [
+                        {
+                            "ext": "ismt",
+                            "protocol": "ism",
+                            "url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest",
+                            "manifest_url": "https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest",
+                            "_download_params": {
+                                "stream_type": "text",
+                                "duration": 8880746666,
+                                "timescale": 10000000,
+                                "fourcc": "TTML",
+                                "codec_private_data": ""
+                            }
+                        }
+                    ]
+                },
+            ),
+        ]
+
+        for ism_file, ism_url, expected_formats, expected_subtitles in _TEST_CASES:  # run every fixture through the parser
+            with io.open('./test/testdata/ism/%s.Manifest' % ism_file,
+                         mode='r', encoding='utf-8') as f:  # fixture is read as text (path is relative to the working directory)
+                formats, subtitles = self.ie._parse_ism_formats_and_subtitles(
+                    compat_etree_fromstring(f.read().encode('utf-8')), ism_url=ism_url)  # text is re-encoded to bytes for compat_etree_fromstring
+                self.ie._sort_formats(formats)  # return value unused; presumably reorders `formats` in place - TODO confirm
+                expect_value(self, formats, expected_formats, None)
+                expect_value(self, subtitles, expected_subtitles, None)
+
     def test_parse_f4m_formats(self):
         _TEST_CASES = [
             (
diff --git a/test/testdata/ism/sintel.Manifest b/test/testdata/ism/sintel.Manifest
new file mode 100644
index 000000000..2ff8c2447
--- /dev/null
+++ b/test/testdata/ism/sintel.Manifest
@@ -0,0 +1,988 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Created with Unified Streaming Platform (version=1.10.18-20255) -->
+<SmoothStreamingMedia
+  MajorVersion="2"
+  MinorVersion="0"
+  TimeScale="10000000"
+  Duration="8880746666">
+  <StreamIndex
+    Type="audio"
+    QualityLevels="1"
+    TimeScale="10000000"
+    Name="audio"
+    Chunks="445"
+    Url="QualityLevels({bitrate})/Fragments(audio={start time})">
+    <QualityLevel
+      Index="0"
+      Bitrate="128001"
+      CodecPrivateData="1190"
+      SamplingRate="48000"
+      Channels="2"
+      BitsPerSample="16"
+      PacketSize="4"
+      AudioTag="255"
+      FourCC="AACL" />
+    <c t="0" d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="20053333" />
+    <c d="20053333" />
+    <c d="20053334" />
+    <c d="19840000" />
+    <c d="746666" />
+  </StreamIndex>
+  <StreamIndex
+    Type="text"
+    QualityLevels="1"
+    TimeScale="10000000"
+    Language="eng"
+    Subtype="CAPT"
+    Name="textstream_eng"
+    Chunks="11"
+    Url="QualityLevels({bitrate})/Fragments(textstream_eng={start time})">
+    <QualityLevel
+      Index="0"
+      Bitrate="1000"
+      CodecPrivateData=""
+      FourCC="TTML" />
+    <c t="0" d="600000000" />
+    <c d="600000000" />
+    <c d="600000000" />
+    <c d="600000000" />
+    <c d="600000000" />
+    <c d="600000000" />
+    <c d="600000000" />
+    <c d="600000000" />
+    <c d="600000000" />
+    <c d="600000000" />
+    <c d="240000000" />
+  </StreamIndex>
+  <StreamIndex
+    Type="video"
+    QualityLevels="5"
+    TimeScale="10000000"
+    Name="video"
+    Chunks="444"
+    Url="QualityLevels({bitrate})/Fragments(video={start time})"
+    MaxWidth="1688"
+    MaxHeight="720"
+    DisplayWidth="1689"
+    DisplayHeight="720">
+    <QualityLevel
+      Index="0"
+      Bitrate="100000"
+      CodecPrivateData="00000001674D401FDA0544EFFC2D002CBC40000003004000000C03C60CA80000000168EF32C8"
+      MaxWidth="336"
+      MaxHeight="144"
+      FourCC="AVC1" />
+    <QualityLevel
+      Index="1"
+      Bitrate="326000"
+      CodecPrivateData="00000001674D401FDA0241FE23FFC3BC83BA44000003000400000300C03C60CA800000000168EF32C8"
+      MaxWidth="562"
+      MaxHeight="240"
+      FourCC="AVC1" />
+    <QualityLevel
+      Index="2"
+      Bitrate="698000"
+      CodecPrivateData="00000001674D401FDA0350BFB97FF06AF06AD1000003000100000300300F1832A00000000168EF32C8"
+      MaxWidth="844"
+      MaxHeight="360"
+      FourCC="AVC1" />
+    <QualityLevel
+      Index="3"
+      Bitrate="1493000"
+      CodecPrivateData="00000001674D401FDA011C3DE6FFF0D890D871000003000100000300300F1832A00000000168EF32C8"
+      MaxWidth="1126"
+      MaxHeight="480"
+      FourCC="AVC1" />
+    <QualityLevel
+      Index="4"
+      Bitrate="4482000"
+      CodecPrivateData="00000001674D401FDA01A816F97FFC1ABC1AB440000003004000000C03C60CA80000000168EF32C8"
+      MaxWidth="1688"
+      MaxHeight="720"
+      FourCC="AVC1" />
+    <c t="0" d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+    <c d="20000000" />
+  </StreamIndex>
+</SmoothStreamingMedia>

From 4a2f19abbd61274358211c2e3b1d9658cfbdcdde Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Wed, 28 Apr 2021 16:17:30 +0530
Subject: [PATCH 12/31] [downloader/hls] Assemble single-file WebVTT subtitles
 from HLS segments

---
 yt_dlp/compat.py           |  14 ++
 yt_dlp/downloader/hls.py   |  44 +++++
 yt_dlp/extractor/common.py |   6 +
 yt_dlp/webvtt.py           | 368 +++++++++++++++++++++++++++++++++++++
 4 files changed, 432 insertions(+)
 create mode 100644 yt_dlp/webvtt.py

diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py
index 3ebf1ee7a..863bd2287 100644
--- a/yt_dlp/compat.py
+++ b/yt_dlp/compat.py
@@ -3018,10 +3018,24 @@ else:
         return ctypes.WINFUNCTYPE(*args, **kwargs)
 
 
+try:
+    compat_Pattern = re.Pattern
+except AttributeError:
+    compat_Pattern = type(re.compile(''))
+
+
+try:
+    compat_Match = re.Match
+except AttributeError:
+    compat_Match = type(re.compile('').match(''))
+
+
 __all__ = [
     'compat_HTMLParseError',
     'compat_HTMLParser',
     'compat_HTTPError',
+    'compat_Match',
+    'compat_Pattern',
     'compat_Struct',
     'compat_b64decode',
     'compat_basestring',
diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
index f4e41a6c7..cee3807ce 100644
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
 
 import errno
 import re
+import io
 import binascii
 try:
     from Crypto.Cipher import AES
@@ -27,7 +28,9 @@ from ..utils import (
     parse_m3u8_attributes,
     sanitize_open,
     update_url_query,
+    bug_reports_message,
 )
+from .. import webvtt
 
 
 class HlsFD(FragmentFD):
@@ -78,6 +81,8 @@ class HlsFD(FragmentFD):
         man_url = info_dict['url']
         self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
 
+        is_webvtt = info_dict['ext'] == 'vtt'
+
         urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
         man_url = urlh.geturl()
         s = urlh.read().decode('utf-8', 'ignore')
@@ -142,6 +147,8 @@ class HlsFD(FragmentFD):
         else:
             self._prepare_and_start_frag_download(ctx)
 
+        extra_state = ctx.setdefault('extra_state', {})
+
         fragment_retries = self.params.get('fragment_retries', 0)
         skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
         test = self.params.get('test', False)
@@ -308,6 +315,42 @@ class HlsFD(FragmentFD):
 
                 return frag_content, frag_index
 
+            pack_fragment = lambda frag_content, _: frag_content
+
+            if is_webvtt:
+                def pack_fragment(frag_content, frag_index):
+                    output = io.StringIO()
+                    adjust = 0
+                    for block in webvtt.parse_fragment(frag_content):
+                        if isinstance(block, webvtt.CueBlock):
+                            block.start += adjust
+                            block.end += adjust
+                        elif isinstance(block, webvtt.Magic):
+                            # XXX: we do not handle MPEGTS overflow
+                            if frag_index == 1:
+                                extra_state['webvtt_mpegts'] = block.mpegts or 0
+                                extra_state['webvtt_local'] = block.local or 0
+                                # XXX: block.local = block.mpegts = None ?
+                            else:
+                                if block.mpegts is not None and block.local is not None:
+                                    adjust = (
+                                        (block.mpegts - extra_state.get('webvtt_mpegts', 0))
+                                        - (block.local - extra_state.get('webvtt_local', 0))
+                                    )
+                                continue
+                        elif isinstance(block, webvtt.HeaderBlock):
+                            if frag_index != 1:
+                                # XXX: this should probably be silent as well
+                                # or verify that all segments contain the same data
+                                self.report_warning(bug_reports_message(
+                                    'Discarding a %s block found in the middle of the stream; '
+                                    'if the subtitles display incorrectly,'
+                                    % (type(block).__name__)))
+                                continue
+                        block.write_into(output)
+
+                    return output.getvalue().encode('utf-8')
+
             def append_fragment(frag_content, frag_index):
                 if frag_content:
                     fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
@@ -315,6 +358,7 @@ class HlsFD(FragmentFD):
                         file, frag_sanitized = sanitize_open(fragment_filename, 'rb')
                         ctx['fragment_filename_sanitized'] = frag_sanitized
                         file.close()
+                        frag_content = pack_fragment(frag_content, frag_index)
                         self._append_fragment(ctx, frag_content)
                         return True
                     except EnvironmentError as ose:
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 6257c17cd..803c7fa06 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -2035,6 +2035,12 @@ class InfoExtractor(object):
                     'url': url,
                     'ext': determine_ext(url),
                 }
+                if sub_info['ext'] == 'm3u8':
+                    # Per RFC 8216 §3.1, the only possible subtitle format m3u8
+                    # files may contain is WebVTT:
+                    # <https://tools.ietf.org/html/rfc8216#section-3.1>
+                    sub_info['ext'] = 'vtt'
+                    sub_info['protocol'] = 'm3u8_native'
                 subtitles.setdefault(lang, []).append(sub_info)
             if media_type not in ('VIDEO', 'AUDIO'):
                 return
diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py
new file mode 100644
index 000000000..4d026834a
--- /dev/null
+++ b/yt_dlp/webvtt.py
@@ -0,0 +1,368 @@
+# coding: utf-8
+from __future__ import unicode_literals, print_function, division
+
+"""
+A partial parser for WebVTT segments. Interprets enough of the WebVTT stream
+to be able to assemble a single stand-alone subtitle file, suitably adjusting
+timestamps on the way, while everything else is passed through unmodified.
+
+Regular expressions based on the W3C WebVTT specification
+<https://www.w3.org/TR/webvtt1/>. The X-TIMESTAMP-MAP extension is described
+in RFC 8216 §3.5 <https://tools.ietf.org/html/rfc8216#section-3.5>.
+"""
+
+import re
+import io
+from .utils import int_or_none
+from .compat import (
+    compat_str as str,
+    compat_Pattern,
+    compat_Match,
+)
+
+
+class _MatchParser(object):
+    """
+    An object that maintains the current parsing position and allows
+    conveniently advancing it as syntax elements are successfully parsed.
+    """
+
+    def __init__(self, string):
+        self._data = string
+        self._pos = 0
+
+    def match(self, r):
+        if isinstance(r, compat_Pattern):
+            return r.match(self._data, self._pos)
+        if isinstance(r, str):
+            if self._data.startswith(r, self._pos):
+                return len(r)
+            return None
+        raise ValueError(r)
+
+    def advance(self, by):
+        if by is None:
+            amt = 0
+        elif isinstance(by, compat_Match):
+            amt = len(by.group(0))
+        elif isinstance(by, str):
+            amt = len(by)
+        elif isinstance(by, int):
+            amt = by
+        else:
+            raise ValueError(by)
+        self._pos += amt
+        return by
+
+    def consume(self, r):
+        return self.advance(self.match(r))
+
+    def child(self):
+        return _MatchChildParser(self)
+
+
+class _MatchChildParser(_MatchParser):
+    """
+    A child parser state, which advances through the same data as
+    its parent, but has an independent position. This is useful when
+    advancing through syntax elements we might later want to backtrack
+    from.
+    """
+
+    def __init__(self, parent):
+        super(_MatchChildParser, self).__init__(parent._data)
+        self.__parent = parent
+        self._pos = parent._pos
+
+    def commit(self):
+        """
+        Advance the parent state to the current position of this child state.
+        """
+        self.__parent._pos = self._pos
+        return self.__parent
+
+
+class ParseError(Exception):
+    def __init__(self, parser):
+        super(ParseError, self).__init__("Parse error at position %u (near %r)" % (
+            parser._pos, parser._data[parser._pos:parser._pos + 20]
+        ))
+
+
+_REGEX_TS = re.compile(r'''(?x)
+    (?:([0-9]{2,}):)?
+    ([0-9]{2}):
+    ([0-9]{2})\.
+    ([0-9]{3})?
+''')
+_REGEX_EOF = re.compile(r'\Z')
+_REGEX_NL = re.compile(r'(?:\r\n|[\r\n])')
+_REGEX_BLANK = re.compile(r'(?:\r\n|[\r\n])+')
+
+
+def _parse_ts(ts):
+    """
+    Convert a parsed WebVTT timestamp (a re.Match obtained from _REGEX_TS)
+    into 90 kHz MPEG PES ticks; absent hours/milliseconds count as zero.
+    """
+
+    h, min, s, ms = ts.groups()
+    return 90 * (
+        int(h or 0) * 3600000 +  # noqa: W504,E221,E222
+        int(min)    *   60000 +  # noqa: W504,E221,E222
+        int(s)      *    1000 +  # noqa: W504,E221,E222
+        int(ms or 0)             # noqa: W504,E221,E222
+    )
+
+
+def _format_ts(ts):
+    """
+    Convert an MPEG PES timestamp (90 kHz ticks) into a WebVTT timestamp
+    of the form HH:MM:SS.mmm, rounded to the nearest millisecond.
+    """
+
+    ms = int((ts + 45) // 90)
+    s, ms = divmod(ms, 1000)  # divmod yields (quotient, remainder)
+    mins, s = divmod(s, 60)
+    h, mins = divmod(mins, 60)
+    return '%02u:%02u:%02u.%03u' % (h, mins, s, ms)
+
+
+class Block(object):
+    """
+    An abstract WebVTT block.
+    """
+
+    def __init__(self, **kwargs):
+        for key, val in kwargs.items():
+            setattr(self, key, val)
+
+    @classmethod
+    def parse(cls, parser):
+        m = parser.match(cls._REGEX)
+        if not m:
+            return None
+        parser.advance(m)
+        return cls(raw=m.group(0))
+
+    def write_into(self, stream):
+        stream.write(self.raw)
+
+
+class HeaderBlock(Block):
+    """
+    A WebVTT block that may only appear in the header part of the file,
+    i.e. before any cue blocks.
+    """
+
+    pass
+
+
+class Magic(HeaderBlock):
+    _REGEX = re.compile(r'\ufeff?WEBVTT([ \t][^\r\n]*)?(?:\r\n|[\r\n])')
+
+    # XXX: The X-TIMESTAMP-MAP extension is described in RFC 8216 §3.5
+    # <https://tools.ietf.org/html/rfc8216#section-3.5>, but the RFC
+    # doesn’t specify the exact grammar nor where in the WebVTT
+    # syntax it should be placed; the below has been devised based
+    # on usage in the wild
+    #
+    # And strictly speaking, the presence of this extension violates
+    # the W3C WebVTT spec. Oh well.
+
+    _REGEX_TSMAP = re.compile(r'X-TIMESTAMP-MAP=')
+    _REGEX_TSMAP_LOCAL = re.compile(r'LOCAL:')
+    _REGEX_TSMAP_MPEGTS = re.compile(r'MPEGTS:([0-9]+)')
+
+    @classmethod
+    def __parse_tsmap(cls, parser):
+        parser = parser.child()
+        local, mpegts = None, None  # either key may be missing from the map
+        while True:
+            m = parser.consume(cls._REGEX_TSMAP_LOCAL)
+            if m:
+                m = parser.consume(_REGEX_TS)
+                if m is None:
+                    raise ParseError(parser)
+                local = _parse_ts(m)
+                if local is None:
+                    raise ParseError(parser)
+            else:
+                m = parser.consume(cls._REGEX_TSMAP_MPEGTS)
+                if m:
+                    mpegts = int_or_none(m.group(1))
+                    if mpegts is None:
+                        raise ParseError(parser)
+                else:
+                    raise ParseError(parser)
+            if parser.consume(','):
+                continue
+            if parser.consume(_REGEX_NL):
+                break
+            raise ParseError(parser)
+
+        parser.commit()
+        return local, mpegts
+
+    @classmethod
+    def parse(cls, parser):
+        parser = parser.child()
+
+        m = parser.consume(cls._REGEX)
+        if not m:
+            raise ParseError(parser)
+
+        extra = m.group(1)
+        local, mpegts = None, None
+        if parser.consume(cls._REGEX_TSMAP):
+            local, mpegts = cls.__parse_tsmap(parser)
+        if not parser.consume(_REGEX_NL):
+            raise ParseError(parser)
+        parser.commit()
+        return cls(extra=extra, mpegts=mpegts, local=local)
+
+    def write_into(self, stream):
+        stream.write('WEBVTT')
+        if self.extra is not None:
+            stream.write(self.extra)
+        stream.write('\n')
+        if self.local or self.mpegts:
+            stream.write('X-TIMESTAMP-MAP=LOCAL:')
+            stream.write(_format_ts(self.local if self.local is not None else 0))
+            stream.write(',MPEGTS:')
+            stream.write(str(self.mpegts if self.mpegts is not None else 0))
+            stream.write('\n')
+        stream.write('\n')
+
+
+class StyleBlock(HeaderBlock):
+    _REGEX = re.compile(r'''(?x)
+        STYLE[\ \t]*(?:\r\n|[\r\n])
+        ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
+        (?:\r\n|[\r\n])
+    ''')
+
+
+class RegionBlock(HeaderBlock):
+    _REGEX = re.compile(r'''(?x)
+        REGION[\ \t]*(?:\r\n|[\r\n])  # header line ends before the settings
+        ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
+        (?:\r\n|[\r\n])
+    ''')
+
+
+class CommentBlock(Block):
+    _REGEX = re.compile(r'''(?x)
+        NOTE(?:\r\n|[\ \t\r\n])
+        ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
+        (?:\r\n|[\r\n])
+    ''')
+
+
+class CueBlock(Block):
+    """
+    A cue block. The payload is not interpreted.
+    """
+
+    _REGEX_ID = re.compile(r'((?:(?!-->)[^\r\n])+)(?:\r\n|[\r\n])')
+    _REGEX_ARROW = re.compile(r'[ \t]+-->[ \t]+')
+    _REGEX_SETTINGS = re.compile(r'[ \t]+((?:(?!-->)[^\r\n])+)')
+    _REGEX_PAYLOAD = re.compile(r'[^\r\n]+(?:\r\n|[\r\n])?')
+
+    @classmethod
+    def parse(cls, parser):
+        parser = parser.child()
+
+        id = None
+        m = parser.consume(cls._REGEX_ID)
+        if m:
+            id = m.group(1)
+
+        m0 = parser.consume(_REGEX_TS)
+        if not m0:
+            return None
+        if not parser.consume(cls._REGEX_ARROW):
+            return None
+        m1 = parser.consume(_REGEX_TS)
+        if not m1:
+            return None
+        m2 = parser.consume(cls._REGEX_SETTINGS)
+        if not parser.consume(_REGEX_NL):
+            return None
+
+        start = _parse_ts(m0)
+        end = _parse_ts(m1)
+        settings = m2.group(1) if m2 is not None else None
+
+        text = io.StringIO()
+        while True:
+            m = parser.consume(cls._REGEX_PAYLOAD)
+            if not m:
+                break
+            text.write(m.group(0))
+
+        parser.commit()
+        return cls(
+            id=id,
+            start=start, end=end, settings=settings,
+            text=text.getvalue()
+        )
+
+    def write_into(self, stream):
+        if self.id is not None:
+            stream.write(self.id)
+            stream.write('\n')
+        stream.write(_format_ts(self.start))
+        stream.write(' --> ')
+        stream.write(_format_ts(self.end))
+        if self.settings is not None:
+            stream.write(' ')
+            stream.write(self.settings)
+        stream.write('\n')
+        stream.write(self.text)
+        stream.write('\n')
+
+
+def parse_fragment(frag_content):
+    """
+    A generator that yields (partially) parsed WebVTT blocks when given
+    a bytes object containing the raw contents of a WebVTT file.
+    """
+
+    parser = _MatchParser(frag_content.decode('utf-8'))
+
+    yield Magic.parse(parser)
+
+    while not parser.match(_REGEX_EOF):
+        if parser.consume(_REGEX_BLANK):
+            continue
+
+        block = RegionBlock.parse(parser)
+        if block:
+            yield block
+            continue
+        block = StyleBlock.parse(parser)
+        if block:
+            yield block
+            continue
+        block = CommentBlock.parse(parser)
+        if block:
+            yield block  # XXX: or skip
+            continue
+
+        break
+
+    while not parser.match(_REGEX_EOF):
+        if parser.consume(_REGEX_BLANK):
+            continue
+
+        block = CommentBlock.parse(parser)
+        if block:
+            yield block  # XXX: or skip
+            continue
+        block = CueBlock.parse(parser)
+        if block:
+            yield block
+            continue
+
+        raise ParseError(parser)

From 333217f43e58f93fc8088d4854044b907adddce5 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Fri, 23 Apr 2021 10:52:21 +0200
Subject: [PATCH 13/31] [downloader/hls] Remove duplicate cues using a sliding
 window of candidates

---
 yt_dlp/downloader/hls.py | 25 +++++++++++++++++++++++++
 yt_dlp/webvtt.py         | 10 ++++++++++
 2 files changed, 35 insertions(+)

diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
index cee3807ce..c0e52d35d 100644
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@@ -325,6 +325,31 @@ class HlsFD(FragmentFD):
                         if isinstance(block, webvtt.CueBlock):
                             block.start += adjust
                             block.end += adjust
+
+                            dedup_window = extra_state.setdefault('webvtt_dedup_window', [])
+                            cue = block.as_json
+
+                            # skip the cue if an identical one appears
+                            # in the window of potential duplicates
+                            # and prune the window of unviable candidates
+                            i = 0
+                            skip = True
+                            while i < len(dedup_window):
+                                window_cue = dedup_window[i]
+                                if window_cue == cue:
+                                    break
+                                if window_cue['end'] >= cue['start']:
+                                    i += 1
+                                    continue
+                                del dedup_window[i]
+                            else:
+                                skip = False
+
+                            if skip:
+                                continue
+
+                            # add the cue to the window
+                            dedup_window.append(cue)
                         elif isinstance(block, webvtt.Magic):
                             # XXX: we do not handle MPEGTS overflow
                             if frag_index == 1:
diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py
index 4d026834a..a184ee369 100644
--- a/yt_dlp/webvtt.py
+++ b/yt_dlp/webvtt.py
@@ -322,6 +322,16 @@ class CueBlock(Block):
         stream.write(self.text)
         stream.write('\n')
 
+    @property
+    def as_json(self):
+        return {
+            'id': self.id,
+            'start': self.start,
+            'end': self.end,
+            'text': self.text,
+            'settings': self.settings,
+        }
+
 
 def parse_fragment(frag_content):
     """

From 15828bcf25adb2d9ce2e9e591cc527f695e50420 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Fri, 23 Apr 2021 10:55:09 +0200
Subject: [PATCH 14/31] [downloader/hls] Handle MPEG-2 PES timestamp overflow

---
 yt_dlp/downloader/hls.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
index c0e52d35d..270b33b22 100644
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@@ -351,7 +351,16 @@ class HlsFD(FragmentFD):
                             # add the cue to the window
                             dedup_window.append(cue)
                         elif isinstance(block, webvtt.Magic):
-                            # XXX: we do not handle MPEGTS overflow
+                            # take care of MPEG PES timestamp overflow
+                            if block.mpegts is None:
+                                block.mpegts = 0
+                            extra_state.setdefault('webvtt_mpegts_adjust', 0)
+                            block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33
+                            if block.mpegts < extra_state.get('webvtt_mpegts_last', 0):
+                                extra_state['webvtt_mpegts_adjust'] += 1
+                                block.mpegts += 1 << 33
+                            extra_state['webvtt_mpegts_last'] = block.mpegts
+
                             if frag_index == 1:
                                 extra_state['webvtt_mpegts'] = block.mpegts or 0
                                 extra_state['webvtt_local'] = block.local or 0

From 66a1b8643ad3ef492dfd55692dce6be66397e2b4 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Sun, 18 Apr 2021 12:48:39 +0200
Subject: [PATCH 15/31] [downloader/ism] Support muxing TTML subtitles

---
 yt_dlp/downloader/ism.py   | 11 +++++++++--
 yt_dlp/extractor/common.py |  2 +-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py
index 103064df1..07d74aef0 100644
--- a/yt_dlp/downloader/ism.py
+++ b/yt_dlp/downloader/ism.py
@@ -118,8 +118,7 @@ def write_piff_header(stream, params):
         vmhd_payload += u16.pack(0) * 3  # opcolor
         media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload)  # Video Media Header
     elif stream_type == 'text':
-        sthd_payload = u16.pack(0) * 2
-        media_header_box = full_box(b'sthd', 0, 1, sthd_payload)  # Subtitle Media Header
+        media_header_box = full_box(b'sthd', 0, 0, b'')  # Subtitle Media Header
     else:
         assert False
     minf_payload = media_header_box
@@ -173,6 +172,14 @@ def write_piff_header(stream, params):
             sample_entry_box = box(b'avc1', sample_entry_payload)  # AVC Simple Entry
         else:
             assert False
+    elif stream_type == 'text':
+        if fourcc == 'TTML':
+            sample_entry_payload += b'http://www.w3.org/ns/ttml\0'  # namespace
+            sample_entry_payload += b'\0'  # schema location
+            sample_entry_payload += b'\0'  # auxilary mime types(??)
+            sample_entry_box = box(b'stpp', sample_entry_payload)
+        else:
+            assert False
     else:
         assert False
     stsd_payload += sample_entry_box
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 803c7fa06..2ca25951b 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -2816,7 +2816,7 @@ class InfoExtractor(object):
             for track in stream.findall('QualityLevel'):
                 fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
                 # TODO: add support for WVC1 and WMAP
-                if fourcc not in ('H264', 'AVC1', 'AACL'):
+                if fourcc not in ('H264', 'AVC1', 'AACL', 'TTML'):
                     self.report_warning('%s is not a supported codec' % fourcc)
                     continue
                 tbr = int(track.attrib['Bitrate']) // 1000

From c26326c1bebaf7c784d470b6a9502bac4dc37e96 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Thu, 15 Apr 2021 10:15:43 +0200
Subject: [PATCH 16/31] [generic] Extract subtitles from direct HLS manifest
 links

---
 yt_dlp/extractor/generic.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 4250d1093..e41ea1260 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2444,8 +2444,9 @@ class GenericIE(InfoExtractor):
         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
         if m:
             format_id = compat_str(m.group('format_id'))
+            subtitles = {}
             if format_id.endswith('mpegurl'):
-                formats = self._extract_m3u8_formats(url, video_id, 'mp4')
+                formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
             elif format_id == 'f4m':
                 formats = self._extract_f4m_formats(url, video_id)
             else:
@@ -2457,6 +2458,7 @@ class GenericIE(InfoExtractor):
                 info_dict['direct'] = True
             self._sort_formats(formats)
             info_dict['formats'] = formats
+            info_dict['subtitles'] = subtitles
             return info_dict
 
         if not self._downloader.params.get('test', False) and not is_intentional:

From 7de27caf1633a58c92ecea800017f0fc103fecc3 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Sun, 18 Apr 2021 08:41:28 +0200
Subject: [PATCH 17/31] [generic] Extract subtitles from direct DASH manifest
 links

---
 yt_dlp/extractor/generic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index e41ea1260..f3d50312f 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2526,7 +2526,7 @@ class GenericIE(InfoExtractor):
                         xspf_base_url=full_response.geturl()),
                     video_id)
             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
-                info_dict['formats'] = self._parse_mpd_formats(
+                info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles(
                     doc,
                     mpd_base_url=full_response.geturl().rpartition('/')[0],
                     mpd_url=url)

From 7a450a3b1c2a6f7140083aa2fae416de8b9f7f3c Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Sun, 18 Apr 2021 14:26:32 +0200
Subject: [PATCH 18/31] [generic] Extract subtitles from direct SSTR manifest
 links

---
 yt_dlp/extractor/generic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index f3d50312f..32815476f 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2512,7 +2512,7 @@ class GenericIE(InfoExtractor):
             if doc.tag == 'rss':
                 return self._extract_rss(url, video_id, doc)
             elif doc.tag == 'SmoothStreamingMedia':
-                info_dict['formats'] = self._parse_ism_formats(doc, url)
+                info_dict['formats'], info_dict['subtitles'] = self._parse_ism_formats_and_subtitles(doc, url)
                 self._sort_formats(info_dict['formats'])
                 return info_dict
             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):

From 64a5cf7929f0df9220fd25f404dd19179f44623c Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Sun, 18 Apr 2021 08:40:39 +0200
Subject: [PATCH 19/31] [byutv] Extract subtitles from streaming manifests

---
 yt_dlp/extractor/byutv.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/yt_dlp/extractor/byutv.py b/yt_dlp/extractor/byutv.py
index 0b11bf11f..7c6c826d7 100644
--- a/yt_dlp/extractor/byutv.py
+++ b/yt_dlp/extractor/byutv.py
@@ -82,6 +82,7 @@ class BYUtvIE(InfoExtractor):
 
         info = {}
         formats = []
+        subtitles = {}
         for format_id, ep in video.items():
             if not isinstance(ep, dict):
                 continue
@@ -90,12 +91,16 @@ class BYUtvIE(InfoExtractor):
                 continue
             ext = determine_ext(video_url)
             if ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
+                m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                     video_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls', fatal=False))
+                    m3u8_id='hls', fatal=False)
+                formats.extend(m3u8_fmts)
+                subtitles = self._merge_subtitles(subtitles, m3u8_subs)
             elif ext == 'mpd':
-                formats.extend(self._extract_mpd_formats(
-                    video_url, video_id, mpd_id='dash', fatal=False))
+                mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles(
+                    video_url, video_id, mpd_id='dash', fatal=False)
+                formats.extend(mpd_fmts)
+                subtitles = self._merge_subtitles(subtitles, mpd_subs)
             else:
                 formats.append({
                     'url': video_url,
@@ -114,4 +119,5 @@ class BYUtvIE(InfoExtractor):
             'display_id': display_id,
             'title': display_id,
             'formats': formats,
+            'subtitles': subtitles,
         })

From 0c541b563f566dab1ccdd8e31ba0ac2959670248 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Thu, 15 Apr 2021 10:19:32 +0200
Subject: [PATCH 20/31] [tv4] Extract subtitles from streaming manifests

---
 yt_dlp/extractor/tv4.py | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/yt_dlp/extractor/tv4.py b/yt_dlp/extractor/tv4.py
index b8ad4fafc..4043e6366 100644
--- a/yt_dlp/extractor/tv4.py
+++ b/yt_dlp/extractor/tv4.py
@@ -93,18 +93,31 @@ class TV4IE(InfoExtractor):
                 'device': 'browser',
                 'protocol': 'hls',
             })['playbackItem']['manifestUrl']
-        formats = self._extract_m3u8_formats(
+        formats = []
+        subtitles = {}
+
+        fmts, subs = self._extract_m3u8_formats_and_subtitles(
             manifest_url, video_id, 'mp4',
             'm3u8_native', m3u8_id='hls', fatal=False)
-        formats.extend(self._extract_mpd_formats(
+        formats.extend(fmts)
+        subtitles = self._merge_subtitles(subtitles, subs)
+
+        fmts, subs = self._extract_mpd_formats_and_subtitles(
             manifest_url.replace('.m3u8', '.mpd'),
-            video_id, mpd_id='dash', fatal=False))
-        formats.extend(self._extract_f4m_formats(
+            video_id, mpd_id='dash', fatal=False)
+        formats.extend(fmts)
+        subtitles = self._merge_subtitles(subtitles, subs)
+
+        fmts = self._extract_f4m_formats(
             manifest_url.replace('.m3u8', '.f4m'),
-            video_id, f4m_id='hds', fatal=False))
-        formats.extend(self._extract_ism_formats(
+            video_id, f4m_id='hds', fatal=False)
+        formats.extend(fmts)
+
+        fmts, subs = self._extract_ism_formats_and_subtitles(
             re.sub(r'\.ism/.*?\.m3u8', r'.ism/Manifest', manifest_url),
-            video_id, ism_id='mss', fatal=False))
+            video_id, ism_id='mss', fatal=False)
+        formats.extend(fmts)
+        subtitles = self._merge_subtitles(subtitles, subs)
 
         if not formats and info.get('is_geo_restricted'):
             self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
@@ -115,7 +128,7 @@ class TV4IE(InfoExtractor):
             'id': video_id,
             'title': title,
             'formats': formats,
-            # 'subtitles': subtitles,
+            'subtitles': subtitles,
             'description': info.get('description'),
             'timestamp': parse_iso8601(info.get('broadcast_date_time')),
             'duration': int_or_none(info.get('duration')),

From a00d781b730b052d8a6486a03854ca4122389af8 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Sun, 18 Apr 2021 02:09:48 +0200
Subject: [PATCH 21/31] [elonet] Use common code for subtitle extraction

---
 yt_dlp/extractor/elonet.py | 82 ++++++++------------------------------
 1 file changed, 17 insertions(+), 65 deletions(-)

diff --git a/yt_dlp/extractor/elonet.py b/yt_dlp/extractor/elonet.py
index 3647c0a9c..eefba4e24 100644
--- a/yt_dlp/extractor/elonet.py
+++ b/yt_dlp/extractor/elonet.py
@@ -1,9 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import os
 import re
-import tempfile
 
 from .common import InfoExtractor
 from ..utils import (
@@ -12,12 +10,12 @@ from ..utils import (
     try_get,
 )
 from ..compat import compat_str
-from ..downloader.hls import HlsFD
 
 
 class ElonetIE(InfoExtractor):
     _VALID_URL = r'https?://elonet\.finna\.fi/Record/kavi\.elonet_elokuva_(?P<id>[0-9]+)'
-    _TEST = {
+    _TESTS = [{
+        # m3u8 with subtitles
         'url': 'https://elonet.finna.fi/Record/kavi.elonet_elokuva_107867',
         'md5': '8efc954b96c543711707f87de757caea',
         'info_dict': {
@@ -27,62 +25,17 @@ class ElonetIE(InfoExtractor):
             'description': 'Valkoinen peura (1952) on Erik Blombergin ohjaama ja yhdessä Mirjami Kuosmasen kanssa käsikirjoittama tarunomainen kertomus valkoisen peuran hahmossa lii...',
             'thumbnail': 'https://elonet.finna.fi/Cover/Show?id=kavi.elonet_elokuva_107867&index=0&size=large',
         },
-    }
-
-    def _download_m3u8_chunked_subtitle(self, chunklist_url):
-        """
-        Download VTT subtitles from pieces in manifest URL.
-        Return a string containing joined chunks with extra headers removed.
-        """
-        with tempfile.NamedTemporaryFile(delete=True) as outfile:
-            fname = outfile.name
-        hlsdl = HlsFD(self._downloader, {})
-        hlsdl.download(compat_str(fname), {"url": chunklist_url})
-        with open(fname, 'r') as fin:
-            # Remove (some) headers
-            fdata = re.sub(r'X-TIMESTAMP-MAP.*\n+|WEBVTT\n+', '', fin.read())
-        os.remove(fname)
-        return "WEBVTT\n\n" + fdata
-
-    def _parse_m3u8_subtitles(self, m3u8_doc, m3u8_url):
-        """
-        Parse subtitles from HLS / m3u8 manifest.
-        """
-        subtitles = {}
-        baseurl = m3u8_url[:m3u8_url.rindex('/') + 1]
-        for line in m3u8_doc.split('\n'):
-            if 'EXT-X-MEDIA:TYPE=SUBTITLES' in line:
-                lang = self._search_regex(
-                    r'LANGUAGE="(.+?)"', line, 'lang', default=False)
-                uri = self._search_regex(
-                    r'URI="(.+?)"', line, 'uri', default=False)
-                if lang and uri:
-                    data = self._download_m3u8_chunked_subtitle(baseurl + uri)
-                    subtitles[lang] = [{'ext': 'vtt', 'data': data}]
-        return subtitles
-
-    def _parse_mpd_subtitles(self, mpd_doc):
-        """
-        Parse subtitles from MPD manifest.
-        """
-        ns = '{urn:mpeg:dash:schema:mpd:2011}'
-        subtitles = {}
-        for aset in mpd_doc.findall(".//%sAdaptationSet[@mimeType='text/vtt']" % (ns)):
-            lang = aset.attrib.get('lang', 'unk')
-            url = aset.find("./%sRepresentation/%sBaseURL" % (ns, ns)).text
-            subtitles[lang] = [{'ext': 'vtt', 'url': url}]
-        return subtitles
-
-    def _get_subtitles(self, fmt, doc, url):
-        if fmt == 'm3u8':
-            subs = self._parse_m3u8_subtitles(doc, url)
-        elif fmt == 'mpd':
-            subs = self._parse_mpd_subtitles(doc)
-        else:
-            self.report_warning(
-                "Cannot download subtitles from '%s' streams." % (fmt))
-            subs = {}
-        return subs
+    }, {
+        # DASH with subtitles
+        'url': 'https://elonet.finna.fi/Record/kavi.elonet_elokuva_116539',
+        'info_dict': {
+            'id': '116539',
+            'ext': 'mp4',
+            'title': 'Minulla on tiikeri',
+            'description': 'Pienellä pojalla, joka asuu kerrostalossa, on kotieläimenä tiikeri. Se on kuitenkin salaisuus. Kerrostalon räpätäti on Kotilaisen täti, joka on aina vali...',
+            'thumbnail': 'https://elonet.finna.fi/Cover/Show?id=kavi.elonet_elokuva_116539&index=0&size=large&source=Solr',
+        }
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -101,8 +54,8 @@ class ElonetIE(InfoExtractor):
             self._parse_json(json_s, video_id),
             lambda x: x[0]["src"], compat_str)
         formats = []
+        subtitles = {}
         if re.search(r'\.m3u8\??', src):
-            fmt = 'm3u8'
             res = self._download_webpage_handle(
                 # elonet servers have certificate problems
                 src.replace('https:', 'http:'), video_id,
@@ -111,11 +64,10 @@ class ElonetIE(InfoExtractor):
             if res:
                 doc, urlh = res
                 url = urlh.geturl()
-                formats = self._parse_m3u8_formats(doc, url)
+                formats, subtitles = self._parse_m3u8_formats_and_subtitles(doc, url)
                 for f in formats:
                     f['ext'] = 'mp4'
         elif re.search(r'\.mpd\??', src):
-            fmt = 'mpd'
             res = self._download_xml_handle(
                 src, video_id,
                 note='Downloading MPD manifest',
@@ -123,7 +75,7 @@ class ElonetIE(InfoExtractor):
             if res:
                 doc, urlh = res
                 url = base_url(urlh.geturl())
-                formats = self._parse_mpd_formats(doc, mpd_base_url=url)
+                formats, subtitles = self._parse_mpd_formats_and_subtitles(doc, mpd_base_url=url)
         else:
             raise ExtractorError("Unknown streaming format")
 
@@ -133,5 +85,5 @@ class ElonetIE(InfoExtractor):
             'description': description,
             'thumbnail': thumbnail,
             'formats': formats,
-            'subtitles': self.extract_subtitles(fmt, doc, url),
+            'subtitles': subtitles,
         }

From 015c10aeec3bc6d513de16f6fe8c36f382956126 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Thu, 15 Apr 2021 10:18:13 +0200
Subject: [PATCH 22/31] [roosterteeth] Use common code for subtitle extraction

---
 yt_dlp/extractor/roosterteeth.py | 31 ++-----------------------------
 1 file changed, 2 insertions(+), 29 deletions(-)

diff --git a/yt_dlp/extractor/roosterteeth.py b/yt_dlp/extractor/roosterteeth.py
index 0724cef26..2c815bda6 100644
--- a/yt_dlp/extractor/roosterteeth.py
+++ b/yt_dlp/extractor/roosterteeth.py
@@ -103,7 +103,7 @@ class RoosterTeethIE(InfoExtractor):
                 api_episode_url + '/videos', display_id,
                 'Downloading video JSON metadata')['data'][0]
             m3u8_url = video_data['attributes']['url']
-            subtitle_m3u8_url = video_data['links']['download']
+            # XXX: additional URL at video_data['links']['download']
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                 if self._parse_json(e.cause.read().decode(), display_id).get('access') is False:
@@ -111,7 +111,7 @@ class RoosterTeethIE(InfoExtractor):
                         '%s is only available for FIRST members' % display_id)
             raise
 
-        formats = self._extract_m3u8_formats(
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
             m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls')
         self._sort_formats(formats)
 
@@ -134,33 +134,6 @@ class RoosterTeethIE(InfoExtractor):
                             'url': img_url,
                         })
 
-        subtitles = {}
-        res = self._download_webpage_handle(
-            subtitle_m3u8_url, display_id,
-            'Downloading m3u8 information',
-            'Failed to download m3u8 information',
-            fatal=True, data=None, headers={}, query={})
-        if res is not False:
-            subtitle_m3u8_doc, _ = res
-            for line in subtitle_m3u8_doc.split('\n'):
-                if 'EXT-X-MEDIA:TYPE=SUBTITLES' in line:
-                    parts = line.split(',')
-                    for part in parts:
-                        if 'LANGUAGE' in part:
-                            lang = part[part.index('=') + 2:-1]
-                        elif 'URI' in part:
-                            uri = part[part.index('=') + 2:-1]
-                    res = self._download_webpage_handle(
-                        uri, display_id,
-                        'Downloading m3u8 information',
-                        'Failed to download m3u8 information',
-                        fatal=True, data=None, headers={}, query={})
-                    doc, _ = res
-                    for l in doc.split('\n'):
-                        if not l.startswith('#'):
-                            subtitles[lang] = [{'url': uri[:-uri[::-1].index('/')] + l}]
-                            break
-
         return {
             'id': video_id,
             'display_id': display_id,

From 47f4203dd3cf0f75eee2b61b7be8a84aa4947f26 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Thu, 15 Apr 2021 10:14:46 +0200
Subject: [PATCH 23/31] [nytimes] Extract subtitles from HLS manifests

---
 yt_dlp/extractor/nytimes.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/nytimes.py b/yt_dlp/extractor/nytimes.py
index 1f03a9462..99964737d 100644
--- a/yt_dlp/extractor/nytimes.py
+++ b/yt_dlp/extractor/nytimes.py
@@ -46,6 +46,7 @@ class NYTimesBaseIE(InfoExtractor):
 
         urls = []
         formats = []
+        subtitles = {}
         for video in video_data.get('renditions', []):
             video_url = video.get('url')
             format_id = video.get('type')
@@ -54,9 +55,11 @@ class NYTimesBaseIE(InfoExtractor):
             urls.append(video_url)
             ext = mimetype2ext(video.get('mimetype')) or determine_ext(video_url)
             if ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
+                m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                     video_url, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id=format_id or 'hls', fatal=False))
+                    m3u8_id=format_id or 'hls', fatal=False)
+                formats.extend(m3u8_fmts)
+                subtitles = self._merge_subtitles(subtitles, m3u8_subs)
             elif ext == 'mpd':
                 continue
             #     formats.extend(self._extract_mpd_formats(
@@ -96,6 +99,7 @@ class NYTimesBaseIE(InfoExtractor):
             'uploader': video_data.get('byline'),
             'duration': float_or_none(video_data.get('duration'), 1000),
             'formats': formats,
+            'subtitles': subtitles,
             'thumbnails': thumbnails,
         }
 

From efe9dba595cb8b1c129b9e65541adf5febf421e6 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Thu, 15 Apr 2021 10:16:15 +0200
Subject: [PATCH 24/31] [srgssr] Extract subtitles from HLS manifests

---
 yt_dlp/extractor/srgssr.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/yt_dlp/extractor/srgssr.py b/yt_dlp/extractor/srgssr.py
index ac018e740..2977b5e67 100644
--- a/yt_dlp/extractor/srgssr.py
+++ b/yt_dlp/extractor/srgssr.py
@@ -87,6 +87,7 @@ class SRGSSRIE(InfoExtractor):
         title = media_data['title']
 
         formats = []
+        subtitles = {}
         q = qualities(['SD', 'HD'])
         for source in (media_data.get('resourceList') or []):
             format_url = source.get('url')
@@ -104,12 +105,16 @@ class SRGSSRIE(InfoExtractor):
                 if source.get('tokenType') == 'AKAMAI':
                     format_url = self._get_tokenized_src(
                         format_url, media_id, format_id)
-                    formats.extend(self._extract_akamai_formats(
-                        format_url, media_id))
+                    fmts, subs = self._extract_akamai_formats_and_subtitles(
+                        format_url, media_id)
+                    formats.extend(fmts)
+                    subtitles = self._merge_subtitles(subtitles, subs)
                 elif protocol == 'HLS':
-                    formats.extend(self._extract_m3u8_formats(
+                    m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                         format_url, media_id, 'mp4', 'm3u8_native',
-                        m3u8_id=format_id, fatal=False))
+                        m3u8_id=format_id, fatal=False)
+                    formats.extend(m3u8_fmts)
+                    subtitles = self._merge_subtitles(subtitles, m3u8_subs)
             elif protocol in ('HTTP', 'HTTPS'):
                 formats.append({
                     'format_id': format_id,
@@ -133,7 +138,6 @@ class SRGSSRIE(InfoExtractor):
                 })
         self._sort_formats(formats)
 
-        subtitles = {}
         if media_type == 'video':
             for sub in (media_data.get('subtitleList') or []):
                 sub_url = sub.get('url')

From 4bed436371ea43c9068f5dee9cfda38dfef0b719 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Thu, 15 Apr 2021 14:12:59 +0200
Subject: [PATCH 25/31] [twitter] Extract subtitles from HLS manifests

---
 yt_dlp/extractor/twitter.py | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py
index 8a2a77b71..63c11bd47 100644
--- a/yt_dlp/extractor/twitter.py
+++ b/yt_dlp/extractor/twitter.py
@@ -36,9 +36,9 @@ class TwitterBaseIE(InfoExtractor):
     def _extract_variant_formats(self, variant, video_id):
         variant_url = variant.get('url')
         if not variant_url:
-            return []
+            return [], {}
         elif '.m3u8' in variant_url:
-            return self._extract_m3u8_formats(
+            return self._extract_m3u8_formats_and_subtitles(
                 variant_url, video_id, 'mp4', 'm3u8_native',
                 m3u8_id='hls', fatal=False)
         else:
@@ -49,22 +49,27 @@ class TwitterBaseIE(InfoExtractor):
                 'tbr': tbr,
             }
             self._search_dimensions_in_video_url(f, variant_url)
-            return [f]
+            return [f], {}
 
     def _extract_formats_from_vmap_url(self, vmap_url, video_id):
         vmap_data = self._download_xml(vmap_url, video_id)
         formats = []
+        subtitles = {}
         urls = []
         for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
             video_variant.attrib['url'] = compat_urllib_parse_unquote(
                 video_variant.attrib['url'])
             urls.append(video_variant.attrib['url'])
-            formats.extend(self._extract_variant_formats(
-                video_variant.attrib, video_id))
+            fmts, subs = self._extract_variant_formats(
+                video_variant.attrib, video_id)
+            formats.extend(fmts)
+            subtitles = self._merge_subtitles(subtitles, subs)
         video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
         if video_url not in urls:
-            formats.extend(self._extract_variant_formats({'url': video_url}, video_id))
-        return formats
+            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
+            formats.extend(fmts)
+            subtitles = self._merge_subtitles(subtitles, subs)
+        return formats, subtitles
 
     @staticmethod
     def _search_dimensions_in_video_url(a_format, video_url):
@@ -471,8 +476,11 @@ class TwitterIE(TwitterBaseIE):
             video_info = media.get('video_info') or {}
 
             formats = []
+            subtitles = {}
             for variant in video_info.get('variants', []):
-                formats.extend(self._extract_variant_formats(variant, twid))
+                fmts, subs = self._extract_variant_formats(variant, twid)
+                subtitles = self._merge_subtitles(subtitles, subs)
+                formats.extend(fmts)
             self._sort_formats(formats)
 
             thumbnails = []
@@ -491,6 +499,7 @@ class TwitterIE(TwitterBaseIE):
 
             info.update({
                 'formats': formats,
+                'subtitles': subtitles,
                 'thumbnails': thumbnails,
                 'duration': float_or_none(video_info.get('duration_millis'), 1000),
             })
@@ -540,7 +549,7 @@ class TwitterIE(TwitterBaseIE):
                     is_amplify = card_name == 'amplify'
                     vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
                     content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
-                    formats = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
+                    formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
                     self._sort_formats(formats)
 
                     thumbnails = []
@@ -558,6 +567,7 @@ class TwitterIE(TwitterBaseIE):
 
                     info.update({
                         'formats': formats,
+                        'subtitles': subtitles,
                         'thumbnails': thumbnails,
                         'duration': int_or_none(get_binding_value(
                             'content_duration_seconds')),

From 2de3b21e05a619047ca4ae5af7932cca0ae3a5cb Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Sun, 18 Apr 2021 08:41:46 +0200
Subject: [PATCH 26/31] [uplynk] Extract subtitles from HLS manifests

---
 yt_dlp/extractor/uplynk.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/uplynk.py b/yt_dlp/extractor/uplynk.py
index f06bf5b12..c0dba0a6a 100644
--- a/yt_dlp/extractor/uplynk.py
+++ b/yt_dlp/extractor/uplynk.py
@@ -30,7 +30,7 @@ class UplynkIE(InfoExtractor):
     def _extract_uplynk_info(self, uplynk_content_url):
         path, external_id, video_id, session_id = re.match(UplynkIE._VALID_URL, uplynk_content_url).groups()
         display_id = video_id or external_id
-        formats = self._extract_m3u8_formats(
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
             'http://content.uplynk.com/%s.m3u8' % path,
             display_id, 'mp4', 'm3u8_native')
         if session_id:
@@ -48,6 +48,7 @@ class UplynkIE(InfoExtractor):
             'duration': float_or_none(asset.get('duration')),
             'uploader_id': asset.get('owner'),
             'formats': formats,
+            'subtitles': subtitles,
         }
 
     def _real_extract(self, url):

From b2cd5da460d4ee2d376f0af68bdaadce93d0dd58 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Sun, 18 Apr 2021 15:47:49 +0200
Subject: [PATCH 27/31] [francetv] Extract subtitles from the HLS manifest

---
 yt_dlp/extractor/francetv.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py
index 313de343e..e57e165fc 100644
--- a/yt_dlp/extractor/francetv.py
+++ b/yt_dlp/extractor/francetv.py
@@ -151,6 +151,7 @@ class FranceTVIE(InfoExtractor):
                     videos.append(fallback_info['video'])
 
         formats = []
+        subtitles = {}
         for video in videos:
             video_url = video.get('url')
             if not video_url:
@@ -171,10 +172,12 @@ class FranceTVIE(InfoExtractor):
                     sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
                     video_id, f4m_id=format_id, fatal=False))
             elif ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
+                m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                     sign(video_url, format_id), video_id, 'mp4',
                     entry_protocol='m3u8_native', m3u8_id=format_id,
-                    fatal=False))
+                    fatal=False)
+                formats.extend(m3u8_fmts)
+                subtitles = self._merge_subtitles(subtitles, m3u8_subs)
             elif ext == 'mpd':
                 formats.extend(self._extract_mpd_formats(
                     sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False))
@@ -199,13 +202,12 @@ class FranceTVIE(InfoExtractor):
             title += ' - %s' % subtitle
         title = title.strip()
 
-        subtitles = {}
-        subtitles_list = [{
-            'url': subformat['url'],
-            'ext': subformat.get('format'),
-        } for subformat in info.get('subtitles', []) if subformat.get('url')]
-        if subtitles_list:
-            subtitles['fr'] = subtitles_list
+        subtitles_list = [{
+            'url': subformat['url'],
+            'ext': subformat.get('format'),
+        } for subformat in info.get('subtitles', []) if subformat.get('url')]
+        if subtitles_list:  # keep the old guard: never create an empty 'fr' entry
+            subtitles.setdefault('fr', []).extend(subtitles_list)
 
         return {
             'id': video_id,

From c811e8d8bdff723930a1e2def6f144499af98bde Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Sun, 18 Apr 2021 16:45:04 +0200
Subject: [PATCH 28/31] [atresplayer] Extract subtitles from streaming
 manifests

---
 yt_dlp/extractor/atresplayer.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/yt_dlp/extractor/atresplayer.py b/yt_dlp/extractor/atresplayer.py
index c2cec9845..4afde8f90 100644
--- a/yt_dlp/extractor/atresplayer.py
+++ b/yt_dlp/extractor/atresplayer.py
@@ -86,18 +86,23 @@ class AtresPlayerIE(InfoExtractor):
         title = episode['titulo']
 
         formats = []
+        subtitles = {}
         for source in episode.get('sources', []):
             src = source.get('src')
             if not src:
                 continue
             src_type = source.get('type')
             if src_type == 'application/vnd.apple.mpegurl':
-                formats.extend(self._extract_m3u8_formats(
+                m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                     src, video_id, 'mp4', 'm3u8_native',
-                    m3u8_id='hls', fatal=False))
+                    m3u8_id='hls', fatal=False)
+                formats.extend(m3u8_fmts)
+                subtitles = self._merge_subtitles(subtitles, m3u8_subs)
             elif src_type == 'application/dash+xml':
-                formats.extend(self._extract_mpd_formats(
-                    src, video_id, mpd_id='dash', fatal=False))
+                mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles(
+                    src, video_id, mpd_id='dash', fatal=False)
+                formats.extend(mpd_fmts)
+                subtitles = self._merge_subtitles(subtitles, mpd_subs)
         self._sort_formats(formats)
 
         heartbeat = episode.get('heartbeat') or {}
@@ -115,4 +120,5 @@ class AtresPlayerIE(InfoExtractor):
             'channel': get_meta('channel'),
             'season': get_meta('season'),
             'episode_number': int_or_none(get_meta('episodeNumber')),
+            'subtitles': subtitles,
         }

From ec4f374c05d96c5d49836a2a5bb5dc893b9efedc Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Sun, 18 Apr 2021 16:55:14 +0200
Subject: [PATCH 29/31] [wat] Extract subtitles from streaming manifests

---
 yt_dlp/extractor/wat.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/yt_dlp/extractor/wat.py b/yt_dlp/extractor/wat.py
index 05dcc1f17..0f1d08da3 100644
--- a/yt_dlp/extractor/wat.py
+++ b/yt_dlp/extractor/wat.py
@@ -69,19 +69,24 @@ class WatIE(InfoExtractor):
         title = video_info['title']
 
         formats = []
+        subtitles = {}
 
         def extract_formats(manifest_urls):
             for f, f_url in manifest_urls.items():
                 if not f_url:
                     continue
                 if f in ('dash', 'mpd'):
-                    formats.extend(self._extract_mpd_formats(
+                    fmts, subs = self._extract_mpd_formats_and_subtitles(
                         f_url.replace('://das-q1.tf1.fr/', '://das-q1-ssl.tf1.fr/'),
-                        video_id, mpd_id='dash', fatal=False))
+                        video_id, mpd_id='dash', fatal=False)
                 elif f == 'hls':
-                    formats.extend(self._extract_m3u8_formats(
+                    fmts, subs = self._extract_m3u8_formats_and_subtitles(
                         f_url, video_id, 'mp4',
-                        'm3u8_native', m3u8_id='hls', fatal=False))
+                        'm3u8_native', m3u8_id='hls', fatal=False)
+                else:
+                    continue
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
 
         delivery = video_data.get('delivery') or {}
         extract_formats({delivery.get('format'): delivery.get('url')})
@@ -103,4 +108,5 @@ class WatIE(InfoExtractor):
                 video_data, lambda x: x['mediametrie']['chapters'][0]['estatS4'])),
             'duration': int_or_none(video_info.get('duration')),
             'formats': formats,
+            'subtitles': subtitles,
         }

From e0e624ca7f72b5191a3e3b1d96cc6a7db3676143 Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Mon, 19 Apr 2021 18:57:25 +0200
Subject: [PATCH 30/31] [canvas] Extract subtitles from streaming manifests

---
 yt_dlp/extractor/canvas.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/yt_dlp/extractor/canvas.py b/yt_dlp/extractor/canvas.py
index eefbab241..1b7c1d2ff 100644
--- a/yt_dlp/extractor/canvas.py
+++ b/yt_dlp/extractor/canvas.py
@@ -83,24 +83,31 @@ class CanvasIE(InfoExtractor):
         description = data.get('description')
 
         formats = []
+        subtitles = {}
         for target in data['targetUrls']:
             format_url, format_type = url_or_none(target.get('url')), str_or_none(target.get('type'))
             if not format_url or not format_type:
                 continue
             format_type = format_type.upper()
             if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
-                formats.extend(self._extract_m3u8_formats(
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                     format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
-                    m3u8_id=format_type, fatal=False))
+                    m3u8_id=format_type, fatal=False)
+                formats.extend(fmts)
+                subtitles = self._merge_subtitles(subtitles, subs)
             elif format_type == 'HDS':
                 formats.extend(self._extract_f4m_formats(
                     format_url, video_id, f4m_id=format_type, fatal=False))
             elif format_type == 'MPEG_DASH':
-                formats.extend(self._extract_mpd_formats(
-                    format_url, video_id, mpd_id=format_type, fatal=False))
+                fmts, subs = self._extract_mpd_formats_and_subtitles(
+                    format_url, video_id, mpd_id=format_type, fatal=False)
+                formats.extend(fmts)
+                subtitles = self._merge_subtitles(subtitles, subs)
             elif format_type == 'HSS':
-                formats.extend(self._extract_ism_formats(
-                    format_url, video_id, ism_id='mss', fatal=False))
+                fmts, subs = self._extract_ism_formats_and_subtitles(
+                    format_url, video_id, ism_id='mss', fatal=False)
+                formats.extend(fmts)
+                subtitles = self._merge_subtitles(subtitles, subs)
             else:
                 formats.append({
                     'format_id': format_type,
@@ -108,7 +115,6 @@ class CanvasIE(InfoExtractor):
                 })
         self._sort_formats(formats)
 
-        subtitles = {}
         subtitle_urls = data.get('subtitleUrls')
         if isinstance(subtitle_urls, list):
             for subtitle in subtitle_urls:

From e8f834cd8dfc07011d1080321e42bc130e7201bb Mon Sep 17 00:00:00 2001
From: Felix S <felix.von.s@posteo.de>
Date: Mon, 19 Apr 2021 21:42:51 +0200
Subject: [PATCH 31/31] [threeqsdn] Extract subtitles from streaming manifests

---
 yt_dlp/extractor/threeqsdn.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/yt_dlp/extractor/threeqsdn.py b/yt_dlp/extractor/threeqsdn.py
index 5eaa991eb..bb7610352 100644
--- a/yt_dlp/extractor/threeqsdn.py
+++ b/yt_dlp/extractor/threeqsdn.py
@@ -99,16 +99,21 @@ class ThreeQSDNIE(InfoExtractor):
         aspect = float_or_none(config.get('aspect'))
 
         formats = []
+        subtitles = {}
         for source_type, source in (config.get('sources') or {}).items():
             if not source:
                 continue
             if source_type == 'dash':
-                formats.extend(self._extract_mpd_formats(
-                    source, video_id, mpd_id='mpd', fatal=False))
+                fmts, subs = self._extract_mpd_formats_and_subtitles(
+                    source, video_id, mpd_id='mpd', fatal=False)
+                formats.extend(fmts)
+                subtitles = self._merge_subtitles(subtitles, subs)
             elif source_type == 'hls':
-                formats.extend(self._extract_m3u8_formats(
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                     source, video_id, 'mp4', 'm3u8' if live else 'm3u8_native',
-                    m3u8_id='hls', fatal=False))
+                    m3u8_id='hls', fatal=False)
+                formats.extend(fmts)
+                subtitles = self._merge_subtitles(subtitles, subs)
             elif source_type == 'progressive':
                 for s in source:
                     src = s.get('src')
@@ -138,7 +143,6 @@ class ThreeQSDNIE(InfoExtractor):
         # behaviour is being kept as-is
         self._sort_formats(formats, ('res', 'source_preference'))
 
-        subtitles = {}
         for subtitle in (config.get('subtitles') or []):
             src = subtitle.get('src')
             if not src: