kopia lustrzana https://github.com/bellingcat/auto-archiver
Fix lingering merge conflict issues
rodzic
59b910ec30
commit
06b1f4c0ca
|
@@ -1,6 +1,7 @@
|
||||||
import re
|
import re
|
||||||
from .dropin import GenericDropin
|
from .dropin import GenericDropin
|
||||||
from auto_archiver.core.metadata import Metadata
|
from auto_archiver.core.metadata import Metadata
|
||||||
|
from yt_dlp.extractor.facebook import FacebookIE
|
||||||
|
|
||||||
# TODO: Remove if / when https://github.com/yt-dlp/yt-dlp/pull/12275 is merged
|
# TODO: Remove if / when https://github.com/yt-dlp/yt-dlp/pull/12275 is merged
|
||||||
from yt_dlp.utils import (
|
from yt_dlp.utils import (
|
||||||
|
@@ -121,11 +122,7 @@ def _extract_metadata(self, webpage, video_id):
|
||||||
|
|
||||||
|
|
||||||
class Facebook(GenericDropin):
|
class Facebook(GenericDropin):
|
||||||
def extract_post(self, url: str, ie_instance):
|
def extract_post(self, url: str, ie_instance: FacebookIE):
|
||||||
video_id = ie_instance._match_valid_url(url).group("id")
|
|
||||||
ie_instance._download_webpage(url.replace("://m.facebook.com/", "://www.facebook.com/"), video_id)
|
|
||||||
webpage = ie_instance._download_webpage(url, ie_instance._match_valid_url(url).group("id"))
|
|
||||||
|
|
||||||
post_id_regex = r"(?P<id>pfbid[A-Za-z0-9]+|\d+|t\.(\d+\/\d+))"
|
post_id_regex = r"(?P<id>pfbid[A-Za-z0-9]+|\d+|t\.(\d+\/\d+))"
|
||||||
post_id = re.search(post_id_regex, url).group("id")
|
post_id = re.search(post_id_regex, url).group("id")
|
||||||
webpage = ie_instance._download_webpage(url.replace("://m.facebook.com/", "://www.facebook.com/"), post_id)
|
webpage = ie_instance._download_webpage(url.replace("://m.facebook.com/", "://www.facebook.com/"), post_id)
|
||||||
|
@@ -137,7 +134,7 @@ class Facebook(GenericDropin):
|
||||||
post_data = _extract_metadata(ie_instance, webpage, post_id)
|
post_data = _extract_metadata(ie_instance, webpage, post_id)
|
||||||
return post_data
|
return post_data
|
||||||
|
|
||||||
def create_metadata(self, post: dict, ie_instance, archiver, url):
|
def create_metadata(self, post: dict, ie_instance: FacebookIE, archiver, url):
|
||||||
result = Metadata()
|
result = Metadata()
|
||||||
result.set_content(post.get("description", ""))
|
result.set_content(post.get("description", ""))
|
||||||
result.set_title(post.get("title", ""))
|
result.set_title(post.get("title", ""))
|
||||||
|
@@ -145,11 +142,11 @@ class Facebook(GenericDropin):
|
||||||
result.set_url(url)
|
result.set_url(url)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def is_suitable(self, url, info_extractor):
|
def is_suitable(self, url, info_extractor: FacebookIE):
|
||||||
regex = r"(?:https?://(?:[\w-]+\.)?(?:facebook\.com||facebookwkhpilnemxj7asaniu7vnjjbiltxjqhye3mhbshg7kx5tfyd\.onion)/)"
|
regex = r"(?:https?://(?:[\w-]+\.)?(?:facebook\.com||facebookwkhpilnemxj7asaniu7vnjjbiltxjqhye3mhbshg7kx5tfyd\.onion)/)"
|
||||||
return re.match(regex, url)
|
return re.match(regex, url)
|
||||||
|
|
||||||
def skip_ytdlp_download(self, url: str, ie_instance):
|
def skip_ytdlp_download(self, url: str, is_instance: FacebookIE):
|
||||||
"""
|
"""
|
||||||
Skip using the ytdlp download method for Facebook *photo* posts, they have a URL with an id of t.XXXXX/XXXXX
|
Skip using the ytdlp download method for Facebook *photo* posts, they have a URL with an id of t.XXXXX/XXXXX
|
||||||
"""
|
"""
|
||||||
|
|
|
@@ -352,7 +352,7 @@ class GenericExtractor(Extractor):
|
||||||
dropin_submodule = self.dropin_for_name(info_extractor.ie_key())
|
dropin_submodule = self.dropin_for_name(info_extractor.ie_key())
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if dropin_submodule and dropin_submodule.skip_ytdlp_download(info_extractor, url):
|
if dropin_submodule and dropin_submodule.skip_ytdlp_download(url, info_extractor):
|
||||||
logger.debug(f"Skipping using ytdlp to download files for {info_extractor.ie_key()}")
|
logger.debug(f"Skipping using ytdlp to download files for {info_extractor.ie_key()}")
|
||||||
raise SkipYtdlp()
|
raise SkipYtdlp()
|
||||||
|
|
||||||
|
|
Ładowanie…
Reference in New Issue