Update twitter scraper (to yt-dlp)

AlberLC
2024-06-08 15:25:25 +02:00
parent 25c5391f5a
commit d6986d8b63


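This change removes the dedicated Twitter scraper: the `twitter` import, `twitter.find_ids`, the `tweet_ids` unpacking and the `twitter.get_medias` call all disappear, so tweet links no longer get a platform-specific extractor and instead fall through to the generic `yt_dlp_wrapper.get_medias(...)` path together with any other URL found in the message. As a rough illustration of why that works, the yt-dlp library can resolve a tweet's media on its own; the snippet below is a minimal sketch with a placeholder URL and options, not code from this repository.

import yt_dlp

# Placeholder tweet URL, used only for illustration.
TWEET_URL = 'https://twitter.com/user/status/1234567890123456789'

# Let yt-dlp's bundled extractors resolve the tweet instead of a hand-written scraper.
with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
    info = ydl.extract_info(TWEET_URL, download=False)  # metadata only, no download
    print(info.get('title'), info.get('webpage_url'))
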
@@ -8,7 +8,7 @@ from collections import defaultdict
 from typing import Iterable
 import flanautils
-from flanaapis import InstagramMediaNotFoundError, RedditMediaNotFoundError, instagram, reddit, tiktok, twitter, yt_dlp_wrapper
+from flanaapis import InstagramMediaNotFoundError, RedditMediaNotFoundError, instagram, reddit, tiktok, yt_dlp_wrapper
 from flanautils import Media, MediaType, OrderedSet, return_if_first_empty
 from multibot import MultiBot, RegisteredCallback, SendError, constants as multibot_constants, owner, reply
@@ -47,7 +47,6 @@ class ScraperBot(MultiBot, ABC):
     @staticmethod
     async def _find_ids(text: str) -> tuple[OrderedSet[str], ...]:
         return (
-            twitter.find_ids(text),
             instagram.find_ids(text),
             reddit.find_ids(text),
             await tiktok.find_users_and_ids(text),
@@ -189,30 +188,40 @@ class ScraperBot(MultiBot, ABC):
                     ids[i] |= platform_ids
                 except IndexError:
                     ids.append(platform_ids)
-            if not any(ids) and flanautils.find_urls(text_part):
-                if force:
-                    media_urls.append(text_part)
-                elif not any(domain.lower() in text_part for domain in multibot_constants.GIF_DOMAINS):
-                    media_urls.append(text_part)
+            if (
+                not any(ids)
+                and
+                flanautils.find_urls(text_part)
+                and
+                (
+                    force
+                    or
+                    not any(domain.lower() in text_part for domain in multibot_constants.GIF_DOMAINS)
+                )
+            ):
+                media_urls.append(text_part)
         if not any(ids) and not media_urls:
             return medias
         bot_state_message = await self.send(random.choice(constants.SCRAPING_PHRASES), message)
-        tweet_ids, instagram_ids, reddit_ids, tiktok_users_and_ids, tiktok_download_urls = ids
+        instagram_ids, reddit_ids, tiktok_users_and_ids, tiktok_download_urls = ids
         try:
             reddit_medias = await reddit.get_medias(reddit_ids, preferred_video_codec, preferred_extension, force, audio_only, timeout_for_media)
         except RedditMediaNotFoundError as e:
             exceptions.append(e)
             reddit_medias = ()
         reddit_urls = []
         for reddit_media in reddit_medias:
             if reddit_media.source:
                 medias.add(reddit_media)
             else:
                 reddit_urls.append(reddit_media.url)
         if force:
             media_urls.extend(reddit_urls)
         else:
@@ -225,7 +234,6 @@ class ScraperBot(MultiBot, ABC):
                 media_urls.append(reddit_url)
         gather_future = asyncio.gather(
-            twitter.get_medias(tweet_ids, audio_only),
             tiktok.get_medias(tiktok_users_and_ids, tiktok_download_urls, preferred_video_codec, preferred_extension, force, audio_only, timeout_for_media),
             yt_dlp_wrapper.get_medias(media_urls, preferred_video_codec, preferred_extension, force, audio_only, timeout_for_media),
             return_exceptions=True
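The rewritten gather call keeps `return_exceptions=True`, so a failure in one platform's scraper is returned as a result instead of cancelling the other downloads. A small, self-contained sketch of that pattern (the coroutine names are illustrative, not from the repository):

import asyncio

async def scrape(name: str) -> str:
    # Stand-in for a platform scraper; 'broken' simulates a platform that raises.
    if name == 'broken':
        raise RuntimeError(f'{name} scraper failed')
    return f'{name} medias'

async def main() -> None:
    # return_exceptions=True collects raised exceptions as results,
    # so the remaining coroutines still run to completion.
    results = await asyncio.gather(scrape('tiktok'), scrape('broken'), scrape('yt_dlp'), return_exceptions=True)
    for result in results:
        print(result)

asyncio.run(main())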