Update twitter scraper (to yt-dlp)
This commit is contained in:
@@ -8,7 +8,7 @@ from collections import defaultdict
|
|||||||
from typing import Iterable
|
from typing import Iterable
|
||||||
|
|
||||||
import flanautils
|
import flanautils
|
||||||
from flanaapis import InstagramMediaNotFoundError, RedditMediaNotFoundError, instagram, reddit, tiktok, twitter, yt_dlp_wrapper
|
from flanaapis import InstagramMediaNotFoundError, RedditMediaNotFoundError, instagram, reddit, tiktok, yt_dlp_wrapper
|
||||||
from flanautils import Media, MediaType, OrderedSet, return_if_first_empty
|
from flanautils import Media, MediaType, OrderedSet, return_if_first_empty
|
||||||
from multibot import MultiBot, RegisteredCallback, SendError, constants as multibot_constants, owner, reply
|
from multibot import MultiBot, RegisteredCallback, SendError, constants as multibot_constants, owner, reply
|
||||||
|
|
||||||
@@ -47,7 +47,6 @@ class ScraperBot(MultiBot, ABC):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
async def _find_ids(text: str) -> tuple[OrderedSet[str], ...]:
|
async def _find_ids(text: str) -> tuple[OrderedSet[str], ...]:
|
||||||
return (
|
return (
|
||||||
twitter.find_ids(text),
|
|
||||||
instagram.find_ids(text),
|
instagram.find_ids(text),
|
||||||
reddit.find_ids(text),
|
reddit.find_ids(text),
|
||||||
await tiktok.find_users_and_ids(text),
|
await tiktok.find_users_and_ids(text),
|
||||||
@@ -189,30 +188,40 @@ class ScraperBot(MultiBot, ABC):
|
|||||||
ids[i] |= platform_ids
|
ids[i] |= platform_ids
|
||||||
except IndexError:
|
except IndexError:
|
||||||
ids.append(platform_ids)
|
ids.append(platform_ids)
|
||||||
if not any(ids) and flanautils.find_urls(text_part):
|
|
||||||
if force:
|
if (
|
||||||
media_urls.append(text_part)
|
not any(ids)
|
||||||
elif not any(domain.lower() in text_part for domain in multibot_constants.GIF_DOMAINS):
|
and
|
||||||
media_urls.append(text_part)
|
flanautils.find_urls(text_part)
|
||||||
|
and
|
||||||
|
(
|
||||||
|
force
|
||||||
|
or
|
||||||
|
not any(domain.lower() in text_part for domain in multibot_constants.GIF_DOMAINS)
|
||||||
|
)
|
||||||
|
):
|
||||||
|
media_urls.append(text_part)
|
||||||
|
|
||||||
if not any(ids) and not media_urls:
|
if not any(ids) and not media_urls:
|
||||||
return medias
|
return medias
|
||||||
|
|
||||||
bot_state_message = await self.send(random.choice(constants.SCRAPING_PHRASES), message)
|
bot_state_message = await self.send(random.choice(constants.SCRAPING_PHRASES), message)
|
||||||
|
|
||||||
tweet_ids, instagram_ids, reddit_ids, tiktok_users_and_ids, tiktok_download_urls = ids
|
instagram_ids, reddit_ids, tiktok_users_and_ids, tiktok_download_urls = ids
|
||||||
|
|
||||||
try:
|
try:
|
||||||
reddit_medias = await reddit.get_medias(reddit_ids, preferred_video_codec, preferred_extension, force, audio_only, timeout_for_media)
|
reddit_medias = await reddit.get_medias(reddit_ids, preferred_video_codec, preferred_extension, force, audio_only, timeout_for_media)
|
||||||
except RedditMediaNotFoundError as e:
|
except RedditMediaNotFoundError as e:
|
||||||
exceptions.append(e)
|
exceptions.append(e)
|
||||||
reddit_medias = ()
|
reddit_medias = ()
|
||||||
|
|
||||||
reddit_urls = []
|
reddit_urls = []
|
||||||
for reddit_media in reddit_medias:
|
for reddit_media in reddit_medias:
|
||||||
if reddit_media.source:
|
if reddit_media.source:
|
||||||
medias.add(reddit_media)
|
medias.add(reddit_media)
|
||||||
else:
|
else:
|
||||||
reddit_urls.append(reddit_media.url)
|
reddit_urls.append(reddit_media.url)
|
||||||
|
|
||||||
if force:
|
if force:
|
||||||
media_urls.extend(reddit_urls)
|
media_urls.extend(reddit_urls)
|
||||||
else:
|
else:
|
||||||
@@ -225,7 +234,6 @@ class ScraperBot(MultiBot, ABC):
|
|||||||
media_urls.append(reddit_url)
|
media_urls.append(reddit_url)
|
||||||
|
|
||||||
gather_future = asyncio.gather(
|
gather_future = asyncio.gather(
|
||||||
twitter.get_medias(tweet_ids, audio_only),
|
|
||||||
tiktok.get_medias(tiktok_users_and_ids, tiktok_download_urls, preferred_video_codec, preferred_extension, force, audio_only, timeout_for_media),
|
tiktok.get_medias(tiktok_users_and_ids, tiktok_download_urls, preferred_video_codec, preferred_extension, force, audio_only, timeout_for_media),
|
||||||
yt_dlp_wrapper.get_medias(media_urls, preferred_video_codec, preferred_extension, force, audio_only, timeout_for_media),
|
yt_dlp_wrapper.get_medias(media_urls, preferred_video_codec, preferred_extension, force, audio_only, timeout_for_media),
|
||||||
return_exceptions=True
|
return_exceptions=True
|
||||||
|
|||||||
Reference in New Issue
Block a user