Update Instagram scraper

AlberLC
2023-04-03 12:00:20 +02:00
parent ae7fb1b9b2
commit 8594351229
2 changed files with 30 additions and 6 deletions

View File

@@ -1,13 +1,14 @@
__all__ = ['ScraperBot']
import asyncio
import datetime
import random
from abc import ABC
from collections import defaultdict
from typing import Iterable
import flanautils
from flanaapis import RedditMediaNotFoundError, instagram, reddit, tiktok, twitter, yt_dlp_wrapper
from flanaapis import InstagramMediaNotFoundError, RedditMediaNotFoundError, instagram, reddit, tiktok, twitter, yt_dlp_wrapper
from flanautils import Media, MediaType, OrderedSet, return_if_first_empty
from multibot import MultiBot, RegisteredCallback, SendError, constants as multibot_constants, reply
@@ -19,12 +20,22 @@ from flanabot.models import Action, BotAction, Message
# --------------------------------------------- SCRAPER_BOT --------------------------------------------- #
# ------------------------------------------------------------------------------------------------------- #
class ScraperBot(MultiBot, ABC):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.owner_chat = None
self.instagram_ban_date = None
# -------------------------------------------------------- #
# ------------------- PROTECTED METHODS ------------------ #
# -------------------------------------------------------- #
def _add_handlers(self):
super()._add_handlers()
self.register(self._on_no_scraping, (multibot_constants.KEYWORDS['negate'], constants.KEYWORDS['scraping']))
self.register(self._on_reset_instagram_ban, (multibot_constants.KEYWORDS['delete'], ('instagram',)))
self.register(self._on_reset_instagram_ban, (multibot_constants.KEYWORDS['reset'], ('instagram',)))
self.register(self._on_scraping, constants.KEYWORDS['scraping'])
self.register(self._on_scraping, constants.KEYWORDS['force'])
self.register(self._on_scraping, multibot_constants.KEYWORDS['audio'])
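
The two register calls added above route messages that combine a 'delete' or 'reset' keyword with 'instagram' to _on_reset_instagram_ban, which clears the stored ban date. The toy dispatcher below only illustrates that tuple-of-keyword-groups idea; it is a guess at the matching behaviour, not multibot's actual register/RegisteredCallback implementation, and the keyword lists are made up:

from typing import Callable, Iterable

Handler = Callable[[str], None]
registry: list[tuple[tuple[Iterable[str], ...], Handler]] = []

def register(handler: Handler, keyword_groups: tuple[Iterable[str], ...]) -> None:
    registry.append((keyword_groups, handler))

def dispatch(text: str) -> None:
    words = set(text.lower().split())
    for keyword_groups, handler in registry:
        # Fire only if every keyword group is matched by at least one word of the message.
        if all(words & set(group) for group in keyword_groups):
            handler(text)

def on_reset_instagram_ban(text: str) -> None:
    print('instagram ban date cleared')

# Mirrors the two new registrations: a "delete"-style group plus ('instagram',),
# and a "reset"-style group plus ('instagram',).
register(on_reset_instagram_ban, (('delete', 'remove'), ('instagram',)))
register(on_reset_instagram_ban, (('reset', 'restart'), ('instagram',)))

dispatch('reset instagram please')  # -> instagram ban date cleared
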
@@ -32,8 +43,6 @@ class ScraperBot(MultiBot, ABC):
self.register(lambda message: self._on_scraping(message, delete=False), (multibot_constants.KEYWORDS['negate'], multibot_constants.KEYWORDS['message']))
self.register(lambda message: self._on_scraping(message, delete=False), (multibot_constants.KEYWORDS['negate'], multibot_constants.KEYWORDS['delete'], multibot_constants.KEYWORDS['message']))
self.register(self._on_no_scraping, (multibot_constants.KEYWORDS['negate'], constants.KEYWORDS['scraping']))
self.register(self._on_song_info, constants.KEYWORDS['song_info'])
@staticmethod
@@ -71,7 +80,7 @@ class ScraperBot(MultiBot, ABC):
@staticmethod
def _medias_sended_info(medias: Iterable[Media]) -> str:
medias_count = defaultdict(lambda: defaultdict(int))
medias_count: dict = defaultdict(lambda: defaultdict(int))
for media in medias:
if not media.source or isinstance(media.source, str):
medias_count[media.source][media.type_] += 1
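
The only change in _medias_sended_info is the explicit medias_count: dict annotation; the counting itself is a nested defaultdict keyed by source and then by media type. A self-contained sketch of that structure, where Media and MediaType are simplified stand-ins for the flanautils classes:

from collections import defaultdict
from dataclasses import dataclass
from enum import Enum, auto

class MediaType(Enum):
    IMAGE = auto()
    VIDEO = auto()

@dataclass
class Media:
    source: str | None
    type_: MediaType

def count_medias(medias):
    # The annotation added by the commit only documents the outer type;
    # the structure is source -> media type -> count.
    medias_count: dict = defaultdict(lambda: defaultdict(int))
    for media in medias:
        if not media.source or isinstance(media.source, str):
            medias_count[media.source][media.type_] += 1
    return medias_count

counts = count_medias([Media('instagram', MediaType.IMAGE),
                       Media('instagram', MediaType.VIDEO),
                       Media('twitter', MediaType.IMAGE)])
print(counts['instagram'][MediaType.VIDEO])  # 1
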
@@ -211,16 +220,27 @@ class ScraperBot(MultiBot, ABC):
gather_result = asyncio.gather(
twitter.get_medias(tweet_ids, audio_only),
instagram.get_medias(instagram_ids, audio_only),
tiktok.get_medias(tiktok_users_and_ids, tiktok_download_urls, 'h264', 'mp4', force, audio_only, timeout_for_media),
yt_dlp_wrapper.get_medias(media_urls, 'h264', 'mp4', force, audio_only, timeout_for_media),
return_exceptions=True
)
await gather_result
instagram_results = []
if not self.instagram_ban_date or self.instagram_ban_date + constants.INSTAGRAM_BAN_SLEEP >= datetime.datetime.now(datetime.timezone.utc):
try:
instagram_results = await instagram.get_medias(instagram_ids, audio_only)
except InstagramMediaNotFoundError:
self.instagram_ban_date = datetime.datetime.now(datetime.timezone.utc)
if not self.owner_chat:
self.owner_chat = await self.get_chat(self.owner_id) or await self.get_chat(await self.get_user(self.owner_id))
await self.send('Instagram limite excedido.', self.owner_chat)
if not instagram_results:
instagram_results = await instagram.get_medias_v2(instagram_ids, audio_only)
await self.delete_message(bot_state_message)
gather_medias, gather_exceptions = flanautils.filter_exceptions(gather_result.result())
gather_medias, gather_exceptions = flanautils.filter_exceptions(gather_result.result() + instagram_results)
await self._manage_exceptions(exceptions + gather_exceptions, message, print_traceback=True)
return medias | gather_medias
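
The new block pulls Instagram out of the asyncio.gather call and wraps it in a cooldown-plus-fallback flow: try the primary scraper, record a ban date and notify the owner when it fails, and fall back to get_medias_v2 whenever the primary path yields nothing. Below is a minimal standalone sketch of that flow; the class, get_medias/get_medias_v2/notify_owner names are stand-ins rather than the real flanaapis/multibot APIs, and it assumes the intent is to skip the primary scraper while the ban window is still open:

import asyncio
import datetime

INSTAGRAM_BAN_SLEEP = datetime.timedelta(days=1)

class InstagramMediaNotFoundError(Exception):
    pass

class InstagramFetcher:
    def __init__(self):
        self.ban_date: datetime.datetime | None = None

    async def get_medias(self, ids):        # primary scraper (stand-in)
        raise InstagramMediaNotFoundError   # simulate a rate limit / ban

    async def get_medias_v2(self, ids):     # fallback scraper (stand-in)
        return [f'media_{id_}' for id_ in ids]

    async def notify_owner(self, text):     # stand-in for self.send(..., self.owner_chat)
        print(text)

    async def fetch(self, ids):
        now = datetime.datetime.now(datetime.timezone.utc)
        results = []
        # Try the primary scraper only if there is no recorded ban or the window has expired.
        if not self.ban_date or self.ban_date + INSTAGRAM_BAN_SLEEP <= now:
            try:
                results = await self.get_medias(ids)
            except InstagramMediaNotFoundError:
                self.ban_date = now
                await self.notify_owner('Instagram limit exceeded.')
        if not results:
            results = await self.get_medias_v2(ids)
        return results

print(asyncio.run(InstagramFetcher().fetch(['abc123'])))  # ['media_abc123']
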
@@ -234,6 +254,9 @@ class ScraperBot(MultiBot, ABC):
async def _on_recover_message(self, message: Message):
pass
async def _on_reset_instagram_ban(self, _message: Message):
self.instagram_ban_date = None
async def _on_scraping(
self,
message: Message,

View File

@@ -15,6 +15,7 @@ FLOOD_2s_LIMIT = 2
FLOOD_7s_LIMIT = 4
HEAT_PERIOD_SECONDS = datetime.timedelta(minutes=15).total_seconds()
HELP_MINUTES_LIMIT = 1
INSTAGRAM_BAN_SLEEP = datetime.timedelta(days=1)
INSULT_PROBABILITY = 0.00166666667
MAX_PLACE_QUERY_LENGTH = 50
PUNISHMENT_INCREMENT_EXPONENT = 6
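
The new INSTAGRAM_BAN_SLEEP constant sets a one-day cooldown on the primary Instagram scraper. A tiny illustration of the timedelta arithmetic it is used for (the expiry check itself is illustrative, reusing the names from the hunks above):

import datetime

INSTAGRAM_BAN_SLEEP = datetime.timedelta(days=1)
ban_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(hours=3)

ban_expired = ban_date + INSTAGRAM_BAN_SLEEP <= datetime.datetime.now(datetime.timezone.utc)
print(ban_expired)  # False: only 3 hours have passed, the 1-day window is still open
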