Files
JapariArchive/Twitter/downloader.py
2026-01-08 22:34:54 +01:00

148 lines
6.4 KiB
Python

from __future__ import annotations
from typing import TYPE_CHECKING
import asyncio
from datetime import datetime
import gc
import traceback
import tracemalloc
from Classifier.classifyHelper import classify_all
from Database.x_classes import DownloadMode
from Discord import discordHelper
from Twitter import tweetHelper
from tweety.types import Tweet
from exceptions import ACCOUNT_DEAD, ACCOUNT_SKIP, DOWNLOAD_FAIL, NO_CHANNEL, OTHER_ERROR
from Database.x_classes import ActionTaken, DownloadMode, ErrorID, HavoxLabel, PostRating, x_posts, x_posts_images, x_accounts
from Database.db_schema import x_accounts as schema_x_accounts
if TYPE_CHECKING:
from runtimeBotData import RuntimeBotData
async def download_loop(botData: RuntimeBotData):
guild = botData.client.guilds[0]
try:
results = {}
botData.new_accounts = []
for artist in botData.db.x_get_all_accounts():
if artist.is_deleted: continue
#sleep to avoid rate limits
await asyncio.sleep(5)
print("Artist:", artist.name)
#wait for ALL new posts to be found
try:
match artist.download_mode:
case DownloadMode.NO_DOWNLOAD:
continue
case DownloadMode.DOWNLOAD:
await discordHelper.ensure_has_channel_or_thread(artist, guild, botData.db)
new_posts = await tweetHelper.UpdateMediaPosts(artist, botData)
case DownloadMode.DOWNLOAD_ALL:
await discordHelper.ensure_has_channel_or_thread(artist, guild, botData.db)
new_posts = await tweetHelper.DownloadAllMediaPosts(artist, botData)
case _:
continue
except ACCOUNT_DEAD:
botData.db.x_update_account_properties(artist.id, [(schema_x_accounts.is_deleted, True)])
continue
except ACCOUNT_SKIP:
continue
if len(new_posts) == 0: continue
new_posts_count = len([post for post in new_posts if len(post.media) > 0])
if new_posts_count > 20:
#skips posting to discord if there are too many posts
botData.new_accounts.append(artist.name)
new_posts.sort(key= lambda x: x.date)
for tweet in new_posts: #posts should arrive here in chronological order
await download_post(artist, tweet, botData)
gc.collect()
print(tracemalloc.get_traced_memory())
results[artist.name] = new_posts_count
if artist.download_mode == DownloadMode.DOWNLOAD_ALL:
botData.db.x_update_account_properties(artist.id, [(schema_x_accounts.download_mode, DownloadMode.DOWNLOAD)])
await discordHelper.post_result(results, guild, botData.new_accounts)
except Exception as ex:
print(ex)
await discordHelper.send_error(traceback.format_exc()[0:256], botData)
def _rating_value(rating):
    """Return a sortable severity for ``rating`` (0 when it is not a known rating)."""
    ranked = (
        (PostRating.Explicit, 4),
        (PostRating.Questionable, 3),
        (PostRating.Sensitive, 2),
        (PostRating.General, 1),
    )
    for candidate, value in ranked:
        if rating == candidate:
            return value
    return 0


def _store_post(db, post, images, *, require_post):
    """Insert ``post`` and its ``images`` as one transaction.

    Everything is written with ``commit=False`` and committed at the end; if
    anything raises, the connection is rolled back and the exception is
    re-raised unchanged.  With ``require_post`` set, a falsy result from
    ``x_insert_post`` is treated as a failure.
    """
    try:
        inserted = db.x_insert_post(post, commit=False)
        if require_post and not inserted:
            raise Exception("Transaction error")
        for image in images:
            db.x_insert_image(image, commit=False)
    except Exception:
        db.conn.rollback()
        raise
    else:
        db.conn.commit()


async def download_post(artist: x_accounts, tweet: Tweet, botData: RuntimeBotData):
    """Download, classify, publish and persist a single tweet.

    Steps, in order:

    1. A tweet without media is stored with ``ErrorID.NO_ART`` and nothing
       else happens.
    2. The media is downloaded; on ``DOWNLOAD_FAIL`` the post is stored with
       the exception's code and every image row with
       ``ErrorID.DOWNLOAD_FAIL``.
    3. Each downloaded file is run through ``classify_all``; labels, tags,
       the highest rating and duplicate references are aggregated.
    4. The post is sent through ``discordHelper.send_x_post``.  A
       ``NO_CHANNEL`` / ``OTHER_ERROR`` failure is recorded on the post
       instead of being raised.
    5. The post and its image rows are written in one transaction.

    Args:
        artist: Account the tweet belongs to; forwarded to the Discord helper.
        tweet: The tweet to process.
        botData: Shared runtime state (database, HTTP session, classifier
            objects, Discord client and views).

    Raises:
        Exception: ``"Transaction error"`` when ``x_insert_post`` reports
            failure for a successfully downloaded post; any exception raised
            while writing rows is re-raised after a rollback.
    """
    x_post = x_posts(id=tweet.id, account_id=tweet.author.id, date=tweet.date, text=tweet.text)

    if not tweet.media:
        x_post.error_id = ErrorID.NO_ART
        botData.db.x_insert_post(x_post, commit=True)
        return

    print("New media post:", str(tweet.url))
    media = await tweetHelper.GetTweetMediaUrls(tweet)
    image_containers = [x_posts_images(tweet.id, idx, file=url) for idx, url in enumerate(media)]

    try:
        downloaded_media = await tweetHelper.DownloadMedia(
            tweet.id, tweet.author.id, tweet.author.username, media, botData.session
        )
    except DOWNLOAD_FAIL as e:
        x_post.error_id = e.code
        for image in image_containers:
            image.error_id = ErrorID.DOWNLOAD_FAIL
        # Same transactional write as the success path, so a failing insert
        # no longer leaves uncommitted rows pending on the connection.  The
        # result of x_insert_post is deliberately not enforced here, as before.
        _store_post(botData.db, x_post, image_containers, require_post=False)
        return

    vox_labels = []           # distinct labels, in first-seen order
    final_filtered_tags = {}  # union of the per-image filtered tags
    duplicates = []           # distinct ids of posts holding a matching image

    for idx, attachment in enumerate(downloaded_media):
        container = image_containers[idx]
        container.saved_file = attachment.file_name
        (
            container.vox_label,
            container.rating,
            container.tags,
            filtered_tags,
            container.phash,
            container.dhash,
            container.error_id,
        ) = await classify_all(attachment.file_bytes, botData.classifier, botData.vox)

        if container.vox_label not in vox_labels:
            vox_labels.append(container.vox_label)

        # Identity checks: `!= None` can be overridden by hash/row objects.
        if container.phash is not None:
            duplicate = botData.db.x_search_duplicate(
                user_id=x_post.account_id, max_id=x_post.id, phash=container.phash
            )
            if duplicate is not None:
                container.duplicate_id = duplicate.post_id
                container.duplicate_index = duplicate.index
                if duplicate.post_id not in duplicates:
                    duplicates.append(duplicate.post_id)

        # The post carries the union of all image tags and the most severe
        # rating found among its images.
        x_post.tags = list(set(x_post.tags + container.tags))
        if _rating_value(container.rating) > _rating_value(x_post.rating):
            x_post.rating = container.rating
        final_filtered_tags = final_filtered_tags | filtered_tags

    # Filtered only when every image received the same label and it is Rejected.
    is_filtered = len(vox_labels) == 1 and vox_labels[0] == HavoxLabel.Rejected

    try:
        discord_post = await discordHelper.send_x_post(
            tweet,
            artist,
            botData.client.guilds[0],
            botData.new_accounts,
            downloaded_media,
            is_filtered,
            rating=x_post.rating,
            tags=final_filtered_tags,
            vox_labels=vox_labels,
            duplicate_posts=duplicates,
            xView=botData.xView,
            yView=botData.yView,
        )
    except (NO_CHANNEL, OTHER_ERROR) as e:
        # Keep the post in the database, flagged with the failure code.
        x_post.error_id = e.code
        x_post.discord_post_id = 0
    else:
        x_post.discord_post_id = discord_post.id
        x_post.action_taken = ActionTaken.Rejected if is_filtered else ActionTaken.Null

    _store_post(botData.db, x_post, image_containers, require_post=True)