You've already forked JapariArchive
fresh start
This commit is contained in:
147
Twitter/downloader.py
Normal file
147
Twitter/downloader.py
Normal file
@@ -0,0 +1,147 @@
|
||||
from __future__ import annotations
|
||||
from typing import TYPE_CHECKING
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
import gc
|
||||
import traceback
|
||||
import tracemalloc
|
||||
|
||||
from Classifier.classifyHelper import classify_all
|
||||
from Database.x_classes import DownloadMode
|
||||
from Discord import discordHelper
|
||||
from Twitter import tweetHelper
|
||||
from tweety.types import Tweet
|
||||
from exceptions import ACCOUNT_DEAD, ACCOUNT_SKIP, DOWNLOAD_FAIL, NO_CHANNEL, OTHER_ERROR
|
||||
from Database.x_classes import ActionTaken, DownloadMode, ErrorID, HavoxLabel, PostRating, x_posts, x_posts_images, x_accounts
|
||||
from Database.db_schema import x_accounts as schema_x_accounts
|
||||
if TYPE_CHECKING:
|
||||
from runtimeBotData import RuntimeBotData
|
||||
|
||||
async def download_loop(botData: RuntimeBotData):
|
||||
guild = botData.client.guilds[0]
|
||||
try:
|
||||
results = {}
|
||||
botData.new_accounts = []
|
||||
|
||||
for artist in botData.db.x_get_all_accounts():
|
||||
if artist.is_deleted: continue
|
||||
|
||||
#sleep to avoid rate limits
|
||||
await asyncio.sleep(5)
|
||||
|
||||
print("Artist:", artist.name)
|
||||
|
||||
#wait for ALL new posts to be found
|
||||
try:
|
||||
match artist.download_mode:
|
||||
case DownloadMode.NO_DOWNLOAD:
|
||||
continue
|
||||
case DownloadMode.DOWNLOAD:
|
||||
await discordHelper.ensure_has_channel_or_thread(artist, guild, botData.db)
|
||||
new_posts = await tweetHelper.UpdateMediaPosts(artist, botData)
|
||||
case DownloadMode.DOWNLOAD_ALL:
|
||||
await discordHelper.ensure_has_channel_or_thread(artist, guild, botData.db)
|
||||
new_posts = await tweetHelper.DownloadAllMediaPosts(artist, botData)
|
||||
case _:
|
||||
continue
|
||||
except ACCOUNT_DEAD:
|
||||
botData.db.x_update_account_properties(artist.id, [(schema_x_accounts.is_deleted, True)])
|
||||
continue
|
||||
except ACCOUNT_SKIP:
|
||||
continue
|
||||
|
||||
if len(new_posts) == 0: continue
|
||||
|
||||
new_posts_count = len([post for post in new_posts if len(post.media) > 0])
|
||||
if new_posts_count > 20:
|
||||
#skips posting to discord if there are too many posts
|
||||
botData.new_accounts.append(artist.name)
|
||||
|
||||
new_posts.sort(key= lambda x: x.date)
|
||||
|
||||
for tweet in new_posts: #posts should arrive here in chronological order
|
||||
await download_post(artist, tweet, botData)
|
||||
gc.collect()
|
||||
print(tracemalloc.get_traced_memory())
|
||||
|
||||
results[artist.name] = new_posts_count
|
||||
if artist.download_mode == DownloadMode.DOWNLOAD_ALL:
|
||||
botData.db.x_update_account_properties(artist.id, [(schema_x_accounts.download_mode, DownloadMode.DOWNLOAD)])
|
||||
await discordHelper.post_result(results, guild, botData.new_accounts)
|
||||
except Exception as ex:
|
||||
print(ex)
|
||||
await discordHelper.send_error(traceback.format_exc()[0:256], botData)
|
||||
|
||||
async def download_post(artist: x_accounts, tweet: Tweet, botData: RuntimeBotData):
|
||||
x_post = x_posts(id = tweet.id, account_id = tweet.author.id, date = tweet.date, text = tweet.text)
|
||||
|
||||
if len(tweet.media) == 0:
|
||||
x_post.error_id = ErrorID.NO_ART
|
||||
botData.db.x_insert_post(x_post, commit=True)
|
||||
return
|
||||
|
||||
print("New media post:", str(tweet.url))
|
||||
media = await tweetHelper.GetTweetMediaUrls(tweet)
|
||||
image_containers = [x_posts_images(tweet.id, idx, file = url) for idx, url in enumerate(media)]
|
||||
|
||||
try:
|
||||
downloaded_media = await tweetHelper.DownloadMedia(tweet.id, tweet.author.id, tweet.author.username, media, botData.session)
|
||||
except DOWNLOAD_FAIL as e:
|
||||
x_post.error_id = e.code
|
||||
|
||||
botData.db.x_insert_post(x_post, commit=False)
|
||||
for image in image_containers:
|
||||
image.error_id = ErrorID.DOWNLOAD_FAIL
|
||||
botData.db.x_insert_image(image, commit=False)
|
||||
botData.db.conn.commit()
|
||||
return
|
||||
|
||||
def get_rating_value(rating):
|
||||
return 4 if rating == PostRating.Explicit else 3 if rating == PostRating.Questionable else 2 if rating == PostRating.Sensitive else 1 if rating == PostRating.General else 0
|
||||
|
||||
vox_labels = []
|
||||
final_filtered_tags = {}
|
||||
duplicates = []
|
||||
for idx, attachment in enumerate(downloaded_media):
|
||||
container = image_containers[idx]
|
||||
container.saved_file = attachment.file_name
|
||||
container.vox_label, container.rating, container.tags, filtered_tags, container.phash, container.dhash, container.error_id = await classify_all(attachment.file_bytes, botData.classifier, botData.vox)
|
||||
|
||||
if container.vox_label not in vox_labels:
|
||||
vox_labels.append(container.vox_label)
|
||||
|
||||
if container.phash != None:
|
||||
duplicate = botData.db.x_search_duplicate(user_id=x_post.account_id, max_id = x_post.id, phash=container.phash)
|
||||
if duplicate != None:
|
||||
container.duplicate_id = duplicate.post_id
|
||||
container.duplicate_index = duplicate.index
|
||||
if duplicate.post_id not in duplicates:
|
||||
duplicates.append(duplicate.post_id)
|
||||
|
||||
x_post.tags = list(set(x_post.tags + container.tags))
|
||||
x_post.rating = container.rating if get_rating_value(container.rating) > get_rating_value(x_post.rating) else x_post.rating
|
||||
final_filtered_tags = final_filtered_tags | filtered_tags
|
||||
|
||||
is_filtered = len(vox_labels) == 1 and vox_labels[0] == HavoxLabel.Rejected
|
||||
|
||||
try:
|
||||
discord_post = await discordHelper.send_x_post(tweet, artist, botData.client.guilds[0], botData.new_accounts, downloaded_media, is_filtered, rating=x_post.rating, tags=final_filtered_tags, vox_labels = vox_labels, duplicate_posts=duplicates, xView=botData.xView, yView=botData.yView)
|
||||
except (NO_CHANNEL, OTHER_ERROR) as e:
|
||||
x_post.error_id = e.code
|
||||
x_post.discord_post_id = 0
|
||||
else:
|
||||
x_post.discord_post_id = discord_post.id
|
||||
|
||||
x_post.action_taken = ActionTaken.Rejected if is_filtered else ActionTaken.Null
|
||||
|
||||
try:
|
||||
if not botData.db.x_insert_post(x_post, commit = False):
|
||||
raise Exception("Transaction error")
|
||||
|
||||
for image in image_containers:
|
||||
botData.db.x_insert_image(image, False)
|
||||
except Exception as ex:
|
||||
botData.db.conn.rollback()
|
||||
raise ex
|
||||
else:
|
||||
botData.db.conn.commit()
|
||||
131
Twitter/tweetHelper.py
Normal file
131
Twitter/tweetHelper.py
Normal file
@@ -0,0 +1,131 @@
|
||||
from __future__ import annotations
|
||||
import os
|
||||
from Database.x_classes import x_accounts
|
||||
from config import Global_Config
|
||||
import downloadHelper
|
||||
from tweety.types.twDataTypes import Tweet
|
||||
|
||||
from exceptions import ACCOUNT_DEAD, ACCOUNT_SKIP
|
||||
from tweety.exceptions_ import UserNotFound, UserProtected
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from runtimeBotData import RuntimeBotData
|
||||
|
||||
class TweetMedia:
|
||||
url : str
|
||||
file_name : str
|
||||
|
||||
def __init__(self, url, file_name):
|
||||
self.url = url
|
||||
self.file_name = file_name
|
||||
|
||||
class DownloadedMedia:
|
||||
file_bytes : str
|
||||
file_name : str
|
||||
|
||||
def __init__(self, bytes, file_name):
|
||||
self.file_bytes = bytes
|
||||
self.file_name = file_name
|
||||
|
||||
async def GetTweetMedia(tweet : Tweet) -> list[TweetMedia]:
|
||||
mediaList : list[TweetMedia] = []
|
||||
for idx, media in enumerate(tweet.media):
|
||||
if media.file_format == 'mp4':
|
||||
best_stream = await media.best_stream()
|
||||
fileName = f"{tweet.author.screen_name}_{tweet.id}_{idx}.{media.file_format}"
|
||||
mediaList.append(TweetMedia(best_stream.direct_url, fileName))
|
||||
else:
|
||||
best_stream = await media.best_stream()
|
||||
extension = best_stream.file_format
|
||||
fileName = f"{tweet.author.screen_name}_{tweet.id}_{idx}.{extension}"
|
||||
mediaList.append(TweetMedia(best_stream.direct_url, fileName))
|
||||
|
||||
return mediaList
|
||||
|
||||
async def GetTweetMediaUrls(tweet : Tweet):
|
||||
mediaList = await GetTweetMedia(tweet)
|
||||
return [media.url for media in mediaList]
|
||||
|
||||
async def DownloadMedia(post_id, account_id, account_name, url_list : list, session) -> list[DownloadedMedia]:
|
||||
result : list[DownloadedMedia] = []
|
||||
path = f"{Global_Config("x_download_path")}{account_id}"
|
||||
os.makedirs(path, exist_ok=True)
|
||||
|
||||
for idx, file_url in enumerate(url_list):
|
||||
file_name = get_file_name(account_name, post_id, idx, file_url)
|
||||
full_path = f"{path}/{file_name}"
|
||||
|
||||
photo_bytes = await downloadHelper.save_to_file(file_url, full_path, session)
|
||||
|
||||
result.append(DownloadedMedia(photo_bytes, file_name))
|
||||
|
||||
return result
|
||||
|
||||
def get_file_name(account_name: str, post_id: int, image_index: int, image_url: str, account_id : int = None, base_path : str = None):
|
||||
'''
|
||||
`account_id` and `base_path` are optional\n
|
||||
In `base_path`, do not include trailing slash\n
|
||||
Example if none are defined:\n `file_name.ext`
|
||||
Example if `base_path` is defined:\n `c:/base_path/file_name.ext`
|
||||
Example if `account_id` is defined:\n `account_id/file_name.ext`
|
||||
Example if both are defined:\n `c:/base_path/account_id/file_name.ext`
|
||||
'''
|
||||
|
||||
ext = image_url.split("?")[0].split(".")[-1]
|
||||
file_name = f"{account_name}_{post_id}_{image_index}.{ext}"
|
||||
if account_id != None and base_path != None:
|
||||
return f"{base_path}/{account_id}/{file_name}"
|
||||
elif base_path != None:
|
||||
return f"{base_path}/{file_name}"
|
||||
elif account_id != None:
|
||||
return f"{account_id}/{file_name}"
|
||||
return file_name
|
||||
|
||||
async def UpdateMediaPosts(account : x_accounts, botData : RuntimeBotData) -> list[Tweet]:
|
||||
all_posts = botData.db.get_all_post_ids(account.id)
|
||||
newest_post = 1 if len(all_posts) == 0 else max(all_posts)
|
||||
posts = []
|
||||
|
||||
try:
|
||||
posts = [tweet async for tweet in botData.twApi.get_tweets(user_name = account.name, bottom_id = newest_post, all_posts = all_posts)]
|
||||
|
||||
except (UserProtected, UserNotFound) as ex:
|
||||
print("User dead: ", account.name, ex)
|
||||
raise ACCOUNT_DEAD(ex)
|
||||
except Exception as ex:
|
||||
print("Error in ", account.name, ex)
|
||||
raise ACCOUNT_SKIP(ex)
|
||||
|
||||
return posts
|
||||
|
||||
async def DownloadAllMediaPosts(account : x_accounts, botData : RuntimeBotData) -> list[Tweet]:
|
||||
all_posts = botData.db.get_all_post_ids(account.id)
|
||||
posts = []
|
||||
|
||||
try:
|
||||
async for tweet in botData.twApi.get_tweets(user_name = account.name, bottom_id = 1, all_posts = []):
|
||||
if int(tweet.id) not in all_posts:
|
||||
posts.append(tweet)
|
||||
|
||||
except (UserProtected, UserNotFound) as ex:
|
||||
print("User dead: ", account.name, ex)
|
||||
raise ACCOUNT_DEAD(ex)
|
||||
except Exception as ex:
|
||||
print("Error in ", account.name, ex)
|
||||
raise ACCOUNT_SKIP(ex)
|
||||
|
||||
return posts
|
||||
|
||||
def parse_x_url(url : str):
|
||||
"return account (handle, post id) from full X post url"
|
||||
url = url.replace("https://", "").replace("http://", "")
|
||||
split = url.split("?")
|
||||
if len(split) > 0:
|
||||
url = split[0]
|
||||
|
||||
split = url.split('/')
|
||||
if split[2] != "status":
|
||||
raise Exception("Invalid Format")
|
||||
|
||||
return split[1], int(split[3])
|
||||
102
Twitter/tweetyapi.py
Normal file
102
Twitter/tweetyapi.py
Normal file
@@ -0,0 +1,102 @@
|
||||
import asyncio
|
||||
from typing import AsyncIterator
|
||||
from tweety import TwitterAsync
|
||||
from tweety.types.twDataTypes import Tweet, SelfThread, ConversationThread
|
||||
from tweety.types.usertweet import UserMedia
|
||||
from tweety.exceptions_ import RateLimitReached
|
||||
from config import Global_Config
|
||||
|
||||
class TweetyApi:
|
||||
async def init(self, skip_login = False, session_name = "session"):
|
||||
if skip_login:
|
||||
self.app = TwitterAsync(session_name)
|
||||
else:
|
||||
if Global_Config["x_cookies"] == None:
|
||||
raise Exception("X cookies are required")
|
||||
cookies = Global_Config["x_cookies"]
|
||||
self.app = TwitterAsync(session_name)
|
||||
await self.app.load_cookies(cookies)
|
||||
print(self.app.user)
|
||||
return self
|
||||
|
||||
async def get_tweet(self, url):
|
||||
try:
|
||||
tweet = await self.app.tweet_detail(url)
|
||||
return tweet
|
||||
except:
|
||||
return None
|
||||
|
||||
async def get_tweets(self, user_name, bottom_id, all_posts : list) -> AsyncIterator[Tweet]:
|
||||
def validate_tweet(tweet : Tweet):
|
||||
tweet_id_num = int(tweet.id)
|
||||
|
||||
past_bounds = False
|
||||
tweet_valid = True
|
||||
|
||||
if tweet_id_num <= bottom_id:
|
||||
past_bounds = True
|
||||
if tweet_id_num in all_posts:
|
||||
tweet_valid = False
|
||||
|
||||
return past_bounds, tweet_valid
|
||||
|
||||
sleep_default = 0.125
|
||||
sleep_exponent = 1
|
||||
user = None
|
||||
|
||||
while user == None:
|
||||
try:
|
||||
user = await self.app.get_user_info(username=user_name)
|
||||
except RateLimitReached as ex:
|
||||
sleep_exponent = await self.sleep_wait(sleep_default, sleep_exponent)
|
||||
except Exception as ex:
|
||||
print("User error: " + str(ex))
|
||||
raise ex
|
||||
|
||||
tweety_api = UserMedia(user.rest_id, self.app, 1, 2, None)
|
||||
sleep_exponent = 1
|
||||
|
||||
while True:
|
||||
await asyncio.sleep(5)
|
||||
old_cursor = tweety_api.cursor
|
||||
|
||||
try:
|
||||
tweets = await tweety_api.get_next_page()
|
||||
sleep_exponent = 1
|
||||
except RateLimitReached as ex:
|
||||
sleep_exponent = await self.sleep_wait(sleep_default, sleep_exponent)
|
||||
tweety_api.cursor = old_cursor
|
||||
continue
|
||||
except Exception as ex:
|
||||
raise ex
|
||||
|
||||
has_valid_tweets = False
|
||||
for tweet in tweets:
|
||||
if isinstance(tweet, ConversationThread) | isinstance(tweet, SelfThread):
|
||||
tweet:ConversationThread | SelfThread
|
||||
for tweet1 in tweet.tweets:
|
||||
_, tweet_valid = validate_tweet(tweet1)
|
||||
if tweet_valid:
|
||||
has_valid_tweets = True
|
||||
yield tweet1
|
||||
else:
|
||||
past_bounds, tweet_valid = validate_tweet(tweet)
|
||||
if past_bounds: continue
|
||||
if tweet_valid:
|
||||
has_valid_tweets = True
|
||||
yield tweet
|
||||
|
||||
if len(tweets) == 0 or not has_valid_tweets:
|
||||
break
|
||||
await asyncio.sleep(1)
|
||||
|
||||
@staticmethod
|
||||
async def sleep_wait(sleep_default, sleep_exponent):
|
||||
sleep_amount = min(sleep_default * pow(2,sleep_exponent), 2)
|
||||
print(f"Sleeping for {round(sleep_amount,2)} hours.")
|
||||
await asyncio.sleep(sleep_amount * 60 * 60)
|
||||
print("Sleep done")
|
||||
sleep_exponent += 1
|
||||
return sleep_exponent
|
||||
|
||||
#asyncio.run(TweetyApi().get_tweets("redhood_depth", 0))
|
||||
41
Twitter/twitterContainer.py
Normal file
41
Twitter/twitterContainer.py
Normal file
@@ -0,0 +1,41 @@
|
||||
from __future__ import annotations
|
||||
from Database.x_classes import AccountRating, DownloadMode, ErrorID
|
||||
from tweety.exceptions_ import UserNotFound, UserProtected
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from runtimeBotData import RuntimeBotData
|
||||
|
||||
class TwitterContainer():
|
||||
id : int
|
||||
name : str
|
||||
rating : str
|
||||
discord_channel_id : int = 0
|
||||
discord_thread_id : int = 0
|
||||
download_mode : int = 0
|
||||
|
||||
def __init__(self, name : str, rating = AccountRating.NSFW, discord_channel_id = 0, discord_thread_id = 0, download_mode = DownloadMode.NO_DOWNLOAD, id = 0, **kwargs):
|
||||
self.id = id
|
||||
self.name = name
|
||||
self.rating = rating.upper()
|
||||
self.discord_channel_id = discord_channel_id
|
||||
self.discord_thread_id = discord_thread_id
|
||||
self.download_mode = download_mode
|
||||
|
||||
async def UpdateMediaPosts(self, botData : RuntimeBotData):
|
||||
all_posts = botData.db.get_all_post_ids(self.id)
|
||||
newest_post = 1 if len(all_posts) == 0 else max(all_posts)
|
||||
posts = []
|
||||
|
||||
try:
|
||||
async for tweet in botData.twApi.get_tweets(user_name = self.name, bottom_id = newest_post, all_posts = all_posts):
|
||||
posts.append(tweet)
|
||||
|
||||
except (UserProtected, UserNotFound) as ex:
|
||||
print("User dead: ", self.name, ex)
|
||||
return ErrorID.ACCOUNT_DEAD, []
|
||||
except Exception as ex:
|
||||
print("Error in ", self.name, ex)
|
||||
return ErrorID.ACCOUNT_SKIP, []
|
||||
|
||||
return ErrorID.SUCCESS, posts
|
||||
Reference in New Issue
Block a user