fresh start

This commit is contained in:
2026-01-08 22:07:03 +01:00
commit 715be97289
30 changed files with 2302 additions and 0 deletions

147
Twitter/downloader.py Normal file
View File

@@ -0,0 +1,147 @@
from __future__ import annotations
from typing import TYPE_CHECKING
import asyncio
from datetime import datetime
import gc
import traceback
import tracemalloc
from Classifier.classifyHelper import classify_all
from Database.x_classes import DownloadMode
from Discord import discordHelper
from Twitter import tweetHelper
from tweety.types import Tweet
from exceptions import ACCOUNT_DEAD, ACCOUNT_SKIP, DOWNLOAD_FAIL, NO_CHANNEL, OTHER_ERROR
from Database.x_classes import ActionTaken, DownloadMode, ErrorID, HavoxLabel, PostRating, x_posts, x_posts_images, x_accounts
from Database.db_schema import x_accounts as schema_x_accounts
if TYPE_CHECKING:
from runtimeBotData import RuntimeBotData
async def download_loop(botData: RuntimeBotData):
guild = botData.client.guilds[0]
try:
results = {}
botData.new_accounts = []
for artist in botData.db.x_get_all_accounts():
if artist.is_deleted: continue
#sleep to avoid rate limits
await asyncio.sleep(5)
print("Artist:", artist.name)
#wait for ALL new posts to be found
try:
match artist.download_mode:
case DownloadMode.NO_DOWNLOAD:
continue
case DownloadMode.DOWNLOAD:
await discordHelper.ensure_has_channel_or_thread(artist, guild, botData.db)
new_posts = await tweetHelper.UpdateMediaPosts(artist, botData)
case DownloadMode.DOWNLOAD_ALL:
await discordHelper.ensure_has_channel_or_thread(artist, guild, botData.db)
new_posts = await tweetHelper.DownloadAllMediaPosts(artist, botData)
case _:
continue
except ACCOUNT_DEAD:
botData.db.x_update_account_properties(artist.id, [(schema_x_accounts.is_deleted, True)])
continue
except ACCOUNT_SKIP:
continue
if len(new_posts) == 0: continue
new_posts_count = len([post for post in new_posts if len(post.media) > 0])
if new_posts_count > 20:
#skips posting to discord if there are too many posts
botData.new_accounts.append(artist.name)
new_posts.sort(key= lambda x: x.date)
for tweet in new_posts: #posts should arrive here in chronological order
await download_post(artist, tweet, botData)
gc.collect()
print(tracemalloc.get_traced_memory())
results[artist.name] = new_posts_count
if artist.download_mode == DownloadMode.DOWNLOAD_ALL:
botData.db.x_update_account_properties(artist.id, [(schema_x_accounts.download_mode, DownloadMode.DOWNLOAD)])
await discordHelper.post_result(results, guild, botData.new_accounts)
except Exception as ex:
print(ex)
await discordHelper.send_error(traceback.format_exc()[0:256], botData)
async def download_post(artist: x_accounts, tweet: Tweet, botData: RuntimeBotData):
x_post = x_posts(id = tweet.id, account_id = tweet.author.id, date = tweet.date, text = tweet.text)
if len(tweet.media) == 0:
x_post.error_id = ErrorID.NO_ART
botData.db.x_insert_post(x_post, commit=True)
return
print("New media post:", str(tweet.url))
media = await tweetHelper.GetTweetMediaUrls(tweet)
image_containers = [x_posts_images(tweet.id, idx, file = url) for idx, url in enumerate(media)]
try:
downloaded_media = await tweetHelper.DownloadMedia(tweet.id, tweet.author.id, tweet.author.username, media, botData.session)
except DOWNLOAD_FAIL as e:
x_post.error_id = e.code
botData.db.x_insert_post(x_post, commit=False)
for image in image_containers:
image.error_id = ErrorID.DOWNLOAD_FAIL
botData.db.x_insert_image(image, commit=False)
botData.db.conn.commit()
return
def get_rating_value(rating):
return 4 if rating == PostRating.Explicit else 3 if rating == PostRating.Questionable else 2 if rating == PostRating.Sensitive else 1 if rating == PostRating.General else 0
vox_labels = []
final_filtered_tags = {}
duplicates = []
for idx, attachment in enumerate(downloaded_media):
container = image_containers[idx]
container.saved_file = attachment.file_name
container.vox_label, container.rating, container.tags, filtered_tags, container.phash, container.dhash, container.error_id = await classify_all(attachment.file_bytes, botData.classifier, botData.vox)
if container.vox_label not in vox_labels:
vox_labels.append(container.vox_label)
if container.phash != None:
duplicate = botData.db.x_search_duplicate(user_id=x_post.account_id, max_id = x_post.id, phash=container.phash)
if duplicate != None:
container.duplicate_id = duplicate.post_id
container.duplicate_index = duplicate.index
if duplicate.post_id not in duplicates:
duplicates.append(duplicate.post_id)
x_post.tags = list(set(x_post.tags + container.tags))
x_post.rating = container.rating if get_rating_value(container.rating) > get_rating_value(x_post.rating) else x_post.rating
final_filtered_tags = final_filtered_tags | filtered_tags
is_filtered = len(vox_labels) == 1 and vox_labels[0] == HavoxLabel.Rejected
try:
discord_post = await discordHelper.send_x_post(tweet, artist, botData.client.guilds[0], botData.new_accounts, downloaded_media, is_filtered, rating=x_post.rating, tags=final_filtered_tags, vox_labels = vox_labels, duplicate_posts=duplicates, xView=botData.xView, yView=botData.yView)
except (NO_CHANNEL, OTHER_ERROR) as e:
x_post.error_id = e.code
x_post.discord_post_id = 0
else:
x_post.discord_post_id = discord_post.id
x_post.action_taken = ActionTaken.Rejected if is_filtered else ActionTaken.Null
try:
if not botData.db.x_insert_post(x_post, commit = False):
raise Exception("Transaction error")
for image in image_containers:
botData.db.x_insert_image(image, False)
except Exception as ex:
botData.db.conn.rollback()
raise ex
else:
botData.db.conn.commit()

131
Twitter/tweetHelper.py Normal file
View File

@@ -0,0 +1,131 @@
from __future__ import annotations
import os
from Database.x_classes import x_accounts
from config import Global_Config
import downloadHelper
from tweety.types.twDataTypes import Tweet
from exceptions import ACCOUNT_DEAD, ACCOUNT_SKIP
from tweety.exceptions_ import UserNotFound, UserProtected
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from runtimeBotData import RuntimeBotData
class TweetMedia:
url : str
file_name : str
def __init__(self, url, file_name):
self.url = url
self.file_name = file_name
class DownloadedMedia:
file_bytes : str
file_name : str
def __init__(self, bytes, file_name):
self.file_bytes = bytes
self.file_name = file_name
async def GetTweetMedia(tweet : Tweet) -> list[TweetMedia]:
mediaList : list[TweetMedia] = []
for idx, media in enumerate(tweet.media):
if media.file_format == 'mp4':
best_stream = await media.best_stream()
fileName = f"{tweet.author.screen_name}_{tweet.id}_{idx}.{media.file_format}"
mediaList.append(TweetMedia(best_stream.direct_url, fileName))
else:
best_stream = await media.best_stream()
extension = best_stream.file_format
fileName = f"{tweet.author.screen_name}_{tweet.id}_{idx}.{extension}"
mediaList.append(TweetMedia(best_stream.direct_url, fileName))
return mediaList
async def GetTweetMediaUrls(tweet : Tweet):
mediaList = await GetTweetMedia(tweet)
return [media.url for media in mediaList]
async def DownloadMedia(post_id, account_id, account_name, url_list : list, session) -> list[DownloadedMedia]:
result : list[DownloadedMedia] = []
path = f"{Global_Config("x_download_path")}{account_id}"
os.makedirs(path, exist_ok=True)
for idx, file_url in enumerate(url_list):
file_name = get_file_name(account_name, post_id, idx, file_url)
full_path = f"{path}/{file_name}"
photo_bytes = await downloadHelper.save_to_file(file_url, full_path, session)
result.append(DownloadedMedia(photo_bytes, file_name))
return result
def get_file_name(account_name: str, post_id: int, image_index: int, image_url: str, account_id : int = None, base_path : str = None):
'''
`account_id` and `base_path` are optional\n
In `base_path`, do not include trailing slash\n
Example if none are defined:\n `file_name.ext`
Example if `base_path` is defined:\n `c:/base_path/file_name.ext`
Example if `account_id` is defined:\n `account_id/file_name.ext`
Example if both are defined:\n `c:/base_path/account_id/file_name.ext`
'''
ext = image_url.split("?")[0].split(".")[-1]
file_name = f"{account_name}_{post_id}_{image_index}.{ext}"
if account_id != None and base_path != None:
return f"{base_path}/{account_id}/{file_name}"
elif base_path != None:
return f"{base_path}/{file_name}"
elif account_id != None:
return f"{account_id}/{file_name}"
return file_name
async def UpdateMediaPosts(account : x_accounts, botData : RuntimeBotData) -> list[Tweet]:
all_posts = botData.db.get_all_post_ids(account.id)
newest_post = 1 if len(all_posts) == 0 else max(all_posts)
posts = []
try:
posts = [tweet async for tweet in botData.twApi.get_tweets(user_name = account.name, bottom_id = newest_post, all_posts = all_posts)]
except (UserProtected, UserNotFound) as ex:
print("User dead: ", account.name, ex)
raise ACCOUNT_DEAD(ex)
except Exception as ex:
print("Error in ", account.name, ex)
raise ACCOUNT_SKIP(ex)
return posts
async def DownloadAllMediaPosts(account : x_accounts, botData : RuntimeBotData) -> list[Tweet]:
all_posts = botData.db.get_all_post_ids(account.id)
posts = []
try:
async for tweet in botData.twApi.get_tweets(user_name = account.name, bottom_id = 1, all_posts = []):
if int(tweet.id) not in all_posts:
posts.append(tweet)
except (UserProtected, UserNotFound) as ex:
print("User dead: ", account.name, ex)
raise ACCOUNT_DEAD(ex)
except Exception as ex:
print("Error in ", account.name, ex)
raise ACCOUNT_SKIP(ex)
return posts
def parse_x_url(url : str):
"return account (handle, post id) from full X post url"
url = url.replace("https://", "").replace("http://", "")
split = url.split("?")
if len(split) > 0:
url = split[0]
split = url.split('/')
if split[2] != "status":
raise Exception("Invalid Format")
return split[1], int(split[3])

102
Twitter/tweetyapi.py Normal file
View File

@@ -0,0 +1,102 @@
import asyncio
from typing import AsyncIterator
from tweety import TwitterAsync
from tweety.types.twDataTypes import Tweet, SelfThread, ConversationThread
from tweety.types.usertweet import UserMedia
from tweety.exceptions_ import RateLimitReached
from config import Global_Config
class TweetyApi:
async def init(self, skip_login = False, session_name = "session"):
if skip_login:
self.app = TwitterAsync(session_name)
else:
if Global_Config["x_cookies"] == None:
raise Exception("X cookies are required")
cookies = Global_Config["x_cookies"]
self.app = TwitterAsync(session_name)
await self.app.load_cookies(cookies)
print(self.app.user)
return self
async def get_tweet(self, url):
try:
tweet = await self.app.tweet_detail(url)
return tweet
except:
return None
async def get_tweets(self, user_name, bottom_id, all_posts : list) -> AsyncIterator[Tweet]:
def validate_tweet(tweet : Tweet):
tweet_id_num = int(tweet.id)
past_bounds = False
tweet_valid = True
if tweet_id_num <= bottom_id:
past_bounds = True
if tweet_id_num in all_posts:
tweet_valid = False
return past_bounds, tweet_valid
sleep_default = 0.125
sleep_exponent = 1
user = None
while user == None:
try:
user = await self.app.get_user_info(username=user_name)
except RateLimitReached as ex:
sleep_exponent = await self.sleep_wait(sleep_default, sleep_exponent)
except Exception as ex:
print("User error: " + str(ex))
raise ex
tweety_api = UserMedia(user.rest_id, self.app, 1, 2, None)
sleep_exponent = 1
while True:
await asyncio.sleep(5)
old_cursor = tweety_api.cursor
try:
tweets = await tweety_api.get_next_page()
sleep_exponent = 1
except RateLimitReached as ex:
sleep_exponent = await self.sleep_wait(sleep_default, sleep_exponent)
tweety_api.cursor = old_cursor
continue
except Exception as ex:
raise ex
has_valid_tweets = False
for tweet in tweets:
if isinstance(tweet, ConversationThread) | isinstance(tweet, SelfThread):
tweet:ConversationThread | SelfThread
for tweet1 in tweet.tweets:
_, tweet_valid = validate_tweet(tweet1)
if tweet_valid:
has_valid_tweets = True
yield tweet1
else:
past_bounds, tweet_valid = validate_tweet(tweet)
if past_bounds: continue
if tweet_valid:
has_valid_tweets = True
yield tweet
if len(tweets) == 0 or not has_valid_tweets:
break
await asyncio.sleep(1)
@staticmethod
async def sleep_wait(sleep_default, sleep_exponent):
sleep_amount = min(sleep_default * pow(2,sleep_exponent), 2)
print(f"Sleeping for {round(sleep_amount,2)} hours.")
await asyncio.sleep(sleep_amount * 60 * 60)
print("Sleep done")
sleep_exponent += 1
return sleep_exponent
#asyncio.run(TweetyApi().get_tweets("redhood_depth", 0))

View File

@@ -0,0 +1,41 @@
from __future__ import annotations
from Database.x_classes import AccountRating, DownloadMode, ErrorID
from tweety.exceptions_ import UserNotFound, UserProtected
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from runtimeBotData import RuntimeBotData
class TwitterContainer():
id : int
name : str
rating : str
discord_channel_id : int = 0
discord_thread_id : int = 0
download_mode : int = 0
def __init__(self, name : str, rating = AccountRating.NSFW, discord_channel_id = 0, discord_thread_id = 0, download_mode = DownloadMode.NO_DOWNLOAD, id = 0, **kwargs):
self.id = id
self.name = name
self.rating = rating.upper()
self.discord_channel_id = discord_channel_id
self.discord_thread_id = discord_thread_id
self.download_mode = download_mode
async def UpdateMediaPosts(self, botData : RuntimeBotData):
all_posts = botData.db.get_all_post_ids(self.id)
newest_post = 1 if len(all_posts) == 0 else max(all_posts)
posts = []
try:
async for tweet in botData.twApi.get_tweets(user_name = self.name, bottom_id = newest_post, all_posts = all_posts):
posts.append(tweet)
except (UserProtected, UserNotFound) as ex:
print("User dead: ", self.name, ex)
return ErrorID.ACCOUNT_DEAD, []
except Exception as ex:
print("Error in ", self.name, ex)
return ErrorID.ACCOUNT_SKIP, []
return ErrorID.SUCCESS, posts