fixed memory leak related to video files

This commit is contained in:
2026-03-29 13:31:27 +02:00
parent 5662f079eb
commit 4cdccb7227
8 changed files with 36 additions and 22 deletions

View File

@@ -15,7 +15,7 @@ async def classify_all(photo_bytes, wd_classifier: WDClassifier, vox_classifier
phash = None
dhash = None
error_id = ErrorID.SUCCESS
if not is_filtered:
if not is_filtered: #won't execute if photo bytes is None (is video)
rating, tags, filtered_tags = await wd_classifier.classify_async(photo_bytes, tag_threshold=0.6)
tags = list(tags.keys())

View File

@@ -4,6 +4,8 @@ from io import BytesIO
from concurrent.futures import ThreadPoolExecutor
from transformers import ViTForImageClassification, ViTImageProcessor
from Database.x_classes import HavoxLabel
class VoxClassifier():
def __init__(self):
self.feature_extractor = ViTImageProcessor.from_pretrained('Classifier/HAV0X/')
@@ -21,9 +23,13 @@ class VoxClassifier():
return self.model.config.id2label[predicted_class_idx]
except Exception as ex:
print(ex)
return "Rejected"
return HavoxLabel.Rejected
async def classify_async(self, image_bytes):
if not image_bytes:
#mainly for video files
print("image bytes was null")
return HavoxLabel.Rejected
with ThreadPoolExecutor() as executor:
future = executor.submit(self.classify, image_bytes)

View File

@@ -59,6 +59,10 @@ class WDClassifier():
return "", {}, {}
async def classify_async(self, image_bytes, max_count = -1, tag_threshold = -1):
if not image_bytes:
#mainly for video files
print("image bytes was null")
return "", {}, {}
with ThreadPoolExecutor() as executor:
future = executor.submit(self.classify, image_bytes, max_count, tag_threshold)

View File

@@ -63,10 +63,10 @@ def build_secondary_embed(main_post : Message, handle : str, post : Tweet):
embed.set_author(name=handle, url=main_post.jump_url, icon_url=post.author.profile_image_url_https)
return embeds
async def send_error(ex : Exception, botData : RuntimeBotData):
async def send_error(ex : str, botData : RuntimeBotData):
print(ex)
errors_channel = nextcord.utils.get(botData.client.guilds[0].channels, name="bot-status")
await errors_channel.send(content=str(ex))
await errors_channel.send(content=ex[0:512])
def get_secondary_channel(is_animated, is_filtered, rating, tags : list, artist : x_accounts, guild : nextcord.Guild):
if is_animated:

View File

@@ -33,5 +33,5 @@ async def download_loop(botData: RuntimeBotData):
except Exception as ex:
print(ex)
await discordHelper.send_error(traceback.format_exc()[0:256], botData)
await discordHelper.send_error(str(ex) + " " + traceback.format_exc(), botData)
print("Pixiv done")

View File

@@ -68,7 +68,7 @@ async def download_loop(botData: RuntimeBotData):
await discordHelper.post_result(results, guild, botData.new_accounts)
except Exception as ex:
print(ex)
await discordHelper.send_error(traceback.format_exc()[0:256], botData)
await discordHelper.send_error(str(ex) + " " + traceback.format_exc(), botData)
async def download_post(artist: x_accounts, tweet: Tweet, botData: RuntimeBotData):
x_post = x_posts(id = tweet.id, account_id = tweet.author.id, date = tweet.date, text = tweet.text)
@@ -79,8 +79,8 @@ async def download_post(artist: x_accounts, tweet: Tweet, botData: RuntimeBotDat
return
print("New media post:", str(tweet.url))
media = await tweetHelper.GetTweetMediaUrls(tweet)
image_containers = [x_posts_images(tweet.id, idx, file = url) for idx, url in enumerate(media)]
media = await tweetHelper.GetTweetMedia(tweet)
image_containers = [x_posts_images(tweet.id, idx, file = med.url) for idx, med in enumerate(media)]
try:
async with aiohttp.ClientSession() as session:
@@ -104,7 +104,7 @@ async def download_post(artist: x_accounts, tweet: Tweet, botData: RuntimeBotDat
for idx, attachment in enumerate(downloaded_media):
container = image_containers[idx]
container.saved_file = attachment.file_name
container.vox_label, container.rating, container.tags, filtered_tags, container.phash, container.dhash, container.error_id = await classify_all(attachment.file_bytes, botData.classifier, botData.vox)
container.vox_label, container.rating, container.tags, filtered_tags, container.phash, container.dhash, container.error_id = await classify_all(attachment.file_bytes if not attachment.is_video else None, botData.classifier, botData.vox)
if container.vox_label not in vox_labels:
vox_labels.append(container.vox_label)

View File

@@ -15,18 +15,22 @@ if TYPE_CHECKING:
class TweetMedia:
url : str
file_name : str
is_video: bool
def __init__(self, url, file_name):
def __init__(self, url, file_name, is_video: bool):
self.url = url
self.file_name = file_name
self.is_video = is_video
class DownloadedMedia:
file_bytes : str
file_bytes : bytes
file_name : str
is_video: bool
def __init__(self, bytes, file_name):
def __init__(self, bytes, file_name, is_video: bool):
self.file_bytes = bytes
self.file_name = file_name
self.is_video = is_video
async def GetTweetMedia(tweet : Tweet) -> list[TweetMedia]:
mediaList : list[TweetMedia] = []
@@ -34,12 +38,12 @@ async def GetTweetMedia(tweet : Tweet) -> list[TweetMedia]:
if media.file_format == 'mp4':
best_stream = await media.best_stream()
fileName = f"{tweet.author.screen_name}_{tweet.id}_{idx}.{media.file_format}"
mediaList.append(TweetMedia(best_stream.direct_url, fileName))
mediaList.append(TweetMedia(best_stream.direct_url, fileName, True))
else:
best_stream = await media.best_stream()
extension = best_stream.file_format
fileName = f"{tweet.author.screen_name}_{tweet.id}_{idx}.{extension}"
mediaList.append(TweetMedia(best_stream.direct_url, fileName))
mediaList.append(TweetMedia(best_stream.direct_url, fileName, False))
return mediaList
@@ -47,18 +51,18 @@ async def GetTweetMediaUrls(tweet : Tweet):
mediaList = await GetTweetMedia(tweet)
return [media.url for media in mediaList]
async def DownloadMedia(post_id, account_id, account_name, url_list : list, session) -> list[DownloadedMedia]:
async def DownloadMedia(post_id, account_id, account_name, media_list : list[TweetMedia], session) -> list[DownloadedMedia]:
result : list[DownloadedMedia] = []
path = f"{Global_Config["x_download_path"]}{account_id}"
os.makedirs(path, exist_ok=True)
for idx, file_url in enumerate(url_list):
file_name = get_file_name(account_name, post_id, idx, file_url)
for idx, media in enumerate(media_list):
file_name = get_file_name(account_name, post_id, idx, media.url)
full_path = f"{path}/{file_name}"
photo_bytes = await downloadHelper.save_to_file(file_url, full_path, session)
photo_bytes = await downloadHelper.save_to_file(media.url, full_path, session)
result.append(DownloadedMedia(photo_bytes, file_name))
result.append(DownloadedMedia(photo_bytes, file_name, media.is_video))
return result

View File

@@ -83,8 +83,8 @@ class Commands(commands.Cog):
await discordHelper.ensure_has_channel_or_thread(artist, interaction.guild, botData.db)
image_containers : list[x_posts_images] = []
media = await tweetHelper.GetTweetMediaUrls(tweet)
image_containers = [x_posts_images(tweet.id, idx, file = url) for idx, url in enumerate(media)]
media = await tweetHelper.GetTweetMedia(tweet)
image_containers = [x_posts_images(tweet.id, idx, file = med.url) for idx, med in enumerate(media)]
async with aiohttp.ClientSession() as session:
downloaded_media = await tweetHelper.DownloadMedia(tweet.id, tweet.author.id, tweet.author.username, media, session)
@@ -94,7 +94,7 @@ class Commands(commands.Cog):
for idx, attachment in enumerate(downloaded_media):
container = image_containers[idx]
container.saved_file = attachment.file_name
container.vox_label, container.rating, container.tags, filtered_tags, container.phash, container.dhash, container.error_id = await classify_all(attachment.file_bytes, botData.classifier, botData.vox)
container.vox_label, container.rating, container.tags, filtered_tags, container.phash, container.dhash, container.error_id = await classify_all(attachment.file_bytes if not attachment.is_video else None, botData.classifier, botData.vox)
if container.vox_label not in vox_labels:
vox_labels.append(container.vox_label)