fixed memory leak related to video files

2026-03-29 13:31:27 +02:00
parent 5662f079eb
commit 4cdccb7227
8 changed files with 36 additions and 22 deletions
--- a/Classifier/classifyHelper.py
+++ b/Classifier/classifyHelper.py
@@ -15,7 +15,7 @@ async def classify_all(photo_bytes, wd_classifier: WDClassifier, vox_classifier
    phash = None
    dhash = None
    error_id = ErrorID.SUCCESS
-    if not is_filtered:
+    if not is_filtered:  # skipped when photo_bytes is None (i.e. the media is a video)
        rating, tags, filtered_tags = await wd_classifier.classify_async(photo_bytes, tag_threshold=0.6)
        tags = list(tags.keys())
--- a/Classifier/havoxClassifier.py
+++ b/Classifier/havoxClassifier.py
@@ -4,6 +4,8 @@ from io import BytesIO
 from concurrent.futures import ThreadPoolExecutor
 from transformers import ViTForImageClassification, ViTImageProcessor
 from Database.x_classes import HavoxLabel
 class VoxClassifier():
    def __init__(self):
        self.feature_extractor = ViTImageProcessor.from_pretrained('Classifier/HAV0X/')
@@ -21,9 +23,13 @@ class VoxClassifier():
                    return self.model.config.id2label[predicted_class_idx]
        except Exception as ex:
            print(ex)
-            return "Rejected"
+            return HavoxLabel.Rejected
    async def classify_async(self, image_bytes):
        if not image_bytes:
            #mainly for video files
            print("image bytes was null")
            return HavoxLabel.Rejected
        with ThreadPoolExecutor() as executor:
            future = executor.submit(self.classify, image_bytes)
--- a/Classifier/wdClassifier.py
+++ b/Classifier/wdClassifier.py
@@ -59,6 +59,10 @@ class WDClassifier():
            return "", {}, {}
    async def classify_async(self, image_bytes, max_count = -1, tag_threshold = -1):
        if not image_bytes:
            #mainly for video files
            print("image bytes was null")
            return "", {}, {}
        with ThreadPoolExecutor() as executor:
            future = executor.submit(self.classify, image_bytes, max_count, tag_threshold)
--- a/Discord/discordHelper.py
+++ b/Discord/discordHelper.py
@@ -63,10 +63,10 @@ def build_secondary_embed(main_post : Message, handle : str, post : Tweet):
    embed.set_author(name=handle, url=main_post.jump_url, icon_url=post.author.profile_image_url_https)
    return embeds
-async def send_error(ex : Exception, botData : RuntimeBotData):
+async def send_error(ex : str, botData : RuntimeBotData):
    print(ex)
    errors_channel = nextcord.utils.get(botData.client.guilds[0].channels, name="bot-status")
-    await errors_channel.send(content=str(ex))
+    await errors_channel.send(content=ex[0:512])
 def get_secondary_channel(is_animated, is_filtered, rating, tags : list, artist : x_accounts, guild : nextcord.Guild):
    if is_animated:
--- a/Pixiv/downloader.py
+++ b/Pixiv/downloader.py
@@ -33,5 +33,5 @@ async def download_loop(botData: RuntimeBotData):
    except Exception as ex:
        print(ex)
-        await discordHelper.send_error(traceback.format_exc()[0:256], botData)
+        await discordHelper.send_error(str(ex) + " " + traceback.format_exc(), botData)
    print("Pixiv done")
--- a/Twitter/downloader.py
+++ b/Twitter/downloader.py
@@ -68,7 +68,7 @@ async def download_loop(botData: RuntimeBotData):
        await discordHelper.post_result(results, guild, botData.new_accounts)
    except Exception as ex:
        print(ex)
-        await discordHelper.send_error(traceback.format_exc()[0:256], botData)
+        await discordHelper.send_error(str(ex) + " " + traceback.format_exc(), botData)
 async def download_post(artist: x_accounts, tweet: Tweet, botData: RuntimeBotData):
    x_post = x_posts(id = tweet.id, account_id = tweet.author.id, date = tweet.date, text = tweet.text)
@@ -79,8 +79,8 @@ async def download_post(artist: x_accounts, tweet: Tweet, botData: RuntimeBotDat
        return
    print("New media post:", str(tweet.url))
-    media = await tweetHelper.GetTweetMediaUrls(tweet)
+    media = await tweetHelper.GetTweetMedia(tweet)
-    image_containers = [x_posts_images(tweet.id, idx, file = url) for idx, url in enumerate(media)]
+    image_containers = [x_posts_images(tweet.id, idx, file = med.url) for idx, med in enumerate(media)]
    try:
        async with aiohttp.ClientSession() as session:
@@ -104,7 +104,7 @@ async def download_post(artist: x_accounts, tweet: Tweet, botData: RuntimeBotDat
    for idx, attachment in enumerate(downloaded_media):
        container = image_containers[idx]
        container.saved_file = attachment.file_name
-        container.vox_label, container.rating, container.tags, filtered_tags, container.phash, container.dhash, container.error_id = await classify_all(attachment.file_bytes, botData.classifier, botData.vox)
+        container.vox_label, container.rating, container.tags, filtered_tags, container.phash, container.dhash, container.error_id = await classify_all(attachment.file_bytes if not attachment.is_video else None, botData.classifier, botData.vox)
        if container.vox_label not in vox_labels:
            vox_labels.append(container.vox_label)
--- a/Twitter/tweetHelper.py
+++ b/Twitter/tweetHelper.py
@@ -15,18 +15,22 @@ if TYPE_CHECKING:
 class TweetMedia:
    url : str
    file_name : str
    is_video: bool
-    def __init__(self, url, file_name):
+    def __init__(self, url, file_name, is_video: bool):
        self.url = url
        self.file_name = file_name
        self.is_video = is_video
 class DownloadedMedia:
-    file_bytes : str
+    file_bytes : bytes
    file_name : str
    is_video: bool
-    def __init__(self, bytes, file_name):
+    def __init__(self, bytes, file_name, is_video: bool):
        self.file_bytes = bytes
        self.file_name = file_name
        self.is_video = is_video
 async def GetTweetMedia(tweet : Tweet) -> list[TweetMedia]:
    mediaList : list[TweetMedia] = []
@@ -34,12 +38,12 @@ async def GetTweetMedia(tweet : Tweet) -> list[TweetMedia]:
        if media.file_format == 'mp4':
            best_stream = await media.best_stream()
            fileName    = f"{tweet.author.screen_name}_{tweet.id}_{idx}.{media.file_format}"
-            mediaList.append(TweetMedia(best_stream.direct_url, fileName))
+            mediaList.append(TweetMedia(best_stream.direct_url, fileName, True))
        else:
            best_stream = await media.best_stream()
            extension   = best_stream.file_format
            fileName    = f"{tweet.author.screen_name}_{tweet.id}_{idx}.{extension}"
-            mediaList.append(TweetMedia(best_stream.direct_url, fileName))
+            mediaList.append(TweetMedia(best_stream.direct_url, fileName, False))
    return mediaList
@@ -47,18 +51,18 @@ async def GetTweetMediaUrls(tweet : Tweet):
    mediaList = await GetTweetMedia(tweet)
    return [media.url for media in mediaList]
-async def DownloadMedia(post_id, account_id, account_name, url_list : list, session) -> list[DownloadedMedia]:
+async def DownloadMedia(post_id, account_id, account_name, media_list : list[TweetMedia], session) -> list[DownloadedMedia]:
    result : list[DownloadedMedia] = []
    path = f"{Global_Config["x_download_path"]}{account_id}"
    os.makedirs(path, exist_ok=True)
-    for idx, file_url in enumerate(url_list):
+    for idx, media in enumerate(media_list):
-        file_name = get_file_name(account_name, post_id, idx, file_url)
+        file_name = get_file_name(account_name, post_id, idx, media.url)
        full_path = f"{path}/{file_name}"
-        photo_bytes = await downloadHelper.save_to_file(file_url, full_path, session)
+        photo_bytes = await downloadHelper.save_to_file(media.url, full_path, session)
-        result.append(DownloadedMedia(photo_bytes, file_name))
+        result.append(DownloadedMedia(photo_bytes, file_name, media.is_video))
    return result
--- a/commands.py
+++ b/commands.py
@@ -83,8 +83,8 @@ class Commands(commands.Cog):
                await discordHelper.ensure_has_channel_or_thread(artist, interaction.guild, botData.db)
                image_containers : list[x_posts_images] = []
-                media = await tweetHelper.GetTweetMediaUrls(tweet)
+                media = await tweetHelper.GetTweetMedia(tweet)
-                image_containers = [x_posts_images(tweet.id, idx, file = url) for idx, url in enumerate(media)]
+                image_containers = [x_posts_images(tweet.id, idx, file = med.url) for idx, med in enumerate(media)]
                async with aiohttp.ClientSession() as session:
                    downloaded_media = await tweetHelper.DownloadMedia(tweet.id, tweet.author.id, tweet.author.username, media, session)
@@ -94,7 +94,7 @@ class Commands(commands.Cog):
                for idx, attachment in enumerate(downloaded_media):
                    container = image_containers[idx]
                    container.saved_file = attachment.file_name
-                    container.vox_label, container.rating, container.tags, filtered_tags, container.phash, container.dhash, container.error_id = await classify_all(attachment.file_bytes, botData.classifier, botData.vox)
+                    container.vox_label, container.rating, container.tags, filtered_tags, container.phash, container.dhash, container.error_id = await classify_all(attachment.file_bytes if not attachment.is_video else None, botData.classifier, botData.vox)
                    if container.vox_label not in vox_labels:
                        vox_labels.append(container.vox_label)