update to use queue

2025-02-14 17:48:30 -08:00 · 2025-02-14 17:48:30 -08:00 · a5515b8fd4
commit a5515b8fd4
parent af260d785a
1 changed files with 18 additions and 42 deletions
--- a/plugins/url_title_sniffer.py
+++ b/plugins/url_title_sniffer.py
@@ -12,6 +12,7 @@ Features:
    - Built-in exclusion of YouTube URLs to avoid conflicts with dedicated YouTube plugins
    - Error handling for network and parsing operations
    - Proper resource cleanup through session management
+    - Queue-based processing system for better flow control

 Dependencies:
    - aiohttp: For asynchronous HTTP requests
@@ -19,7 +20,7 @@ Dependencies:
    - ircstyle: IRC text formatting utilities
    - lxml: HTML parsing capabilities

-Author: Zodiac (simplified by Claude)
+Author: Zodiac
 Date: 2025-02-14
 """

@@ -29,6 +30,7 @@ import ircstyle
 from lxml import html
 import irc3
 from irc3 import event
+from irc3.compat import Queue
 from plugins.services.permissions import check_ignore


@@ -43,59 +45,44 @@ class URLTitlePlugin:
        bot (irc3.IrcBot): Reference to the main IRC bot instance
        session (aiohttp.ClientSession): Persistent HTTP session for making web requests
        url_pattern (re.Pattern): Compiled regex for URL detection in messages
+        queue (Queue): Processing queue for URL handling tasks
    """

    def __init__(self, bot):
-        """Initialize plugin with bot instance and set up HTTP session.
-        
-        Args:
-            bot (irc3.IrcBot): The IRC bot instance this plugin will be attached to
-        """
+        """Initialize plugin with bot instance and set up components."""
        self.bot = bot
-        # Create persistent HTTP session for better performance
        self.session = aiohttp.ClientSession(loop=self.bot.loop)
-        # Regex pattern matches both http(s):// URLs and www. domains
        self.url_pattern = re.compile(r"https?://[^\s<>\"']+|www\.[^\s<>\"']+")
+        self.queue = Queue()
+        self.bot.create_task(self.process_queue())

    @event(irc3.rfc.PRIVMSG)
    @check_ignore
    async def on_privmsg(self, mask, event, target, data):
-        """Handle incoming private messages by processing any URLs they contain.
-        
-        Args:
-            mask (str): IRC user mask of message sender
-            event (str): IRC event type
-            target (str): Channel or user the message was sent to
-            data (str): Content of the message
-        """
-        # Extract all URLs from the message
+        """Handle incoming messages and enqueue URLs for processing."""
        urls = self.url_pattern.findall(data)
        
        for url in urls:
-            # Skip YouTube URLs as they're typically handled by dedicated plugins
            if "youtube.com" in url.lower() or "youtu.be" in url.lower():
                continue
-                
+            self.queue.put_nowait((target, url))
+
+    async def process_queue(self):
+        """Process URLs from the queue asynchronously."""
+        while True:
+            target, url = await self.queue.get()
            try:
                title = await self.fetch_title(url)
                if title:
                    formatted_message = self.format_message(title, url)
                    await self.bot.privmsg(target, formatted_message)
            except Exception as e:
-                # Log errors but continue processing other URLs
                self.bot.log.error(f"Error processing URL {url}: {e}")
+            finally:
+                self.queue.task_done()

    def format_message(self, title, url):
-        """Create a styled IRC message containing the webpage title and source URL.
-        
-        Args:
-            title (str): The webpage title to display
-            url (str): The source URL
-            
-        Returns:
-            str: Formatted IRC message with styling applied
-        """
-        # Define styled components for the message
+        """Create a styled IRC message containing the webpage title and source URL."""
        prefix = ircstyle.style("►", fg="cyan", bold=True, reset=True)
        title_label = ircstyle.style("Title", fg="blue", bold=True, reset=True)
        title_text = ircstyle.style(title, fg="green", italics=True, underline=True, reset=True)
@@ -107,18 +94,7 @@ class URLTitlePlugin:
        return f"{prefix} {title_label}: {title_text} {separator} {url_label}: {url_text} {suffix}"

    async def fetch_title(self, url):
-        """Retrieve the title of a webpage using asynchronous HTTP requests.
-        
-        Args:
-            url (str): The URL to fetch the title from
-            
-        Returns:
-            str: The webpage title or "No title found" if title cannot be extracted
-            
-        Raises:
-            Any exceptions from aiohttp or lxml processing
-        """
-        # Use modern browser User-Agent to avoid being blocked
+        """Retrieve the title of a webpage using asynchronous HTTP requests."""
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                         "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"