diff --git a/plugins/url_title_sniffer.py b/plugins/url_title_sniffer.py index 3939b3a..a1d9f14 100644 --- a/plugins/url_title_sniffer.py +++ b/plugins/url_title_sniffer.py @@ -12,6 +12,7 @@ Features: - Built-in exclusion of YouTube URLs to avoid conflicts with dedicated YouTube plugins - Error handling for network and parsing operations - Proper resource cleanup through session management + - Queue-based processing system for better flow control Dependencies: - aiohttp: For asynchronous HTTP requests @@ -19,7 +20,7 @@ Dependencies: - ircstyle: IRC text formatting utilities - lxml: HTML parsing capabilities -Author: Zodiac (simplified by Claude) +Author: Zodiac Date: 2025-02-14 """ @@ -29,6 +30,7 @@ import ircstyle from lxml import html import irc3 from irc3 import event +from irc3.compat import Queue from plugins.services.permissions import check_ignore @@ -43,59 +45,44 @@ class URLTitlePlugin: bot (irc3.IrcBot): Reference to the main IRC bot instance session (aiohttp.ClientSession): Persistent HTTP session for making web requests url_pattern (re.Pattern): Compiled regex for URL detection in messages + queue (Queue): Processing queue for URL handling tasks """ def __init__(self, bot): - """Initialize plugin with bot instance and set up HTTP session. - - Args: - bot (irc3.IrcBot): The IRC bot instance this plugin will be attached to - """ + """Initialize plugin with bot instance and set up components.""" self.bot = bot - # Create persistent HTTP session for better performance self.session = aiohttp.ClientSession(loop=self.bot.loop) - # Regex pattern matches both http(s):// URLs and www. domains self.url_pattern = re.compile(r"https?://[^\s<>\"']+|www\.[^\s<>\"']+") + self.queue = Queue() + self.bot.create_task(self.process_queue()) @event(irc3.rfc.PRIVMSG) @check_ignore async def on_privmsg(self, mask, event, target, data): - """Handle incoming private messages by processing any URLs they contain. - - Args: - mask (str): IRC user mask of message sender - event (str): IRC event type - target (str): Channel or user the message was sent to - data (str): Content of the message - """ - # Extract all URLs from the message + """Handle incoming messages and enqueue URLs for processing.""" urls = self.url_pattern.findall(data) for url in urls: - # Skip YouTube URLs as they're typically handled by dedicated plugins if "youtube.com" in url.lower() or "youtu.be" in url.lower(): continue - + self.queue.put_nowait((target, url)) + + async def process_queue(self): + """Process URLs from the queue asynchronously.""" + while True: + target, url = await self.queue.get() try: title = await self.fetch_title(url) if title: formatted_message = self.format_message(title, url) await self.bot.privmsg(target, formatted_message) except Exception as e: - # Log errors but continue processing other URLs self.bot.log.error(f"Error processing URL {url}: {e}") + finally: + self.queue.task_done() def format_message(self, title, url): - """Create a styled IRC message containing the webpage title and source URL. - - Args: - title (str): The webpage title to display - url (str): The source URL - - Returns: - str: Formatted IRC message with styling applied - """ - # Define styled components for the message + """Create a styled IRC message containing the webpage title and source URL.""" prefix = ircstyle.style("►", fg="cyan", bold=True, reset=True) title_label = ircstyle.style("Title", fg="blue", bold=True, reset=True) title_text = ircstyle.style(title, fg="green", italics=True, underline=True, reset=True) @@ -107,18 +94,7 @@ class URLTitlePlugin: return f"{prefix} {title_label}: {title_text} {separator} {url_label}: {url_text} {suffix}" async def fetch_title(self, url): - """Retrieve the title of a webpage using asynchronous HTTP requests. - - Args: - url (str): The URL to fetch the title from - - Returns: - str: The webpage title or "No title found" if title cannot be extracted - - Raises: - Any exceptions from aiohttp or lxml processing - """ - # Use modern browser User-Agent to avoid being blocked + """Retrieve the title of a webpage using asynchronous HTTP requests.""" headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"