From af260d785a8e4fb1b82ddaa9e349b5d3fe83accf Mon Sep 17 00:00:00 2001
From: Zodiac <zodiac@supernets.org>
Date: Fri, 14 Feb 2025 17:16:18 -0800
Subject: [PATCH] update for url sniffer

---
 plugins/url_title_sniffer.py | 187 +++++++++++++++++------------------
 1 file changed, 89 insertions(+), 98 deletions(-)

diff --git a/plugins/url_title_sniffer.py b/plugins/url_title_sniffer.py
index ca015b6..3939b3a 100644
--- a/plugins/url_title_sniffer.py
+++ b/plugins/url_title_sniffer.py
@@ -2,148 +2,139 @@
 """
 IRC3 Bot Plugin: URL Title Fetcher
 
-This plugin for an IRC bot fetches and displays the titles of URLs shared in IRC messages.
-It uses aiohttp for asynchronous HTTP requests and lxml for HTML parsing.
+A plugin for IRC3 bots that monitors chat messages for URLs, fetches their webpage titles, and displays them
+with formatted styling in the chat. Provides visual enhancement to URL sharing in IRC channels.
 
 Features:
-- Listens for PRIVMSG events in the IRC channel.
-- Extracts URLs from messages and fetches their titles.
-- Posts the title and URL back to the IRC channel.
+    - Asynchronous URL processing using aiohttp for efficient network operations
+    - Robust HTML parsing with lxml for accurate title extraction
+    - Configurable message styling with color and formatting options
+    - Built-in exclusion of YouTube URLs to avoid conflicts with dedicated YouTube plugins
+    - Error handling for network and parsing operations
+    - Proper resource cleanup through session management
 
-Usage:
-======
-To use this module, load it as a plugin in your IRC bot configuration.
+Dependencies:
+    - aiohttp: For asynchronous HTTP requests
+    - irc3: Core IRC bot functionality
+    - ircstyle: IRC text formatting utilities
+    - lxml: HTML parsing capabilities
 
-Example:
-    @event
-    def on_privmsg(self, mask, event, target, data):
-        # Extract URLs from messages and fetch their titles.
-
-Author: Zodiac
-Date: 2025-02-13
+Author: Zodiac (simplified by Claude)
+Date: 2025-02-14
 """
 
 import re
-import asyncio
 import aiohttp
+import ircstyle
 from lxml import html
 import irc3
 from irc3 import event
-from irc3.compat import Queue
+from plugins.services.permissions import check_ignore
 
 
 @irc3.plugin
 class URLTitlePlugin:
-    """
-    A plugin to fetch and display the titles of URLs shared in IRC messages.
+    """Plugin for fetching and displaying webpage titles from URLs shared in IRC messages.
+    
+    Monitors IRC messages for URLs, retrieves their webpage titles, and posts formatted responses
+    back to the channel. Supports styled text output with configurable formatting options.
 
     Attributes:
-        bot (irc3.IrcBot): The IRC bot instance.
-        url_queue (Queue): A queue to manage URL processing asynchronously.
-        session (aiohttp.ClientSession): An HTTP session for making requests.
+        bot (irc3.IrcBot): Reference to the main IRC bot instance
+        session (aiohttp.ClientSession): Persistent HTTP session for making web requests
+        url_pattern (re.Pattern): Compiled regex for URL detection in messages
     """
 
     def __init__(self, bot):
-        """
-        Initialize the URLTitlePlugin.
-
+        """Initialize plugin with bot instance and set up HTTP session.
+        
         Args:
-            bot (irc3.IrcBot): The IRC bot instance.
+            bot (irc3.IrcBot): The IRC bot instance this plugin will be attached to
         """
         self.bot = bot
-        self.url_queue = Queue()  # Queue for managing URL processing
+        # Create persistent HTTP session for better performance
         self.session = aiohttp.ClientSession(loop=self.bot.loop)
-        self.bot.create_task(self.process_urls())  # Start URL processor
+        # Regex pattern matches both http(s):// URLs and www. domains
+        self.url_pattern = re.compile(r"https?://[^\s<>\"']+|www\.[^\s<>\"']+")
 
     @event(irc3.rfc.PRIVMSG)
+    @check_ignore
     async def on_privmsg(self, mask, event, target, data):
-        """
-        Listen for PRIVMSG events and check for URLs.
-
+        """Handle PRIVMSG events (channel or direct messages) by processing any URLs they contain.
+        
         Args:
-            mask (str): The user's mask (e.g., nick!user@host).
-            event (str): The IRC event type (e.g., PRIVMSG).
-            target (str): The target of the message (e.g., channel or user).
-            data (str): The content of the message.
-
-        This method extracts URLs from the message and adds them to the queue
-        for asynchronous processing.
+            mask (str): IRC user mask of message sender
+            event (str): IRC event type
+            target (str): Channel or user the message was sent to
+            data (str): Content of the message
         """
-        url_pattern = re.compile(r"https?://[^\s<>\"']+|www\.[^\s<>\"']+")
-        urls = url_pattern.findall(data)
+        # Extract all URLs from the message
+        urls = self.url_pattern.findall(data)
+        
         for url in urls:
-            # Use put_nowait to avoid blocking if the queue is full
-            await self.url_queue.put((url, target))
-
-    async def process_urls(self):
-        """
-        Process URLs from the queue and fetch their titles.
-
-        This method runs indefinitely, processing one URL at a time from the queue.
-        It fetches the title of each URL and sends it back to the IRC channel.
-        """
-        while True:
-            url, target = await self.url_queue.get()
+            # Skip YouTube URLs as they're typically handled by dedicated plugins
+            if "youtube.com" in url.lower() or "youtu.be" in url.lower():
+                continue
+                
             try:
                 title = await self.fetch_title(url)
                 if title:
-                    # Format the IRC message with colors and styles
-                    formatted_message = (
-                        f"\x02\x0312Title:\x03 \x034{title}\x03 \x02|\x02 "
-                        f"\x032URL:\x03 \x0311{url}\x03"
-                    )
+                    formatted_message = self.format_message(title, url)
                     await self.bot.privmsg(target, formatted_message)
-                else:
-                    # Handle cases where no title is found
-                    pass
             except Exception as e:
+                # Log errors but continue processing other URLs
                 self.bot.log.error(f"Error processing URL {url}: {e}")
-            finally:
-                self.url_queue.task_done()
+
+    def format_message(self, title, url):
+        """Create a styled IRC message containing the webpage title and source URL.
+        
+        Args:
+            title (str): The webpage title to display
+            url (str): The source URL
+            
+        Returns:
+            str: Formatted IRC message with styling applied
+        """
+        # Define styled components for the message
+        prefix = ircstyle.style("►", fg="cyan", bold=True, reset=True)
+        title_label = ircstyle.style("Title", fg="blue", bold=True, reset=True)
+        title_text = ircstyle.style(title, fg="green", italics=True, underline=True, reset=True)
+        separator = ircstyle.style("❘", fg="grey", bold=True, reset=True)
+        url_label = ircstyle.style("Source", fg="blue", bold=True, underline=True, reset=True)
+        url_text = ircstyle.style(url, fg="cyan", italics=True, reset=True)
+        suffix = ircstyle.style("◄", fg="cyan", bold=True, reset=True)
+
+        return f"{prefix} {title_label}: {title_text} {separator} {url_label}: {url_text} {suffix}"
 
     async def fetch_title(self, url):
-        """
-        Fetch the title of a web page using aiohttp and lxml.
-
+        """Retrieve the title of a webpage using asynchronous HTTP requests.
+        
         Args:
-            url (str): The URL of the web page.
-
+            url (str): The URL to fetch the title from
+            
         Returns:
-            str: The title of the web page, or None if it could not be fetched.
-
-        This method makes an HTTP GET request to the URL, parses the HTML content,
-        and extracts the title element.
+            str: The webpage title or "No title found" if title cannot be extracted
+            
+        Raises:
+            Propagates aiohttp.ClientError (incl. HTTP error status), asyncio.TimeoutError and lxml parse errors
         """
-        headers = {"User-Agent": "Mozilla/5.0"}
-        try:
-            async with self.session.get(url, headers=headers, timeout=10) as response:
-                # Check if the response was successful
-                response.raise_for_status()
-                content = await response.text()
-                tree = html.fromstring(content)
-                title = tree.findtext(".//title")
-                return title.strip() if title else "No title found"
-        except aiohttp.ClientError as e:
-            self.bot.log.error(f"HTTP error for {url}: {e}")
-        except asyncio.TimeoutError:
-            self.bot.log.error(f"Request timed out for {url}")
-        except Exception as e:
-            self.bot.log.error(f"Unexpected error for {url}: {e}")
-        return None
+        # Send a desktop-browser User-Agent string to avoid being blocked
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+                         "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+        }
+
+        async with self.session.get(url, headers=headers, timeout=10) as response:
+            response.raise_for_status()
+            content = await response.text()
+            tree = html.fromstring(content)
+            title = tree.findtext(".//title")
+            return title.strip() if title else "No title found"
 
     async def close(self):
-        """
-        Clean up resources when the plugin is unloaded.
-
-        This method ensures that the aiohttp session is properly closed to avoid
-        resource leaks.
-        """
+        """Clean up resources by closing the HTTP session."""
         await self.session.close()
 
     def __del__(self):
-        """
-        Ensure session closing when the object is destroyed.
-
-        This method schedules the session cleanup task on the bot's event loop.
-        """
+        """Schedule self.close() as a bot task when the plugin object is destroyed."""
         self.bot.create_task(self.close())
\ No newline at end of file