update to use queue

This commit is contained in:
Zodiac 2025-02-14 17:48:30 -08:00
parent af260d785a
commit a5515b8fd4

View File

@@ -12,6 +12,7 @@ Features:
- Built-in exclusion of YouTube URLs to avoid conflicts with dedicated YouTube plugins - Built-in exclusion of YouTube URLs to avoid conflicts with dedicated YouTube plugins
- Error handling for network and parsing operations - Error handling for network and parsing operations
- Proper resource cleanup through session management - Proper resource cleanup through session management
- Queue-based processing system for better flow control
Dependencies: Dependencies:
- aiohttp: For asynchronous HTTP requests - aiohttp: For asynchronous HTTP requests
@@ -19,7 +20,7 @@ Dependencies:
- ircstyle: IRC text formatting utilities - ircstyle: IRC text formatting utilities
- lxml: HTML parsing capabilities - lxml: HTML parsing capabilities
Author: Zodiac (simplified by Claude) Author: Zodiac
Date: 2025-02-14 Date: 2025-02-14
""" """
@@ -29,6 +30,7 @@ import ircstyle
from lxml import html from lxml import html
import irc3 import irc3
from irc3 import event from irc3 import event
from irc3.compat import Queue
from plugins.services.permissions import check_ignore from plugins.services.permissions import check_ignore
@@ -43,59 +45,44 @@ class URLTitlePlugin:
bot (irc3.IrcBot): Reference to the main IRC bot instance bot (irc3.IrcBot): Reference to the main IRC bot instance
session (aiohttp.ClientSession): Persistent HTTP session for making web requests session (aiohttp.ClientSession): Persistent HTTP session for making web requests
url_pattern (re.Pattern): Compiled regex for URL detection in messages url_pattern (re.Pattern): Compiled regex for URL detection in messages
queue (Queue): Processing queue for URL handling tasks
""" """
def __init__(self, bot): def __init__(self, bot):
"""Initialize plugin with bot instance and set up HTTP session. """Initialize plugin with bot instance and set up components."""
Args:
bot (irc3.IrcBot): The IRC bot instance this plugin will be attached to
"""
self.bot = bot self.bot = bot
# Create persistent HTTP session for better performance
self.session = aiohttp.ClientSession(loop=self.bot.loop) self.session = aiohttp.ClientSession(loop=self.bot.loop)
# Regex pattern matches both http(s):// URLs and www. domains
self.url_pattern = re.compile(r"https?://[^\s<>\"']+|www\.[^\s<>\"']+") self.url_pattern = re.compile(r"https?://[^\s<>\"']+|www\.[^\s<>\"']+")
self.queue = Queue()
self.bot.create_task(self.process_queue())
@event(irc3.rfc.PRIVMSG) @event(irc3.rfc.PRIVMSG)
@check_ignore @check_ignore
async def on_privmsg(self, mask, event, target, data): async def on_privmsg(self, mask, event, target, data):
"""Handle incoming private messages by processing any URLs they contain. """Handle incoming messages and enqueue URLs for processing."""
Args:
mask (str): IRC user mask of message sender
event (str): IRC event type
target (str): Channel or user the message was sent to
data (str): Content of the message
"""
# Extract all URLs from the message
urls = self.url_pattern.findall(data) urls = self.url_pattern.findall(data)
for url in urls: for url in urls:
# Skip YouTube URLs as they're typically handled by dedicated plugins
if "youtube.com" in url.lower() or "youtu.be" in url.lower(): if "youtube.com" in url.lower() or "youtu.be" in url.lower():
continue continue
self.queue.put_nowait((target, url))
async def process_queue(self):
"""Process URLs from the queue asynchronously."""
while True:
target, url = await self.queue.get()
try: try:
title = await self.fetch_title(url) title = await self.fetch_title(url)
if title: if title:
formatted_message = self.format_message(title, url) formatted_message = self.format_message(title, url)
await self.bot.privmsg(target, formatted_message) await self.bot.privmsg(target, formatted_message)
except Exception as e: except Exception as e:
# Log errors but continue processing other URLs
self.bot.log.error(f"Error processing URL {url}: {e}") self.bot.log.error(f"Error processing URL {url}: {e}")
finally:
self.queue.task_done()
def format_message(self, title, url): def format_message(self, title, url):
"""Create a styled IRC message containing the webpage title and source URL. """Create a styled IRC message containing the webpage title and source URL."""
Args:
title (str): The webpage title to display
url (str): The source URL
Returns:
str: Formatted IRC message with styling applied
"""
# Define styled components for the message
prefix = ircstyle.style("", fg="cyan", bold=True, reset=True) prefix = ircstyle.style("", fg="cyan", bold=True, reset=True)
title_label = ircstyle.style("Title", fg="blue", bold=True, reset=True) title_label = ircstyle.style("Title", fg="blue", bold=True, reset=True)
title_text = ircstyle.style(title, fg="green", italics=True, underline=True, reset=True) title_text = ircstyle.style(title, fg="green", italics=True, underline=True, reset=True)
@@ -107,18 +94,7 @@ class URLTitlePlugin:
return f"{prefix} {title_label}: {title_text} {separator} {url_label}: {url_text} {suffix}" return f"{prefix} {title_label}: {title_text} {separator} {url_label}: {url_text} {suffix}"
async def fetch_title(self, url): async def fetch_title(self, url):
"""Retrieve the title of a webpage using asynchronous HTTP requests. """Retrieve the title of a webpage using asynchronous HTTP requests."""
Args:
url (str): The URL to fetch the title from
Returns:
str: The webpage title or "No title found" if title cannot be extracted
Raises:
Any exceptions from aiohttp or lxml processing
"""
# Use modern browser User-Agent to avoid being blocked
headers = { headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"