130 lines
5.4 KiB
Python
130 lines
5.4 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
IRC3 Bot Plugin: URL Title Fetcher
|
|
|
|
A plugin for IRC3 bots that monitors chat messages for URLs, fetches their webpage titles, and displays them
|
|
with formatted styling in the chat. Provides visual enhancement to URL sharing in IRC channels.
|
|
|
|
Features:
|
|
- Asynchronous URL processing using aiohttp for efficient network operations
|
|
- Robust HTML parsing with lxml for accurate title extraction
|
|
- Configurable message styling with color and formatting options
|
|
- Built-in exclusion of YouTube URLs to avoid conflicts with dedicated YouTube plugins
|
|
- Error handling for network and parsing operations
|
|
- Proper resource cleanup through session management
|
|
- Queue-based processing system with strict rate limiting
|
|
|
|
Dependencies:
|
|
- aiohttp: For asynchronous HTTP requests
|
|
- irc3: Core IRC bot functionality
|
|
- ircstyle: IRC text formatting utilities
|
|
- lxml: HTML parsing capabilities
|
|
|
|
Author: Zodiac
|
|
Date: 2025-02-14
|
|
"""
|
|
|
|
import asyncio
import re
import time

import aiohttp
import ircstyle
import irc3
from irc3 import event
from irc3.compat import Queue
from lxml import html

from plugins.services.permissions import check_ignore
|
|
|
|
|
|
@irc3.plugin
class URLTitlePlugin:
    """Plugin for fetching and displaying webpage titles from URLs shared in IRC messages.

    Monitors IRC messages for URLs, retrieves their webpage titles, and posts formatted
    responses back to the target the message was sent to. URLs are handled one at a time
    by a background worker that enforces a cooldown between lookups; URLs that arrive
    during the cooldown are delayed, not discarded.

    Attributes:
        bot (irc3.IrcBot): Reference to the main IRC bot instance
        session (aiohttp.ClientSession): Persistent HTTP session for making web requests
        url_pattern (re.Pattern): Compiled regex for URL detection in messages
        queue (Queue): Bounded processing queue of ``(target, url)`` pairs
        last_processed (float): ``time.time()`` of the last successful URL processing
    """

    # Minimum number of seconds between two title lookups.
    COOLDOWN_SECONDS = 5
    # Total time budget, in seconds, for a single HTTP request.
    REQUEST_TIMEOUT_SECONDS = 10
    # Upper bound on queued URLs. Because queued URLs wait for the cooldown
    # instead of being dropped, an unbounded queue would let a flood of links
    # keep the bot busy long after the flood has ended.
    MAX_PENDING_URLS = 20

    def __init__(self, bot):
        """Initialize plugin with bot instance and start the queue worker.

        Args:
            bot: The irc3 bot instance this plugin is attached to.
        """
        self.bot = bot
        self.session = aiohttp.ClientSession(loop=self.bot.loop)
        self.url_pattern = re.compile(r"https?://[^\s<>\"']+|www\.[^\s<>\"']+")
        self.queue = Queue(maxsize=self.MAX_PENDING_URLS)
        self.last_processed = 0  # Epoch start, so the first URL is never delayed
        # Keep a reference to whatever create_task returns: asyncio event loops
        # only hold weak references to tasks, so an unreferenced worker could be
        # garbage-collected while it is still running.
        self._worker = self.bot.create_task(self.process_queue())

    @event(irc3.rfc.PRIVMSG)
    @check_ignore
    async def on_privmsg(self, mask, event, target, data):
        """Handle incoming messages and enqueue URLs for processing.

        YouTube links are skipped. When the queue is full, the remaining URLs
        of the message are dropped and a warning is logged.

        Args:
            mask: Sender mask of the message (unused here).
            event: IRC event name (unused here).
            target: Where the message was sent; the title is posted back there.
            data: Message text to scan for URLs.
        """
        for url in self.url_pattern.findall(data):
            lowered = url.lower()
            if "youtube.com" in lowered or "youtu.be" in lowered:
                continue
            try:
                self.queue.put_nowait((target, url))
            except asyncio.QueueFull:
                self.bot.log.warning(f"URL queue full, dropping {url}")
                # Nothing is awaited in this loop, so the queue cannot drain
                # before the next put; stop instead of failing repeatedly.
                break

    async def process_queue(self):
        """Process queued URLs forever, enforcing the cooldown between lookups.

        A URL dequeued before the cooldown has elapsed is delayed until it has.
        Failures are logged and do not stop the worker.
        """
        while True:
            target, url = await self.queue.get()
            try:
                remaining = self.COOLDOWN_SECONDS - (time.time() - self.last_processed)
                if remaining > 0:
                    self.bot.log.info(f"Rate limited: Waiting {remaining:.1f}s to process {url}")
                    # Cap the delay: if the wall clock jumped backwards,
                    # `remaining` could otherwise be arbitrarily large.
                    await asyncio.sleep(min(remaining, self.COOLDOWN_SECONDS))

                title = await self.fetch_title(url)
                if title:
                    await self.bot.privmsg(target, self.format_message(title, url))

                self.last_processed = time.time()  # Update after successful processing
            except asyncio.CancelledError:
                # Never swallow cancellation (it is an Exception subclass on
                # Python < 3.8), otherwise the worker could not be stopped.
                raise
            except Exception as e:
                self.bot.log.error(f"Error processing URL {url}: {e}")
            finally:
                self.queue.task_done()

    def format_message(self, title, url):
        """Create a styled IRC message containing the webpage title and source URL.

        Args:
            title: Page title to display.
            url: URL the title was fetched from.

        Returns:
            str: The message with IRC colour/formatting codes applied.
        """
        prefix = ircstyle.style("►", fg="cyan", bold=True, reset=True)
        title_label = ircstyle.style("Title", fg="blue", bold=True, reset=True)
        title_text = ircstyle.style(title, fg="green", italics=True, underline=True, reset=True)
        separator = ircstyle.style("❘", fg="grey", bold=True, reset=True)
        url_label = ircstyle.style("Source", fg="blue", bold=True, underline=True, reset=True)
        url_text = ircstyle.style(url, fg="cyan", italics=True, reset=True)
        suffix = ircstyle.style("◄", fg="cyan", bold=True, reset=True)

        return f"{prefix} {title_label}: {title_text} {separator} {url_label}: {url_text} {suffix}"

    async def fetch_title(self, url):
        """Retrieve the title of a webpage using an asynchronous HTTP request.

        Args:
            url: Address to fetch. A URL without an ``http://`` or ``https://``
                scheme (such as a bare ``www.`` match) is requested over
                ``http://``.

        Returns:
            str: The page title collapsed onto a single line, or
            ``"No title found"`` when the page has no usable ``<title>``.

        Raises:
            aiohttp.ClientError: On connection problems or a 4xx/5xx status.
            asyncio.TimeoutError: When the request exceeds the time budget.
        """
        # url_pattern also matches bare "www." links; without a scheme the
        # HTTP client cannot resolve them.
        if not url.lower().startswith(("http://", "https://")):
            url = f"http://{url}"

        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        # A plain number for `timeout` is deprecated in aiohttp; ClientTimeout
        # is the supported way to express a total time budget.
        timeout = aiohttp.ClientTimeout(total=self.REQUEST_TIMEOUT_SECONDS)

        async with self.session.get(url, headers=headers, timeout=timeout) as response:
            response.raise_for_status()
            content = await response.text()

        # lxml raises on an empty document; treat it like a page with no title.
        if not content or not content.strip():
            return "No title found"

        title = html.fromstring(content).findtext(".//title")
        # Titles may span several lines in the markup; an IRC message cannot.
        title = " ".join(title.split()) if title else ""
        return title or "No title found"

    async def close(self):
        """Clean up resources by closing the HTTP session."""
        await self.session.close()

    def __del__(self):
        """Schedule session cleanup when the plugin is destroyed (best effort)."""
        # __init__ may have failed before `session` was assigned.
        session = getattr(self, "session", None)
        if session is None or session.closed:
            return

        closer = self.close()
        try:
            self.bot.create_task(closer)
        except Exception:
            # Typically the event loop is already closed at interpreter
            # shutdown. Nothing useful can be done from a finalizer, so just
            # discard the coroutine to avoid a "never awaited" warning.
            closer.close()
|