g1mp/plugins/url_title_sniffer.py

140 lines
5.7 KiB
Python
Raw Normal View History

2025-02-13 06:35:15 +00:00
# -*- coding: utf-8 -*-
2025-02-13 04:55:42 +00:00
"""
IRC3 Bot Plugin: URL Title Fetcher

A plugin for IRC3 bots that monitors chat messages for URLs, fetches their webpage titles, and displays them
with formatted styling in the chat. Provides visual enhancement to URL sharing in IRC channels.

Features:
- Asynchronous URL processing using aiohttp for efficient network operations
- Robust HTML parsing with lxml for accurate title extraction
- Configurable message styling with color and formatting options
- Built-in exclusion of YouTube URLs to avoid conflicts with dedicated YouTube plugins
- Error handling for network and parsing operations
- Proper resource cleanup through session management

Dependencies:
- aiohttp: For asynchronous HTTP requests
- irc3: Core IRC bot functionality
- ircstyle: IRC text formatting utilities
- lxml: HTML parsing capabilities

Author: Zodiac (simplified by Claude)
Date: 2025-02-14
"""
import re
import aiohttp
2025-02-15 01:16:18 +00:00
import ircstyle
2025-02-13 04:55:42 +00:00
from lxml import html
import irc3
from irc3 import event
2025-02-15 01:16:18 +00:00
from plugins.services.permissions import check_ignore
2025-02-13 04:55:42 +00:00
2025-02-13 06:35:15 +00:00
@irc3.plugin
class URLTitlePlugin:
    """Plugin that posts the page title of URLs seen in IRC messages.

    Every PRIVMSG is scanned with ``url_pattern``; for each match that is not
    a YouTube link the page is fetched, its ``<title>`` extracted, and a
    styled message is sent back to the message's target.

    Attributes:
        bot (irc3.IrcBot): Reference to the main IRC bot instance.
        session (aiohttp.ClientSession | None): Shared HTTP session. It is
            ``None`` until the first fetch and again after ``close()``.
        url_pattern (re.Pattern): Compiled regex for URL detection in messages.
    """

    # Total seconds allowed for one request (connect + headers + body).
    FETCH_TIMEOUT = 10
    # Upper bound on how much of a response body is downloaded and parsed.
    MAX_BODY_BYTES = 512 * 1024
    # Longest title that will be relayed to the channel.
    MAX_TITLE_LENGTH = 300
    # Content types that can carry a <title> element.
    HTML_CONTENT_TYPES = ("text/html", "application/xhtml+xml")

    def __init__(self, bot):
        """Store the bot reference and compile the URL regex.

        Args:
            bot (irc3.IrcBot): The IRC bot instance this plugin is attached to.
        """
        self.bot = bot
        # The session is created lazily from inside a coroutine (see
        # _get_session): building it here, outside a running loop and with
        # the deprecated ``loop=`` argument, is discouraged by aiohttp.
        self.session = None
        # Regex pattern matches both http(s):// URLs and www. domains
        self.url_pattern = re.compile(r"https?://[^\s<>\"']+|www\.[^\s<>\"']+")

    def _get_session(self):
        """Return the shared HTTP session, creating it on first use.

        Must be called from a coroutine so the session binds to the running
        event loop. A session that was closed is replaced by a fresh one.
        """
        if self.session is None or self.session.closed:
            self.session = aiohttp.ClientSession(
                timeout=aiohttp.ClientTimeout(total=self.FETCH_TIMEOUT)
            )
        return self.session

    @event(irc3.rfc.PRIVMSG)
    @check_ignore
    async def on_privmsg(self, mask, event, target, data):
        """Handle an incoming PRIVMSG by announcing titles of contained URLs.

        Args:
            mask (str): IRC user mask of message sender
            event (str): IRC event type
            target (str): Channel or user the message was sent to
            data (str): Content of the message
        """
        for url in self.url_pattern.findall(data):
            # Skip YouTube URLs as they're typically handled by dedicated plugins
            lowered = url.lower()
            if "youtube.com" in lowered or "youtu.be" in lowered:
                continue
            try:
                title = await self.fetch_title(url)
                if title:
                    formatted_message = self.format_message(title, url)
                    await self.bot.privmsg(target, formatted_message)
            except Exception as e:
                # Top-level boundary: one bad URL must not prevent the
                # remaining URLs in the message from being processed.
                self.bot.log.error(f"Error processing URL {url}: {e}")

    def format_message(self, title, url):
        """Create a styled IRC message containing the webpage title and source URL.

        The prefix, separator and suffix are styled empty strings, so they
        contribute only formatting codes to the output.

        Args:
            title (str): The webpage title to display
            url (str): The source URL

        Returns:
            str: Formatted IRC message with styling applied
        """
        prefix = ircstyle.style("", fg="cyan", bold=True, reset=True)
        title_label = ircstyle.style("Title", fg="blue", bold=True, reset=True)
        title_text = ircstyle.style(title, fg="green", italics=True, underline=True, reset=True)
        separator = ircstyle.style("", fg="grey", bold=True, reset=True)
        url_label = ircstyle.style("Source", fg="blue", bold=True, underline=True, reset=True)
        url_text = ircstyle.style(url, fg="cyan", italics=True, reset=True)
        suffix = ircstyle.style("", fg="cyan", bold=True, reset=True)

        return f"{prefix} {title_label}: {title_text} {separator} {url_label}: {url_text} {suffix}"

    @staticmethod
    async def _read_limited(response, limit):
        """Read at most ``limit`` bytes of the response body and return them."""
        chunks = []
        remaining = limit
        async for chunk in response.content.iter_chunked(8192):
            chunks.append(chunk[:remaining])
            remaining -= len(chunk)
            if remaining <= 0:
                break
        return b"".join(chunks)

    async def fetch_title(self, url):
        """Retrieve the title of a webpage using asynchronous HTTP requests.

        Args:
            url (str): The URL to fetch the title from. A URL without an
                ``http://`` or ``https://`` scheme (the regex also matches
                bare ``www.`` links) is requested over ``http://``.

        Returns:
            str | None: The title with runs of whitespace collapsed and
            truncated to ``MAX_TITLE_LENGTH``; "No title found" if the
            document has no title text; ``None`` if the response is not HTML.

        Raises:
            Any exceptions from aiohttp or lxml processing
        """
        # aiohttp rejects URLs without a scheme, so bare "www." matches would
        # otherwise always fail.
        if not url.lower().startswith(("http://", "https://")):
            url = "http://" + url

        # Use modern browser User-Agent to avoid being blocked
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        # NOTE(review): URLs come from untrusted chat input and may point at
        # internal hosts; consider restricting destinations if that matters.
        session = self._get_session()
        async with session.get(url, headers=headers) as response:
            response.raise_for_status()
            # Images, archives, etc. have no <title>; don't download them.
            if response.content_type not in self.HTML_CONTENT_TYPES:
                return None
            body = await self._read_limited(response, self.MAX_BODY_BYTES)
            charset = response.charset

        # Parse bytes rather than decoded text: lxml refuses str input that
        # carries an XML encoding declaration (common on XHTML pages). The
        # charset from the HTTP header, when present, overrides detection.
        parser = html.HTMLParser(encoding=charset)
        tree = html.fromstring(body, parser=parser)
        title = tree.findtext(".//title")
        if not title:
            return "No title found"
        # Collapse newlines/indentation inside the title and bound its length
        # so a hostile page cannot flood the channel.
        return " ".join(title.split())[: self.MAX_TITLE_LENGTH]

    async def close(self):
        """Clean up resources by closing the HTTP session (safe to call twice)."""
        session, self.session = self.session, None
        if session is not None and not session.closed:
            await session.close()

    def __del__(self):
        """Best-effort cleanup of the HTTP session when the plugin is destroyed."""
        # getattr: __del__ also runs when __init__ did not complete.
        session = getattr(self, "session", None)
        bot = getattr(self, "bot", None)
        if session is None or session.closed or bot is None:
            return
        loop = getattr(bot, "loop", None)
        if loop is None or loop.is_closed():
            return
        try:
            # Schedule session.close() rather than self.close() so the task
            # does not hold a reference that resurrects this object.
            bot.create_task(session.close())
        except RuntimeError:
            # Loop is shutting down; nothing more can be done from a finalizer.
            pass