# -*- coding: utf-8 -*-
"""
IRC3 Bot Plugin: URL Title Fetcher

This plugin for an IRC bot fetches and displays the titles of URLs shared in
IRC messages. It uses aiohttp for asynchronous HTTP requests and lxml for HTML
parsing.

Features:
    - Listens for PRIVMSG events in the IRC channel.
    - Extracts URLs from messages and fetches their titles.
    - Posts the title and URL back to the IRC channel.

Usage:
======
To use this module, load it as a plugin in your IRC bot configuration.

Example:
    @event(irc3.rfc.PRIVMSG)
    async def on_privmsg(self, mask, event, target, data):
        # Extract URLs from messages and fetch their titles.

Author: Zodiac
Date: 2025-02-13
"""

import asyncio
import contextlib
import re

import aiohttp
import irc3
from irc3 import event
from irc3.compat import Queue
from lxml import html


@irc3.plugin
class URLTitlePlugin:
    """
    A plugin to fetch and display the titles of URLs shared in IRC messages.

    URLs found in PRIVMSG events are pushed onto a queue; a single background
    task pops them one at a time, fetches the page title and posts it back to
    the target the message was sent to.

    Attributes:
        bot (irc3.IrcBot): The IRC bot instance.
        url_queue (Queue): Queue of ``(url, target)`` pairs awaiting lookup.
            It is created without a size limit.
        session (aiohttp.ClientSession): An HTTP session for making requests.
    """

    # Compiled once at class creation instead of on every incoming message.
    # Matches http(s) URLs as well as scheme-less hosts starting with "www.".
    URL_PATTERN = re.compile(r"https?://[^\s<>\"']+|www\.[^\s<>\"']+")

    # Total time budget, in seconds, for a single HTTP request.
    REQUEST_TIMEOUT = 10

    # Longest title (in characters) that is echoed back to IRC.
    MAX_TITLE_LENGTH = 300

    # ASCII control characters. This range includes the IRC formatting codes
    # (bold \x02, colour \x03, ...) and the CTCP delimiter \x01, none of which
    # should be relayed from an untrusted web page.
    _CONTROL_CHARS = re.compile(r"[\x00-\x1f\x7f]+")

    def __init__(self, bot):
        """
        Initialize the URLTitlePlugin.

        Creates the URL queue and the HTTP session, then starts the
        background task that consumes the queue.

        Args:
            bot (irc3.IrcBot): The IRC bot instance.
        """
        self.bot = bot
        self.url_queue = Queue()  # Queue for managing URL processing
        # NOTE(review): the loop is passed explicitly, presumably because the
        # plugin can be instantiated before the bot's loop is running.
        self.session = aiohttp.ClientSession(loop=self.bot.loop)
        # Keep a handle on the worker so close() is able to cancel it.
        self._worker = self.bot.create_task(self.process_urls())

    @event(irc3.rfc.PRIVMSG)
    async def on_privmsg(self, mask, event, target, data):
        """
        Listen for PRIVMSG events and check for URLs.

        Every URL found in the message is queued together with the message
        target; the actual HTTP work happens in ``process_urls``.

        Args:
            mask (str): The user's mask (e.g., nick!user@host).
            event (str): The IRC event type (e.g., PRIVMSG). Within this
                method the name shadows the imported ``event`` decorator.
            target (str): The target of the message (e.g., channel or user).
            data (str): The content of the message.
        """
        for url in self.URL_PATTERN.findall(data):
            # The queue is created without a maxsize, so this await is not
            # expected to block.
            await self.url_queue.put((url, target))

    async def process_urls(self):
        """
        Process URLs from the queue and fetch their titles.

        Runs until cancelled, handling one URL at a time. For every URL whose
        title lookup yields a non-empty string, a formatted message is sent
        to the target the URL was seen on. Failures are logged and do not
        stop the loop.
        """
        while True:
            url, target = await self.url_queue.get()
            try:
                title = await self.fetch_title(url)
                if title:
                    message = self._format_message(title, url)
                    await self.bot.privmsg(target, message)
            except asyncio.CancelledError:
                # On Python 3.7 CancelledError derives from Exception; never
                # let the generic handler below swallow a cancellation.
                raise
            except Exception as e:
                self.bot.log.error(f"Error processing URL {url}: {e}")
            finally:
                self.url_queue.task_done()

    async def fetch_title(self, url):
        """
        Fetch the title of a web page using aiohttp and lxml.

        Args:
            url (str): The URL of the web page. A URL without an
                ``http://`` or ``https://`` prefix (such as the bare
                ``www.`` hosts the URL pattern matches) is requested over
                ``http://``.

        Returns:
            str: The cleaned-up page title, ``"No title found"`` when the
            page is HTML but has no usable ``<title>``, or ``None`` when the
            request failed or the response is not HTML.
        """
        headers = {"User-Agent": "Mozilla/5.0"}

        # aiohttp needs an absolute URL; "www.example.com" alone is rejected.
        request_url = url
        if not url.lower().startswith(("http://", "https://")):
            request_url = f"http://{url}"

        try:
            async with self.session.get(
                request_url, headers=headers, timeout=self.REQUEST_TIMEOUT
            ) as response:
                # Check if the response was successful
                response.raise_for_status()
                # Do not download and parse images, archives and the like.
                if "html" not in response.content_type.lower():
                    return None
                content = await response.text()

            tree = html.fromstring(content)
            title = self._clean_title(tree.findtext(".//title"))
            return title or "No title found"
        except asyncio.CancelledError:
            # See process_urls: keep cancellation working on Python 3.7.
            raise
        except aiohttp.ClientError as e:
            self.bot.log.error(f"HTTP error for {url}: {e}")
        except asyncio.TimeoutError:
            self.bot.log.error(f"Request timed out for {url}")
        except Exception as e:
            self.bot.log.error(f"Unexpected error for {url}: {e}")
        return None

    @classmethod
    def _clean_title(cls, title):
        """
        Make a raw ``<title>`` text safe and compact for a single IRC line.

        Control characters are replaced by spaces, runs of whitespace are
        collapsed, and the result is truncated to ``MAX_TITLE_LENGTH``
        characters (ending in ``...`` when shortened).

        Args:
            title (str | None): Text of the title element, if any.

        Returns:
            str: The cleaned title; an empty string when nothing is left.
        """
        if not title:
            return ""
        text = cls._CONTROL_CHARS.sub(" ", title)
        text = " ".join(text.split())
        if len(text) > cls.MAX_TITLE_LENGTH:
            text = text[: cls.MAX_TITLE_LENGTH - 3].rstrip() + "..."
        return text

    @staticmethod
    def _format_message(title, url):
        """
        Build the coloured IRC line announcing a title.

        Args:
            title (str): The page title to display.
            url (str): The URL as it appeared in the IRC message.

        Returns:
            str: The message including IRC bold/colour control codes.
        """
        # The colour code in front of the title is written with two digits
        # ("04"): a one-digit code followed by a title that starts with a
        # digit would be read as a different colour and eat that digit.
        return (
            f"\x02\x0312Title:\x03 \x0304{title}\x03 \x02|\x02 "
            f"\x032URL:\x03 \x0311{url}\x03"
        )

    async def close(self):
        """
        Release the plugin's resources.

        Cancels the background worker (waiting for it to finish unwinding)
        and closes the aiohttp session. Safe to call more than once.

        NOTE(review): nothing in this file calls this method except
        ``__del__``'s fallback path, which only closes the session; whoever
        owns the plugin has to await it on shutdown.
        """
        worker = getattr(self, "_worker", None)
        if worker is not None and not worker.done():
            worker.cancel()
            with contextlib.suppress(asyncio.CancelledError):
                await worker

        session = getattr(self, "session", None)
        if session is not None and not session.closed:
            await session.close()

    def __del__(self):
        """
        Best-effort session cleanup when the object is destroyed.

        Schedules ``session.close()`` on the bot's event loop if the session
        is still open and the loop has not been closed. The session's own
        coroutine is scheduled rather than ``self.close()`` so that the
        object being finalised is not kept alive by the new task.
        """
        # Attributes may be missing if __init__ failed part-way through.
        session = getattr(self, "session", None)
        bot = getattr(self, "bot", None)
        if session is None or bot is None or session.closed:
            return

        loop = getattr(bot, "loop", None)
        if loop is None or loop.is_closed():
            return

        closing = session.close()
        try:
            bot.create_task(closing)
        except Exception:
            # Finalizers must not raise; drop the never-scheduled coroutine
            # so it does not trigger a "never awaited" warning.
            closing.close()