g1mp/plugins/url_title_sniffer.py

149 lines
5.1 KiB
Python
Raw Normal View History

2025-02-13 06:35:15 +00:00
# -*- coding: utf-8 -*-
2025-02-13 04:55:42 +00:00
"""
IRC3 Bot Plugin: URL Title Fetcher

This plugin for an IRC bot fetches and displays the titles of URLs shared in
IRC messages. It uses aiohttp for asynchronous HTTP requests and lxml for HTML
parsing.

Features:
    - Listens for PRIVMSG events in the IRC channel.
    - Extracts URLs from messages and fetches their titles.
    - Posts the title and URL back to the IRC channel.

Usage:
======
To use this module, load it as a plugin in your IRC bot configuration.

Example:
    @event(irc3.rfc.PRIVMSG)
    async def on_privmsg(self, mask, event, target, data):
        # Extract URLs from messages and fetch their titles.

Author: Zodiac
Date: 2025-02-13
"""
import re
import asyncio
import aiohttp
from lxml import html
import irc3
from irc3 import event
from irc3.compat import Queue
2025-02-13 06:35:15 +00:00
@irc3.plugin
class URLTitlePlugin:
    """
    A plugin to fetch and display the titles of URLs shared in IRC messages.

    URLs found in PRIVMSG events are pushed onto a queue. A single background
    task pops them one at a time, downloads the page with aiohttp, extracts
    the ``<title>`` element with lxml and posts the result to the target the
    message was sent to.

    Attributes:
        bot (irc3.IrcBot): The IRC bot instance.
        url_queue (Queue): Pending ``(url, target)`` pairs awaiting processing.
        session (aiohttp.ClientSession | None): HTTP session for making
            requests. Created lazily on the first fetch; ``None`` before that
            and after :meth:`close`.
    """

    # Matches http(s) URLs and bare "www." hosts. Compiled once at class
    # creation instead of on every incoming message.
    URL_PATTERN = re.compile(r"https?://[^\s<>\"']+|www\.[^\s<>\"']+")

    # Total time budget, in seconds, for a single page fetch.
    REQUEST_TIMEOUT = 10

    # Upper bound on how much of a response body is downloaded. URLs come from
    # arbitrary IRC users, so an unbounded read could exhaust memory; the
    # <title> element normally appears near the top of the document.
    MAX_BODY_BYTES = 512 * 1024

    # Size of each read while streaming the response body.
    CHUNK_SIZE = 8192

    def __init__(self, bot):
        """
        Initialize the URLTitlePlugin.

        Args:
            bot (irc3.IrcBot): The IRC bot instance.
        """
        self.bot = bot
        self.url_queue = Queue()  # Queue for managing URL processing
        # The session is created on first use from inside a coroutine (see
        # _get_session) rather than here, where no event loop is running yet.
        self.session = None
        # Keep whatever create_task returns so close() can stop the worker.
        self._worker = self.bot.create_task(self.process_urls())

    @event(irc3.rfc.PRIVMSG)
    async def on_privmsg(self, mask, event, target, data):
        """
        Listen for PRIVMSG events and check for URLs.

        Args:
            mask (str): The user's mask (e.g., nick!user@host).
            event (str): The IRC event type (e.g., PRIVMSG).
            target (str): The target of the message (e.g., channel or user).
            data (str): The content of the message.

        Every URL found in the message is queued together with ``target`` for
        asynchronous processing by :meth:`process_urls`.
        """
        # NOTE(review): for a private message ``target`` is presumably the
        # bot's own nick, in which case the reply would be addressed to the
        # bot itself -- confirm, and consider replying to the sender instead.
        for url in self.URL_PATTERN.findall(data or ""):
            # The queue is created without a maxsize, so put_nowait cannot
            # raise QueueFull and the handler never waits on the worker.
            self.url_queue.put_nowait((url, target))

    async def process_urls(self):
        """
        Process URLs from the queue and fetch their titles.

        This method runs until cancelled, handling one URL at a time. When a
        title is obtained it is sent, together with the URL, to the target
        the original message was addressed to. Fetch failures (``None`` from
        :meth:`fetch_title`) are skipped silently; they were already logged.
        """
        while True:
            url, target = await self.url_queue.get()
            try:
                title = await self.fetch_title(url)
                if title:
                    # Format the IRC message with colors and styles. The
                    # title colour uses the two-digit form (04): with a
                    # one-digit code, a title starting with a digit would be
                    # parsed as part of the colour number and lose its first
                    # character.
                    formatted_message = (
                        f"\x02\x0312Title:\x03 \x0304{title}\x03 \x02|\x02 "
                        f"\x032URL:\x03 \x0311{url}\x03"
                    )
                    await self.bot.privmsg(target, formatted_message)
            except asyncio.CancelledError:
                # Never swallow cancellation (on Python 3.7 CancelledError is
                # an Exception subclass and would hit the handler below).
                raise
            except Exception as e:
                # Top-level boundary of the worker: log and keep serving the
                # queue so one bad URL cannot kill the background task.
                self.bot.log.error("Error processing URL %s: %s", url, e)
            finally:
                self.url_queue.task_done()

    async def fetch_title(self, url):
        """
        Fetch the title of a web page using aiohttp and lxml.

        Args:
            url (str): The URL of the web page. A URL without an ``http://``
                or ``https://`` prefix (a bare ``www.`` match) is fetched
                over ``http://``.

        Returns:
            str: The whitespace-normalized title of the page,
            ``"No title found"`` if the page has no (non-blank) title, or
            None if the page could not be fetched or parsed.

        At most ``MAX_BODY_BYTES`` of the response body are downloaded.
        """
        headers = {"User-Agent": "Mozilla/5.0"}
        timeout = aiohttp.ClientTimeout(total=self.REQUEST_TIMEOUT)
        try:
            session = self._get_session()
            async with session.get(
                self._normalize_url(url), headers=headers, timeout=timeout
            ) as response:
                # Check if the response was successful
                response.raise_for_status()
                body = await self._read_body(response)
                # Parse the raw bytes and pass the header charset to the
                # parser: lxml rejects already-decoded text that carries an
                # XML encoding declaration, and with no header charset it can
                # still detect the encoding from the document itself.
                parser = html.HTMLParser(encoding=response.charset)
                tree = html.fromstring(body, parser=parser)
                title = tree.findtext(".//title")
        except asyncio.CancelledError:
            raise
        except aiohttp.ClientError as e:
            self.bot.log.error("HTTP error for %s: %s", url, e)
        except asyncio.TimeoutError:
            self.bot.log.error("Request timed out for %s", url)
        except Exception as e:
            self.bot.log.error("Unexpected error for %s: %s", url, e)
        else:
            # Collapse runs of whitespace (including line breaks inside the
            # <title> element) so the title fits on a single IRC line.
            cleaned = " ".join(title.split()) if title else ""
            return cleaned or "No title found"
        return None

    @staticmethod
    def _normalize_url(url):
        """Return ``url`` with ``http://`` prepended when it has no scheme."""
        if url.startswith(("http://", "https://")):
            return url
        return "http://" + url

    def _get_session(self):
        """Return the shared HTTP session, creating it on first use."""
        if self.session is None or self.session.closed:
            self.session = aiohttp.ClientSession()
        return self.session

    async def _read_body(self, response):
        """Read at most ``MAX_BODY_BYTES`` bytes of the response body."""
        chunks = []
        remaining = self.MAX_BODY_BYTES
        async for chunk in response.content.iter_chunked(self.CHUNK_SIZE):
            chunks.append(chunk[:remaining])
            remaining -= len(chunk)
            if remaining <= 0:
                break
        return b"".join(chunks)

    async def close(self):
        """
        Clean up resources when the plugin is unloaded.

        Cancels the background worker (if a handle to it was kept) and closes
        the aiohttp session to avoid resource leaks. Safe to call more than
        once.
        """
        worker, self._worker = getattr(self, "_worker", None), None
        if worker is not None:
            worker.cancel()
        session, self.session = getattr(self, "session", None), None
        if session is not None and not session.closed:
            await session.close()

    def __del__(self):
        """
        Ensure session closing when the object is destroyed.

        Best effort only: schedules :meth:`close` on the bot when an open
        session still exists, and does nothing if scheduling is no longer
        possible.
        """
        # getattr: __init__ may have failed before the attribute was set.
        session = getattr(self, "session", None)
        if session is None or session.closed:
            return
        closer = self.close()
        try:
            self.bot.create_task(closer)
        except Exception:
            # Scheduling failed (e.g. the loop is already closed). Dispose of
            # the coroutine so it does not warn about never being awaited.
            closer.close()