mirror of git://git.acid.vegas/dickserv.git
64 lines
1.7 KiB
Python
64 lines
1.7 KiB
Python
#!/usr/bin/env python
|
|
# DickServ IRC Bot - Developed by acidvegas in Python (https://acid.vegas/dickserv)
|
|
# httplib.py
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
import urllib.parse
|
|
import urllib.request
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
def clean_url(url):
|
|
for prefix in ('https://', 'http://', 'www.'):
|
|
if url.startswith(prefix):
|
|
url = url[len(prefix):]
|
|
if url[-1:] == '/':
|
|
url = url[:-1]
|
|
return url
|
|
|
|
def data_quote(data):
|
|
return urllib.parse.quote(data)
|
|
|
|
def data_encode(data):
|
|
return urllib.parse.urlencode(data)
|
|
|
|
def get_file(url):
|
|
return os.path.basename(url)
|
|
|
|
def get_json(url):
|
|
return json.loads(get_source(url))
|
|
|
|
def get_size(url):
|
|
content_length = int(get_url(url).getheader('content-length'))
|
|
for unit in ('B','KB','MB','GB','TB','PB','EB','ZB'):
|
|
if abs(content_length) < 1024.0:
|
|
return '{0:.2f}'.format(content_length) + unit
|
|
content_length /= 1024.0
|
|
return '{0:.2f}'.format(content_length) + 'YB'
|
|
|
|
def get_source(url):
|
|
source = get_url(url)
|
|
charset = source.headers.get_content_charset()
|
|
if charset:
|
|
return source.read().decode(charset)
|
|
else:
|
|
return source.read().decode()
|
|
|
|
def get_title(url):
|
|
source = get_source(url)
|
|
soup = BeautifulSoup(source, 'html.parser')
|
|
return ' '.join(soup.title.string.split())
|
|
|
|
def get_type(url):
|
|
return get_url(url).info().get_content_type()
|
|
|
|
def get_url(url):
|
|
req = urllib.request.Request(url)
|
|
req.add_header('User-Agent', 'DickServ/1.0')
|
|
return urllib.request.urlopen(req, timeout=10)
|
|
|
|
def parse_urls(data):
|
|
return re.compile('(?:http[s]?:\/\/|www.)(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', re.IGNORECASE).findall(data)
|