shellscrape renamed to shellsocked; sockhub made concurrent (props agathanonymous); scraping urls no longer hardcoded, read from a file or url now.
parent d25ddff0c5
commit 3d5838b0fa
proxy_sources.txt (new file, 57 lines)
@@ -0,0 +1,57 @@
https://api.openproxylist.xyz/socks4.txt
https://api.openproxylist.xyz/socks5.txt
https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4
https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4
https://api.proxyscrape.com/?request=displayproxies&proxytype=socks5
https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5
https://proxy-list.download/api/v1/get?type=socks4
https://proxy-list.download/api/v1/get?type=socks5
https://proxyscan.io/download?type=socks4
https://proxyscan.io/download?type=socks5
https://proxyspace.pro/socks4.txt
https://proxyspace.pro/socks5.txt
https://raw.githubusercontent.com/ALIILAPRO/Proxy/main/socks4.txt
https://raw.githubusercontent.com/ALIILAPRO/Proxy/main/socks5.txt
https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/SOCKS4.txt
https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/SOCKS5.txt
https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt
https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks4.txt
https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks5.txt
https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks4.txt
https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks5.txt
https://raw.githubusercontent.com/manuGMG/proxy-365/main/SOCKS4.txt
https://raw.githubusercontent.com/manuGMG/proxy-365/main/SOCKS5.txt
https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks4.txt
https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks5.txt
https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt
https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt
https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/socks4.txt
https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/socks5.txt
https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks4.txt
https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks5.txt
https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks5/socks4.txt
https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks5/socks5.txt
https://raw.githubusercontent.com/prxchk/proxy-list/main/socks4.txt
https://raw.githubusercontent.com/prxchk/proxy-list/main/socks5.txt
https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies/socks4.txt
https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies/socks5.txt
https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies_anonymous/socks4.txt
https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies_anonymous/socks5.txt
https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt
https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS5_RAW.txt
https://raw.githubusercontent.com/RX4096/proxy-list/main/online/socks4.txt
https://raw.githubusercontent.com/RX4096/proxy-list/main/online/socks5.txt
https://raw.githubusercontent.com/saschazesiger/Free-Proxies/master/proxies/socks4.txt
https://raw.githubusercontent.com/saschazesiger/Free-Proxies/master/proxies/socks5.txt
https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt
https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt
https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt
https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt
https://raw.githubusercontent.com/UptimerBot/proxy-list/main/proxies/socks4.txt
https://raw.githubusercontent.com/UptimerBot/proxy-list/main/proxies/socks5.txt
https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/socks4.txt
https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/socks5.txt
https://raw.githubusercontent.com/zevtyardt/proxy-list/main/socks4.txt
https://raw.githubusercontent.com/zevtyardt/proxy-list/main/socks5.txt
https://spys.me/socks.txt
https://spys.one/en/socks-proxy-list/
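
Nothing curates this list (the sockhub.py docstring below says as much), so sources will go stale. A quick spot-check, sketched here as a suggestion rather than part of the commit, prints the HTTP status of every source:

    while IFS= read -r url; do
        printf '%s %s\n' "$(curl -s -o /dev/null -w '%{http_code}' "$url")" "$url"
    done < proxy_sources.txt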
shellscrape (deleted file, 20 lines)
@@ -1,20 +0,0 @@
#!/bin/env bash
# shellscrape - developed by acidvegas (https://git.acid.vegas/proxytools)

URLS=(
	"https://raw.githubusercontent.com/ALIILAPRO/Proxy/main/socks4.txt"
	"https://raw.githubusercontent.com/ALIILAPRO/Proxy/main/socks5.txt"
	"https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/SOCKS4.txt"
	"https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/SOCKS5.txt"
	"https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt"
	"https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks4.txt"
	"https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks5.txt"
)

[ -f proxies.txt ] >proxies.txt
for URL in "${URLS[@]}"; do
	echo "Downloading from $URL"
	curl -s $URL >> proxies.txt &
done
sort -u -o proxies.txt proxies.txt
echo "done"
shellsocked (new executable file, 34 lines)
@@ -0,0 +1,34 @@
#!/bin/env bash
# shellsocked - developed by acidvegas (https://git.acid.vegas/proxytools)

# Probably the most basic proxy scraper ever made, pure POSIX, no dependencies, no bullshit.
# Duplicate proxies are removed and the output is sorted and saved to a file.
# Feed it a single URL or a file with a list of URLs to scrape.

scrape_url() {
	local url="$1"
	local proxies=$(curl -s -A "ShellSocked/1.0" "$url" | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:[0-9]+' | awk '!seen[$0]++')
	local count=$(echo "$proxies" | wc -l)
	PROXIES="${PROXIES}${proxies}"$'\n'
	echo -e "Found \033[32m${count}\033[0m proxies on \033[33m${url}\033[0m"
}

if [ -n "$1" ]; then
	PROXIES=""
	if [ -f "$1" ]; then
		while IFS= read -r url; do
			scrape_url "$url"
		done < "$1"
	else
		scrape_url "$1"
	fi
else
	echo "Usage: $0 <input_file | single_url>"
	exit 1
fi

PROXIES=$(printf "%s\n" "$PROXIES" | sort -u)
printf "%s\n" "$PROXIES" > proxies.txt

total_count=$(echo "$PROXIES" | wc -l)
echo "Grand Total: ${total_count} proxies"
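
Per the usage string above, shellsocked accepts either a file of source URLs or one URL; for example (file name and URL both taken from this commit):

    ./shellsocked proxy_sources.txt
    ./shellsocked https://spys.me/socks.txt

Deduplicated results are written to proxies.txt.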
sockhub.py (161 lines changed)
@@ -1,111 +1,72 @@
 #!/usr/bin/env python
 # SockHub Proxy Scraper - Developed by acidvegas in Python (https://git.acid.vegas/proxytools)
 
+'''
+There is a file in this repository called proxy_sources.txt which contains a list of URLs to scrape for proxies.
+This list is not maintained and may contain dead links or links to sites that no longer contain proxies.
+'''
+
+import concurrent.futures
+import logging
 import os
 import re
 import urllib.request
 
-# Can be any URL containing a list of IP:PORT proxies (does not have to be socks5)
-# The current list contains proxy sources that are updated frequently with new proxies
-# Almost all of the Github repos pull from the same place & contain duplicates (which are removed)
-urls = set((
-	'https://api.openproxylist.xyz/socks4.txt',
-	'https://api.openproxylist.xyz/socks5.txt',
-	'https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4',
-	'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4',
-	'https://api.proxyscrape.com/?request=displayproxies&proxytype=socks5',
-	'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5',
-	'https://proxy-list.download/api/v1/get?type=socks4',
-	'https://proxy-list.download/api/v1/get?type=socks5',
-	'https://proxyscan.io/download?type=socks4',
-	'https://proxyscan.io/download?type=socks5',
-	'https://proxyspace.pro/socks4.txt',
-	'https://proxyspace.pro/socks5.txt',
-	'https://raw.githubusercontent.com/ALIILAPRO/Proxy/main/socks4.txt',
-	'https://raw.githubusercontent.com/ALIILAPRO/Proxy/main/socks5.txt',
-	'https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/SOCKS4.txt',
-	'https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/SOCKS5.txt',
-	'https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt',
-	'https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks4.txt',
-	'https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks5.txt',
-	'https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks4.txt',
-	'https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks5.txt',
-	'https://raw.githubusercontent.com/manuGMG/proxy-365/main/SOCKS4.txt',
-	'https://raw.githubusercontent.com/manuGMG/proxy-365/main/SOCKS5.txt',
-	'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks4.txt',
-	'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks5.txt',
-	'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt',
-	'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt',
-	'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/socks4.txt',
-	'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/socks5.txt',
-	'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks4.txt',
-	'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks5.txt',
-	'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks5/socks4.txt',
-	'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks5/socks5.txt',
-	'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks4.txt',
-	'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks5.txt',
-	'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies/socks4.txt',
-	'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies/socks5.txt',
-	'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies_anonymous/socks4.txt',
-	'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies_anonymous/socks5.txt',
-	'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt',
-	'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS5_RAW.txt',
-	'https://raw.githubusercontent.com/RX4096/proxy-list/main/online/socks4.txt',
-	'https://raw.githubusercontent.com/RX4096/proxy-list/main/online/socks5.txt',
-	'https://raw.githubusercontent.com/saschazesiger/Free-Proxies/master/proxies/socks4.txt',
-	'https://raw.githubusercontent.com/saschazesiger/Free-Proxies/master/proxies/socks5.txt',
-	'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt',
-	'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt',
-	'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt',
-	'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt',
-	'https://raw.githubusercontent.com/UptimerBot/proxy-list/main/proxies/socks4.txt',
-	'https://raw.githubusercontent.com/UptimerBot/proxy-list/main/proxies/socks5.txt',
-	'https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/socks4.txt',
-	'https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/socks5.txt',
-	'https://raw.githubusercontent.com/zevtyardt/proxy-list/main/socks4.txt',
-	'https://raw.githubusercontent.com/zevtyardt/proxy-list/main/socks5.txt',
-	'https://spys.me/socks.txt',
-	'https://spys.one/en/socks-proxy-list/'
-))
-
-def get_source(url: str) -> str:
-	''' Get the source of a URL using a Googlebot user-agent. '''
-	req = urllib.request.Request(url)
-	req.add_header('User-Agent', 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)')
-	source = urllib.request.urlopen(req, timeout=15)
-	return source.read().decode()
-
-# Main
-print('#'*56)
-print('#{0}#'.format(''.center(54)))
-print('#{0}#'.format('SockHub Proxy Scraper'.center(54)))
-print('#{0}#'.format('Developed by acidvegas in Python'.center(54)))
-print('#{0}#'.format('https://git.acid.vegas/proxytools'.center(54)))
-print('#{0}#'.format(''.center(54)))
-print('#'*56)
-total = 0
+# Global
 proxies = list()
-proxy_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'proxies.txt')
-print('scanning \033[35m{0:,}\033[0m urls from list...'.format(len(urls)))
-for url in urls: # TODO: Maybe add concurrent.futures support for using larger lists
+
+def find_proxies(url: str) -> str:
+	'''
+	Check a URL for IP:PORT proxies.
+
+	:param url: The URL to check for proxies.
+	'''
 	try:
-		source = get_source(url)
-	except:
-		print('found \033[31m0\033[0m new proxies on \033[34m{0}\033[0m \033[30m(failed to load)\033[0m'.format(url))
-	else:
-		total+= len(source.split())
-		found = set([proxy for proxy in re.findall('[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:[0-9]+', source, re.MULTILINE) if proxy not in proxies])
-		if found:
-			proxies += found
-			print('found \033[32m{0:,}\033[0m new proxies on \033[34m{1}\033[0m'.format(len(found), url))
+		source = urllib.request.urlopen(urllib.request.Request(url, headers={'User-Agent': 'SockHub/1.0'})).read().decode()
+		if source:
+			found = set(re.findall(r'[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:[0-9]+', source, re.MULTILINE))
+			if (new_proxies := [proxy for proxy in found if proxy not in proxies]):
+				proxies += new_proxies
+				print(f'found \033[32m{len(new_proxies):,}\033[0m new proxies on \033[34m{url}\033[0m')
 		else:
-			print('found \033[31m0\033[0m new proxies on \033[34m{0}\033[0m \033[30m(duplicates)\033[0m'.format(url))
-if proxies:
-	if len(proxies) < total:
-		print('found \033[32m{0:,}\033[0m total proxies! \033[30m({1:,} duplicates removed)\033[0m'.format(len(proxies), total-len(proxies)))
+			logging.warning(f'found \033[31m0\033[0m new proxies on \033[34m{url}\033[0m \033[30m(source is empty)\033[0m')
+	except Exception as ex:
+		logging.error(f'found \033[31m0\033[0m new proxies on \033[34m{url}\033[0m \033[30m({ex})\033[0m')
+
+
+if __name__ == '__main__':
+	import argparse
+	parser = argparse.ArgumentParser(description='SockHub Proxy Scraper - Developed by acidvegas in Python (https://git.acid.vegas/proxytools)')
+	parser.add_argument('-i', '--input', help='input file containing a list of URLs to scrape (one per line) or a single URL')
+	parser.add_argument('-o', '--output', help='output file to save proxies to', default='proxies.txt')
+	parser.add_argument('-c', '--concurrency', help='number of concurrent threads to use (default: 10)', default=10, type=int)
+	args = parser.parse_args()
+
+	logging.basicConfig(format='%(levelname)s %(message)s', level=logging.INFO)
+
+	if not os.path.isfile(args.input):
+		if args.input.startswith('https://') or args.input.startswith('http://'):
+			logging.info('using input as a single url...')
+			proxy_sources = [args.input]
+		else:
+			raise SystemExit('input file does not exist!')
+	else:
+		proxy_sources = open(args.input, 'r').read().split('\n')
+
+	if not proxy_sources:
+		raise SystemExit('proxy sources input file is empty!')
+
+	logging.debug(f'scanning \033[35m{len(proxy_sources):,}\033[0m urls from list...')
+
+	with concurrent.futures.ThreadPoolExecutor(max_workers=args.concurrency) as executor:
+		futures = [executor.submit(find_proxies, url) for url in proxy_sources]
+		concurrent.futures.wait(futures)
+
+	if proxies:
+		logging.info(f'found \033[32m{len(proxies):,}\033[0m total proxies!')
+		proxies.sort()
+		with open(args.output, 'w') as output_file:
+			for proxy in proxies:
+				output_file.write(proxy + '\n')
 	else:
-		print('found \033[32m{0:,}\033[0m total proxies!'.format(len(proxies)))
-	proxies.sort()
-	with open (proxy_file, 'w') as proxy__file:
-		for proxy in proxies:
-			proxy__file.write(proxy + '\n')
+		logging.warning('no proxies found!')
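
The rewritten sockhub.py takes the same file-or-URL input through the flags added above; a usage sketch, with defaults of proxies.txt for the output and 10 threads:

    python sockhub.py -i proxy_sources.txt
    python sockhub.py -i https://spys.me/socks.txt -o proxies.txt -c 20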