shellscrape renamed to shellsocked, sockhub made concurrent (props agathanonymous); scraping URLs are no longer hardcoded and are now read from a file or URL.

Dionysus 2023-11-08 19:16:44 -05:00
parent d25ddff0c5
commit 3d5838b0fa
Signed by: acidvegas
GPG Key ID: EF4B922DB85DC9DE
5 changed files with 164 additions and 120 deletions

57
proxy_sources.txt Normal file

@@ -0,0 +1,57 @@
https://api.openproxylist.xyz/socks4.txt
https://api.openproxylist.xyz/socks5.txt
https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4
https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4
https://api.proxyscrape.com/?request=displayproxies&proxytype=socks5
https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5
https://proxy-list.download/api/v1/get?type=socks4
https://proxy-list.download/api/v1/get?type=socks5
https://proxyscan.io/download?type=socks4
https://proxyscan.io/download?type=socks5
https://proxyspace.pro/socks4.txt
https://proxyspace.pro/socks5.txt
https://raw.githubusercontent.com/ALIILAPRO/Proxy/main/socks4.txt
https://raw.githubusercontent.com/ALIILAPRO/Proxy/main/socks5.txt
https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/SOCKS4.txt
https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/SOCKS5.txt
https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt
https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks4.txt
https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks5.txt
https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks4.txt
https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks5.txt
https://raw.githubusercontent.com/manuGMG/proxy-365/main/SOCKS4.txt
https://raw.githubusercontent.com/manuGMG/proxy-365/main/SOCKS5.txt
https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks4.txt
https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks5.txt
https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt
https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt
https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/socks4.txt
https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/socks5.txt
https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks4.txt
https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks5.txt
https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks5/socks4.txt
https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks5/socks5.txt
https://raw.githubusercontent.com/prxchk/proxy-list/main/socks4.txt
https://raw.githubusercontent.com/prxchk/proxy-list/main/socks5.txt
https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies/socks4.txt
https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies/socks5.txt
https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies_anonymous/socks4.txt
https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies_anonymous/socks5.txt
https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt
https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS5_RAW.txt
https://raw.githubusercontent.com/RX4096/proxy-list/main/online/socks4.txt
https://raw.githubusercontent.com/RX4096/proxy-list/main/online/socks5.txt
https://raw.githubusercontent.com/saschazesiger/Free-Proxies/master/proxies/socks4.txt
https://raw.githubusercontent.com/saschazesiger/Free-Proxies/master/proxies/socks5.txt
https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt
https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt
https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt
https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt
https://raw.githubusercontent.com/UptimerBot/proxy-list/main/proxies/socks4.txt
https://raw.githubusercontent.com/UptimerBot/proxy-list/main/proxies/socks5.txt
https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/socks4.txt
https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/socks5.txt
https://raw.githubusercontent.com/zevtyardt/proxy-list/main/socks4.txt
https://raw.githubusercontent.com/zevtyardt/proxy-list/main/socks5.txt
https://spys.me/socks.txt
https://spys.one/en/socks-proxy-list/

20
shellscrape

@@ -1,20 +0,0 @@
#!/bin/env bash
# shellscrape - developed by acidvegas (https://git.acid.vegas/proxytools)
URLS=(
"https://raw.githubusercontent.com/ALIILAPRO/Proxy/main/socks4.txt"
"https://raw.githubusercontent.com/ALIILAPRO/Proxy/main/socks5.txt"
"https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/SOCKS4.txt"
"https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/SOCKS5.txt"
"https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt"
"https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks4.txt"
"https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks5.txt"
)
[ -f proxies.txt ] >proxies.txt
for URL in "${URLS[@]}"; do
echo "Downloading from $URL"
curl -s $URL >> proxies.txt &
done
sort -u -o proxies.txt proxies.txt
echo "done"

34
shellsocked Executable file

@@ -0,0 +1,34 @@
#!/usr/bin/env bash
# shellsocked - developed by acidvegas (https://git.acid.vegas/proxytools)
# Probably the most basic proxy scraper ever made: plain Bash and curl, nothing else, no bullshit.
# Duplicate proxies are removed and the output is sorted and saved to a file.
# Feed it a single URL or a file with a list of URLs to scrape.

scrape_url() {
    local url="$1"
    local proxies count
    # pull every IP:PORT match out of the page, dropping duplicates within this source
    proxies=$(curl -s -A "ShellSocked/1.0" "$url" | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:[0-9]+' | awk '!seen[$0]++')
    count=$(grep -c . <<< "$proxies") # count non-empty lines so an empty response reports 0
    PROXIES="${PROXIES}${proxies}"$'\n' # keep a trailing newline so results from different URLs do not run together
    echo -e "Found \033[32m${count}\033[0m proxies on \033[33m${url}\033[0m"
}

if [ -n "$1" ]; then
    PROXIES=""
    if [ -f "$1" ]; then
        while IFS= read -r url; do
            scrape_url "$url"
        done < "$1"
    else
        scrape_url "$1"
    fi
else
    echo "Usage: $0 <input_file | single_url>"
    exit 1
fi

PROXIES=$(printf "%s\n" "$PROXIES" | grep . | sort -u) # drop blank lines, dedupe, and sort
printf "%s\n" "$PROXIES" > proxies.txt
total_count=$(grep -c . <<< "$PROXIES")
echo "Grand Total: ${total_count} proxies"

161
sockhub.py

@@ -1,111 +1,72 @@

(removed: the hard-coded urls set holding the same 57 proxy sources now kept in proxy_sources.txt, the Googlebot get_source() helper, the startup banner, and the sequential scraping loop)

#!/usr/bin/env python
# SockHub Proxy Scraper - Developed by acidvegas in Python (https://git.acid.vegas/proxytools)

'''
There is a file in this repository called proxy_sources.txt which contains a list of URLs to scrape for proxies.
This list is not maintained and may contain dead links or links to sites that no longer contain proxies.
'''

import concurrent.futures
import logging
import os
import re
import urllib.request

# Global
proxies = list()


def find_proxies(url: str) -> None:
    '''
    Check a URL for IP:PORT proxies.

    :param url: The URL to check for proxies.
    '''
    global proxies # every worker thread appends into the shared module-level list
    try:
        request = urllib.request.Request(url, headers={'User-Agent': 'SockHub/1.0'})
        source = urllib.request.urlopen(request, timeout=15).read().decode() # timeout so a dead source cannot hang a worker
        if source:
            found = set(re.findall(r'[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:[0-9]+', source, re.MULTILINE))
            if (new_proxies := [proxy for proxy in found if proxy not in proxies]):
                proxies += new_proxies
                print(f'found \033[32m{len(new_proxies):,}\033[0m new proxies on \033[34m{url}\033[0m')
        else:
            logging.warning(f'found \033[31m0\033[0m new proxies on \033[34m{url}\033[0m \033[30m(source is empty)\033[0m')
    except Exception as ex:
        logging.error(f'found \033[31m0\033[0m new proxies on \033[34m{url}\033[0m \033[30m({ex})\033[0m')


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='SockHub Proxy Scraper - Developed by acidvegas in Python (https://git.acid.vegas/proxytools)')
    parser.add_argument('-i', '--input', required=True, help='input file containing a list of URLs to scrape (one per line) or a single URL')
    parser.add_argument('-o', '--output', help='output file to save proxies to', default='proxies.txt')
    parser.add_argument('-c', '--concurrency', help='number of concurrent threads to use (default: 10)', default=10, type=int)
    args = parser.parse_args()
    logging.basicConfig(format='%(levelname)s %(message)s', level=logging.INFO)
    if not os.path.isfile(args.input):
        if args.input.startswith('https://') or args.input.startswith('http://'):
            logging.info('using input as a single url...')
            proxy_sources = [args.input]
        else:
            logging.fatal('input file does not exist!')
            raise SystemExit(1) # logging.fatal only logs, it does not stop the program
    else:
        proxy_sources = [line.strip() for line in open(args.input, 'r') if line.strip()] # skip blank lines
    if not proxy_sources:
        logging.fatal('proxy sources input file is empty!')
        raise SystemExit(1)
    logging.info(f'scanning \033[35m{len(proxy_sources):,}\033[0m urls from list...')
    with concurrent.futures.ThreadPoolExecutor(max_workers=args.concurrency) as executor:
        futures = [executor.submit(find_proxies, url) for url in proxy_sources]
        concurrent.futures.wait(futures)
    if proxies:
        logging.info(f'found \033[32m{len(proxies):,}\033[0m total proxies!')
        proxies.sort()
        with open(args.output, 'w') as output_file:
            for proxy in proxies:
                output_file.write(proxy + '\n')
    else:
        logging.warning('no proxies found!')
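
A minimal invocation sketch for the reworked scraper (the filename sockhub.py and flag behavior follow the argparse definitions above; adjust the path if the file is named differently in your checkout):

python sockhub.py -i proxy_sources.txt -o proxies.txt -c 10    # scrape every source concurrently with 10 threads
python sockhub.py -i https://spys.me/socks.txt    # a single URL also works as input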

12
test.py Normal file

@@ -0,0 +1,12 @@
# Tally how often each entry appears in ports.txt (one entry per line)
# and print the ones seen more than 50 times.
found = dict()
data = open('ports.txt').read()
for item in data.split('\n'):
    if not item:
        continue # skip blank lines so they are not tallied
    if item in found:
        found[item] += 1
    else:
        found[item] = 1
for item in found:
    if found[item] > 50:
        print(item.ljust(10) + str(found[item]))
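
test.py expects a ports.txt file with one entry per line; one hedged way to produce such a file from the scraper output, assuming proxies.txt holds IP:PORT lines, is:

awk -F: '{print $2}' proxies.txt > ports.txt    # keep only the port column
python test.py    # print ports that appear more than 50 times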