Initial commit
This commit is contained in:
7
.gitignore
vendored
Normal file
7
.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
.env
|
||||
__pycache__
|
||||
logs/*
|
||||
*.log
|
||||
.log.
|
||||
assets/fcc_data/*
|
||||
venv/
|
||||
14
LICENSE
Normal file
14
LICENSE
Normal file
@@ -0,0 +1,14 @@
|
||||
Copyright (c) 2025, acidvegas <acid.vegas@acid.vegas>
|
||||
|
||||
Permission to use, copy, modify, and/or distribute this software for any purpose
|
||||
with or without fee is hereby granted, provided that the above copyright notice
|
||||
and this permission notice appear in all copies.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
|
||||
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
|
||||
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||
THIS SOFTWARE.
|
||||
|
||||
72
README.md
Normal file
72
README.md
Normal file
@@ -0,0 +1,72 @@
|
||||
# FCC Database Dump
|
||||
|
||||
A simple Python tool to download and archive FCC ULS (Universal Licensing System) database files for radio services.
|
||||
|
||||
This downloads weekly database dumps from the FCC's public database for various radio services including amateur radio, GMRS, land mobile, cellular, microwave, and more. The tool handles automatic extraction of zip files and organizes everything into a clean directory structure.
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
**Requirements:**
|
||||
- Python 3.7+
|
||||
- aiohttp
|
||||
- beautifulsoup4
|
||||
- tqdm
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Download (5 concurrent downloads)
|
||||
```bash
|
||||
python fcc-uls-database-dumper.py
|
||||
```
|
||||
|
||||
### List Available Databases
|
||||
```bash
|
||||
python fcc-uls-database-dumper.py --list
|
||||
```
|
||||
|
||||
### Custom Concurrency
|
||||
```bash
|
||||
python fcc-uls-database-dumper.py -c 10
|
||||
```
|
||||
|
||||
### Debug Mode
|
||||
```bash
|
||||
python fcc-uls-database-dumper.py --debug
|
||||
```
|
||||
|
||||
## Command-Line Arguments
|
||||
|
||||
| Argument | Short | Type | Default | Description |
|
||||
|----------|-------|------|---------|-------------|
|
||||
| `--list` | `-l` | flag | - | List available databases without downloading |
|
||||
| `--concurrency` | `-c` | int | 5 | Maximum number of concurrent downloads |
|
||||
| `--debug` | `-d` | flag | - | Enable debug logging output |
|
||||
|
||||
## Output Structure
|
||||
|
||||
Downloaded files are saved to `assets/fcc_data/YYYY-MM-DD/` with the following structure:
|
||||
|
||||
```
|
||||
assets/fcc_data/2025-11-26/
|
||||
├── amateur_radio_service/
|
||||
│ ├── applications/
|
||||
│ │ ├── AM.dat
|
||||
│ │ ├── CO.dat
|
||||
│ │ ├── EN.dat
|
||||
│ │ └── ...
|
||||
│ └── licenses/
|
||||
│ ├── AM.dat
|
||||
│ ├── HD.dat
|
||||
│ └── ...
|
||||
├── gmrs/
|
||||
│ ├── applications/
|
||||
│ └── licenses/
|
||||
├── cellular_47_cfr_part_22/
|
||||
└── ...
|
||||
```
|
||||
|
||||
Each category (e.g., `amateur_radio_service`) contains subdirectories for different file types (e.g., `applications`, `licenses`). The downloaded zip files are automatically extracted and the original archives are removed to save space.
|
||||
File diff suppressed because it is too large
Load Diff
3010
assets/definitions/struct.md
Normal file
3010
assets/definitions/struct.md
Normal file
File diff suppressed because it is too large
Load Diff
210
fcc-uls-database-dumper.py
Normal file
210
fcc-uls-database-dumper.py
Normal file
@@ -0,0 +1,210 @@
|
||||
#!/usr/bin/env python
|
||||
# FCC Universal Licensing System Database Dumper - Developed by acidvegas (https://github.com/acidvegas/fcc-uls-database-dumper)
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import zipfile
|
||||
|
||||
from datetime import datetime
|
||||
from urllib.parse import urljoin
|
||||
|
||||
try:
|
||||
import aiohttp
|
||||
except ImportError:
|
||||
raise ImportError('missing \'aiohttp\' library (pip install aiohttp)')
|
||||
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
except ImportError:
|
||||
raise ImportError('missing \'beautifulsoup4\' library (pip install beautifulsoup4)')
|
||||
|
||||
try:
|
||||
from tqdm.asyncio import tqdm
|
||||
except ImportError:
|
||||
raise ImportError('missing \'tqdm\' library (pip install tqdm)')
|
||||
|
||||
|
||||
# HTTP headers sent with every request (the page scrape and each file download).
# The values imitate a desktop Chrome navigation request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    # Only advertise encodings the client can decode without extra packages.
    # Brotli ('br') requires an optional Brotli library that is not listed in
    # requirements.txt; advertising it would let the server answer with a body
    # that cannot be decompressed.
    'Accept-Encoding': 'gzip, deflate',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'none',
    'Sec-Fetch-User': '?1',
}
|
||||
|
||||
|
||||
def _slugify(text: str) -> str:
    '''
    Normalize display text into a lowercase key of [a-z0-9] runs joined by underscores

    :param text: raw text taken from the FCC page
    :return: normalized key (empty if the text contains no alphanumeric characters)
    '''

    return re.sub(r'[^a-z0-9]+', '_', text.lower()).strip('_')


def _parse_weekly_table(html: str, base_url: str) -> dict:
    '''
    Build the category -> {file type -> URL} mapping from the FCC page HTML

    :param html: HTML source of the daily/weekly transactions page
    :param base_url: base URL used to resolve relative download links
    :return: dictionary of categories and their download URLs
    :raises RuntimeError: if the weekly table is missing or holds no download links
    '''

    soup = BeautifulSoup(html, 'html.parser')

    weekly_table = soup.find('table', class_='uls-transaction-weekly')
    if not weekly_table:
        raise RuntimeError('Could not find weekly databases table on FCC website')

    urls = {}
    current_category = None

    for row in weekly_table.find_all('tr'):
        if 'group-header' in row.get('class', []):
            header = row.find('th')
            category_key = _slugify(header.text) if header else ''

            if not category_key:
                # A header without a usable name must not let the links that
                # follow be filed under the previous category.
                logging.warning('Skipping weekly table group header without a usable category name')
                current_category = None
                continue

            current_category = category_key
            # setdefault keeps links already collected if a category key repeats
            urls.setdefault(current_category, {})
        elif current_category:
            link = row.find('a')
            if link and link.get('href'):
                file_key = _slugify(link.text)
                urls[current_category][file_key] = urljoin(base_url, link['href'])

    # Categories alone are not enough: require at least one download link.
    if not any(urls.values()):
        raise RuntimeError('No URLs found on FCC website')

    return urls


async def fetch_weekly_databases() -> dict:
    '''
    Fetch weekly database URLs from the FCC website

    :return: dictionary of categories and their download URLs
    :raises Exception: any network or parsing error is logged and re-raised
    '''

    base_url = 'https://www.fcc.gov'
    page_url = 'https://www.fcc.gov/uls/transactions/daily-weekly'

    try:
        timeout = aiohttp.ClientTimeout(total=60)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(page_url, headers=HEADERS) as response:
                response.raise_for_status()
                html = await response.text()

        urls = _parse_weekly_table(html, base_url)

        logging.info(f'Fetched {len(urls)} categories from FCC website')
        return urls

    except Exception as e:
        logging.error(f'Failed to fetch URLs from website: {e}')
        raise
|
||||
|
||||
|
||||
def _extract_archive(archive_path: str, dest_dir: str):
    '''
    Extract every member of a zip archive into the destination directory

    :param archive_path: path of the zip file on disk
    :param dest_dir: directory the members are extracted into
    :raises zipfile.BadZipFile: if the archive cannot be read as a zip file
    '''

    with zipfile.ZipFile(archive_path, 'r') as zip_ref:
        zip_ref.extractall(dest_dir)


async def download_file(session: aiohttp.ClientSession, category: str, file_type: str, url: str, base_dir: str, pbar: tqdm, semaphore: asyncio.Semaphore) -> bool:
    '''
    Download a single file from the given URL and extract if needed

    Errors are logged rather than raised so that one failed file does not
    abort the other downloads; the progress bar advances exactly once either way.

    :param session: aiohttp client session
    :param category: category name for the file
    :param file_type: type of file (applications, licenses, etc.)
    :param url: URL of the file to download
    :param base_dir: base directory for all downloads
    :param pbar: progress bar instance
    :param semaphore: semaphore to limit concurrent downloads
    :return: True if the file was downloaded (and extracted, for zip files), False on failure
    '''

    async with semaphore:
        filename = os.path.basename(url)
        category_dir = os.path.join(base_dir, category)
        type_dir = os.path.join(category_dir, file_type)
        is_zip = filename.endswith('.zip')

        # Zip archives are staged in the category directory and unpacked into
        # the type directory; any other file is stored in the type directory.
        file_path = os.path.join(category_dir if is_zip else type_dir, filename)

        try:
            os.makedirs(type_dir, exist_ok=True)

            async with session.get(url, headers=HEADERS, timeout=aiohttp.ClientTimeout(total=600)) as response:
                response.raise_for_status()
                with open(file_path, 'wb') as f:
                    while True:
                        chunk = await response.content.read(8192)
                        if not chunk:
                            break
                        f.write(chunk)

            if is_zip:
                try:
                    # Extraction is blocking work: run it in the default executor
                    # so the other downloads keep making progress meanwhile.
                    loop = asyncio.get_running_loop()
                    await loop.run_in_executor(None, _extract_archive, file_path, type_dir)
                except zipfile.BadZipFile:
                    logging.error(f'Corrupted zip file for {category}/{file_type} ({filename})')
                    raise
                finally:
                    # The archive is removed whether or not extraction succeeded
                    if os.path.isfile(file_path):
                        os.remove(file_path)

            pbar.set_description(f'Downloaded {category}/{file_type}')
            return True

        except Exception as e:
            logging.error(f'Failed to download/extract {category}/{file_type} ({filename}): {e}')
            # isfile (not exists) so a directory path is never passed to os.remove
            if os.path.isfile(file_path):
                os.remove(file_path)
            return False

        finally:
            pbar.update(1)
|
||||
|
||||
|
||||
async def main(concurrency: int = 5):
    '''
    Main function to download all FCC database files

    Files are stored under assets/fcc_data/<YYYY-MM-DD>/ using today's local date.

    :param concurrency: maximum number of concurrent downloads (must be at least 1)
    :raises ValueError: if concurrency is less than 1
    '''

    if concurrency < 1:
        # A semaphore created with 0 permits would make every download wait forever
        raise ValueError('concurrency must be at least 1')

    date_str = datetime.now().strftime('%Y-%m-%d')
    base_dir = f'assets/fcc_data/{date_str}'
    os.makedirs(base_dir, exist_ok=True)

    logging.info('Fetching weekly database URLs from FCC website...')
    urls = await fetch_weekly_databases()

    total_files = sum(len(files) for files in urls.values())
    semaphore = asyncio.Semaphore(concurrency)

    logging.info(f'Starting download of {total_files} files with {concurrency} concurrent downloads')

    with tqdm(total=total_files, desc='Downloading FCC databases', unit='file') as pbar:
        async with aiohttp.ClientSession() as session:
            tasks = []
            for category, files in urls.items():
                for file_type, url in files.items():
                    tasks.append(download_file(session, category, file_type, url, base_dir, pbar, semaphore))
            results = await asyncio.gather(*tasks)

    # Only an explicit False result counts as a failure; any other result
    # (including None) is treated as a completed download.
    failed = sum(1 for result in results if result is False)

    if failed:
        print(f'\n{total_files - failed} of {total_files} files downloaded and extracted to {base_dir} ({failed} failed, see log for details)')
    else:
        print(f'\nAll {total_files} files downloaded and extracted to {base_dir}')
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line interface: list mode, concurrency limit and debug logging
    parser = argparse.ArgumentParser(description='Download FCC ULS database files')
    parser.add_argument('-l', '--list', action='store_true', help='List available databases without downloading')
    parser.add_argument('-c', '--concurrency', type=int, default=5, help='Maximum concurrent downloads (default: 5)')
    parser.add_argument('-d', '--debug', action='store_true', help='Enable debug logging')
    args = parser.parse_args()

    # --debug lowers the log threshold from INFO to DEBUG
    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
    )

    if not args.list:
        # Default mode: download everything
        asyncio.run(main(args.concurrency))
    else:
        # List mode: print every category and its download URLs, then a summary
        print('Fetching weekly database URLs from FCC website...\n')

        databases = asyncio.run(fetch_weekly_databases())

        file_count = 0
        for category_name, entries in databases.items():
            print(f'\n{category_name}:')
            for entry_name, link in entries.items():
                print(f' {entry_name}: {link}')
            file_count += len(entries)

        print(f'\n\nTotal: {file_count} files found across {len(databases)} categories')
|
||||
|
||||
4
requirements.txt
Normal file
4
requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
aiohttp
|
||||
beautifulsoup4
|
||||
tqdm
|
||||
|
||||
Reference in New Issue
Block a user