zmap-mark-ii/lib/constraint.c
2024-02-21 18:23:18 -06:00

417 lines
12 KiB
C

/*
* Copyright 2021 Regents of the University of Michigan
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include "../lib/constraint.h"
#include "../lib/logger.h"
#include "../lib/xalloc.h"
//
// Efficient address-space constraints (AH 7/2013)
//
// This module uses a tree-based representation to efficiently
// manipulate and query constraints on the address space to be
// scanned. It provides a value for every IP address, and these
// values are applied by setting them for network prefixes. Order
// matters: setting a value replaces any existing value for that
// prefix or subsets of it. We use this to implement network
// allowlisting and blocklisting.
//
// Think of setting values in this structure like painting
// subnets with different colors. We can paint subnets black to
// exclude them and white to allow them. Only the top color shows.
// This makes for potentially very powerful constraint specifications.
//
// Internally, this is implemented using a binary tree, where each
// node corresponds to a network prefix. (E.g., the root is
// 0.0.0.0/0, and its children, if present, are 0.0.0.0/1 and
// 128.0.0.0/1.) Each leaf of the tree stores the value that applies
// to every address within the leaf's portion of the prefix space.
//
// As an optimization, after all values are set, we look up the
// value or subtree for every /16 prefix and cache them as an array.
// This lets subsequent lookups bypass the bottom half of the tree.
//
/*
* Constraint Copyright 2013 Regents of the University of Michigan
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*/
typedef struct node {
struct node *l;
struct node *r;
value_t value;
uint64_t count;
} node_t;
// As an optimization, we precompute lookups for every prefix of this
// length:
#define RADIX_LENGTH 20
struct _constraint {
node_t *root; // root node of the tree
uint32_t *radix; // array of prefixes (/RADIX_LENGTH) that are painted
// paint_value
size_t radix_len; // number of prefixes in radix array
int painted; // have we precomputed counts for each node?
value_t paint_value; // value for which we precomputed counts
};
// Tree operations respect the invariant that every node that isn't a
// leaf has exactly two children.
#define IS_LEAF(node) ((node)->l == NULL)
// Allocate a new leaf with the given value
static node_t *_create_leaf(value_t value)
{
node_t *node = xmalloc(sizeof(node_t));
node->l = NULL;
node->r = NULL;
node->value = value;
return node;
}
// Free the subtree rooted at node.
static void _destroy_subtree(node_t *node)
{
if (node == NULL)
return;
_destroy_subtree(node->l);
_destroy_subtree(node->r);
free(node);
}
// Convert from an internal node to a leaf.
static void _convert_to_leaf(node_t *node)
{
assert(node);
assert(!IS_LEAF(node));
_destroy_subtree(node->l);
_destroy_subtree(node->r);
node->l = NULL;
node->r = NULL;
}
// Recursive function to set value for a given network prefix within
// the tree. (Note: prefix must be in host byte order.)
static void _set_recurse(node_t *node, uint32_t prefix, int len, value_t value)
{
assert(node);
assert(0 <= len && len <= 256);
if (len == 0) {
// We're at the end of the prefix; make this a leaf and set the
// value.
if (!IS_LEAF(node)) {
_convert_to_leaf(node);
}
node->value = value;
return;
}
if (IS_LEAF(node)) {
// We're not at the end of the prefix, but we hit a leaf.
if (node->value == value) {
// A larger prefix has the same value, so we're done.
return;
}
// The larger prefix has a different value, so we need to
// convert it into an internal node and continue processing on
// one of the leaves.
node->l = _create_leaf(node->value);
node->r = _create_leaf(node->value);
}
// We're not at the end of the prefix, and we're at an internal
// node. Recurse on the left or right subtree.
if (prefix & 0x80000000) {
_set_recurse(node->r, prefix << 1, len - 1, value);
} else {
_set_recurse(node->l, prefix << 1, len - 1, value);
}
// At this point, we're an internal node, and the value is set
// by one of our children or its descendent. If both children are
// leaves with the same value, we can discard them and become a left.
if (IS_LEAF(node->r) && IS_LEAF(node->l) &&
node->r->value == node->l->value) {
node->value = node->l->value;
_convert_to_leaf(node);
}
}
// Set the value for a given network prefix, overwriting any existing
// values on that prefix or subsets of it.
// (Note: prefix must be in host byte order.)
void constraint_set(constraint_t *con, uint32_t prefix, int len, value_t value)
{
assert(con);
_set_recurse(con->root, prefix, len, value);
con->painted = 0;
}
// Return the value pertaining to an address, according to the tree
// starting at given root. (Note: address must be in host byte order.)
static int _lookup_ip(node_t *root, uint32_t address)
{
assert(root);
node_t *node = root;
uint32_t mask = 0x80000000;
for (;;) {
if (IS_LEAF(node)) {
return node->value;
}
if (address & mask) {
node = node->r;
} else {
node = node->l;
}
mask >>= 1;
}
}
// Return the value pertaining to an address.
// (Note: address must be in host byte order.)
value_t constraint_lookup_ip(constraint_t *con, uint32_t address)
{
assert(con);
return _lookup_ip(con->root, address);
}
// Return the nth painted IP address.
static int _lookup_index(node_t *root, uint64_t n)
{
assert(root);
node_t *node = root;
uint32_t ip = 0;
uint32_t mask = 0x80000000;
for (;;) {
if (IS_LEAF(node)) {
return ip | n;
}
if (n < node->l->count) {
node = node->l;
} else {
n -= node->l->count;
node = node->r;
ip |= mask;
}
mask >>= 1;
}
}
// For a given value, return the IP address with zero-based index n.
// (i.e., if there are three addresses with value 0xFF, looking up index 1
// will return the second one).
// Note that the tree must have been previously painted with this value.
uint32_t constraint_lookup_index(constraint_t *con, uint64_t index,
value_t value)
{
assert(con);
if (!con->painted || con->paint_value != value) {
constraint_paint_value(con, value);
}
uint64_t radix_idx = index / (1 << (32 - RADIX_LENGTH));
if (radix_idx < con->radix_len) {
// Radix lookup
uint32_t radix_offset =
index % (1 << (32 - RADIX_LENGTH)); // TODO: bitwise maths
return con->radix[radix_idx] | radix_offset;
}
// Otherwise, do the "slow" lookup in tree.
// Note that tree counts do NOT include things in the radix,
// so we subtract these off here.
index -= con->radix_len * (1 << (32 - RADIX_LENGTH));
assert(index < con->root->count);
return _lookup_index(con->root, index);
}
// Implement count_ips by recursing on halves of the tree. Size represents
// the number of addresses in a prefix at the current level of the tree.
// If paint is specified, each node will have its count set to the number of
// leaves under it set to value.
// If exclude_radix is specified, the number of addresses will exclude prefixes
// that are a /RADIX_LENGTH or larger
static uint64_t _count_ips_recurse(node_t *node, value_t value, uint64_t size,
int paint, int exclude_radix)
{
assert(node);
uint64_t n;
if (IS_LEAF(node)) {
if (node->value == value) {
n = size;
// Exclude prefixes already included in the radix
if (exclude_radix &&
size >= (1 << (32 - RADIX_LENGTH))) {
n = 0;
}
} else {
n = 0;
}
} else {
n = _count_ips_recurse(node->l, value, size >> 1, paint,
exclude_radix) +
_count_ips_recurse(node->r, value, size >> 1, paint,
exclude_radix);
}
if (paint) {
node->count = n;
}
return n;
}
// Return a node that determines the values for the addresses with
// the given prefix. This is either the internal node that
// corresponds to the end of the prefix or a leaf node that
// encompasses the prefix. (Note: prefix must be in host byte order.)
static node_t *_lookup_node(node_t *root, uint32_t prefix, int len)
{
assert(root);
assert(0 <= len && len <= 32);
node_t *node = root;
uint32_t mask = 0x80000000;
int i;
for (i = 0; i < len; i++) {
if (IS_LEAF(node)) {
return node;
}
if (prefix & mask) {
node = node->r;
} else {
node = node->l;
}
mask >>= 1;
}
return node;
}
// For each node, precompute the count of leaves beneath it set to value.
// Note that the tree can be painted for only one value at a time.
void constraint_paint_value(constraint_t *con, value_t value)
{
assert(con);
log_debug("constraint", "Painting value %lu", value);
// Paint everything except what we will put in radix
_count_ips_recurse(con->root, value, (uint64_t)1 << 32, 1, 1);
// Fill in the radix array with a list of addresses
uint32_t i;
con->radix_len = 0;
for (i = 0; i < (1 << RADIX_LENGTH); i++) {
uint32_t prefix = i << (32 - RADIX_LENGTH);
node_t *node = _lookup_node(con->root, prefix, RADIX_LENGTH);
if (IS_LEAF(node) && node->value == value) {
// Add this prefix to the radix
con->radix[con->radix_len++] = prefix;
}
}
log_debug("constraint", "%lu IPs in radix array, %lu IPs in tree",
con->radix_len * (1 << (32 - RADIX_LENGTH)),
con->root->count);
con->painted = 1;
con->paint_value = value;
}
// Return the number of addresses that have a given value.
uint64_t constraint_count_ips(constraint_t *con, value_t value)
{
assert(con);
if (con->painted && con->paint_value == value) {
return con->root->count +
con->radix_len * (1 << (32 - RADIX_LENGTH));
} else {
return _count_ips_recurse(con->root, value, (uint64_t)1 << 32,
0, 0);
}
}
// Initialize the tree.
// All addresses will initially have the given value.
constraint_t *constraint_init(value_t value)
{
constraint_t *con = xmalloc(sizeof(constraint_t));
con->root = _create_leaf(value);
con->radix = xcalloc(sizeof(uint32_t), 1 << RADIX_LENGTH);
con->painted = 0;
return con;
}
// Deinitialize and free the tree.
void constraint_free(constraint_t *con)
{
assert(con);
log_debug("constraint", "Cleaning up");
_destroy_subtree(con->root);
free(con->radix);
free(con);
}
/*
int main(void)
{
log_init(stderr, LOG_DEBUG);
constraint_t *con = constraint_init(0);
constraint_set(con, ntohl(inet_addr("128.128.0.0")), 1, 22);
constraint_set(con, ntohl(inet_addr("128.128.0.0")), 1, 1);
constraint_set(con, ntohl(inet_addr("128.0.0.0")), 1, 1);
constraint_set(con, ntohl(inet_addr("10.0.0.0")), 24, 1);
constraint_set(con, ntohl(inet_addr("10.0.0.0")), 24, 0);
constraint_set(con, ntohl(inet_addr("10.11.12.0")), 24, 1);
constraint_set(con, ntohl(inet_addr("141.212.0.0")), 16, 0);
for (int x=1; x < 2; x++) {
if (x == 1) {
constraint_optimize(con);
}
printf("count(0)=%ld\n", constraint_count_ips(con, 0));
printf("count(1)=%ld\n", constraint_count_ips(con, 1));
printf("%d\n",
constraint_lookup_ip(con,ntohl(inet_addr("10.11.12.0"))));
assert(constraint_count_ips(con, 0) + constraint_count_ips(con,
1) == (uint64_t)1 << 32);
uint32_t i=0, count=0;
do {
if (constraint_lookup_ip(con, i))
count++;
} while (++i != 0);
printf("derived count(1)=%u\n", count);
}
constraint_free(con);
}
*/