zmap-mark-ii/src/fieldset.c

/*
 * ZMap Copyright 2013 Regents of the University of Michigan
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy
 * of the License at http://www.apache.org/licenses/LICENSE-2.0
 */

#include "fieldset.h"

#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <assert.h>

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wundef"
#include <unistr.h>
#pragma GCC diagnostic pop

#include "../lib/logger.h"
#include "../lib/xalloc.h"

void gen_fielddef_set(fielddefset_t *fds, fielddef_t fs[], int len)
{
	if (fds->len + len > MAX_FIELDS) {
		log_fatal("fieldset", "out of room in field def set");
	}
	fielddef_t *open = &(fds->fielddefs[fds->len]);
	memcpy(open, fs, len * sizeof(fielddef_t));
	fds->len += len;
}

fieldset_t *fs_new_fieldset(fielddefset_t *fds)
{
	fieldset_t *f = xcalloc(1, sizeof(fieldset_t));
	f->len = 0;
	f->type = FS_FIELDSET;
	f->fds = fds;
	return f;
}

fieldset_t *fs_new_repeated_field(int type, int free_)
{
	fieldset_t *f = xcalloc(1, sizeof(fieldset_t));
	f->len = 0;
	f->type = FS_REPEATED;
	f->inner_type = type;
	f->free_ = free_;
	return f;
}

fieldset_t *fs_new_repeated_uint64(void)
{
	return fs_new_repeated_field(FS_UINT64, 0);
}

fieldset_t *fs_new_repeated_bool(void)
{
	return fs_new_repeated_field(FS_BOOL, 0);
}

fieldset_t *fs_new_repeated_string(int free_)
{
	return fs_new_repeated_field(FS_STRING, free_);
}

fieldset_t *fs_new_repeated_binary(int free_)
{
	return fs_new_repeated_field(FS_BINARY, free_);
}

fieldset_t *fs_new_repeated_fieldset(void)
{
	return fs_new_repeated_field(FS_FIELDSET, 0);
}

static inline void fs_add_word(fieldset_t *fs, const char *name, int type,
			       int free_, size_t len, field_val_t value)
{
	if (fs->len + 1 >= MAX_FIELDS) {
		log_fatal("fieldset", "out of room in fieldset");
	}
	if (fs->type == FS_REPEATED && fs->inner_type != type) {
		log_fatal(
		    "fieldset",
		    "object added to repeated field does not match type of repeated field.");
	}
	field_t *f = &(fs->fields[fs->len]);
	// if we have a fieldset definition, then we can validate that the name
	// of the field is as expected
	if (fs->fds && strcmp(fs->fds->fielddefs[fs->len].name, name)) {
		log_fatal("fieldset",
			  "added field (%s) is not next expected field (%s).",
			  name, fs->fds->fielddefs[fs->len].name);
	}

	fs->len++;
	f->type = type;
	f->name = name;
	f->len = len;
	f->value = value;
	f->free_ = free_;
}

static void fs_modify_word(fieldset_t *fs, const char *name, int type,
			   int free_, size_t len, field_val_t value)
{
	for (int i = 0; i < fs->len; i++) {
		if (!strcmp(fs->fields[i].name, name)) {
			if (fs->fields[i].free_) {
				free(fs->fields[i].value.ptr);
				fs->fields[i].value.ptr = NULL;
			}
			fs->fields[i].type = type;
			fs->fields[i].free_ = free_;
			fs->fields[i].len = len;
			fs->fields[i].value = value;
			return;
		}
	}
	// TODO(ZD): We need to test, but this is really unsafe to just add because it
	// will all but guarantee that it's in the wrong place
	//fs_add_word(fs, name, type, free_, len, value);
	log_fatal("fs", "attempting to modify non-existent field");
}

static char *sanitize_utf8(const char *buf)
{
	const char *ptr = buf;

	// Count how many errors we encounter
	uint32_t i = 0;
	// Upper bounds to ensure termination even if u8_check is unsafe
	while (i < strlen(buf) && ptr < buf + strlen(buf)) {
		ptr = (char *)u8_check((uint8_t *)ptr, strlen(ptr));
		if (ptr == NULL) {
			break;
		}

		assert(ptr >= buf);
		assert(ptr < buf + strlen(buf));

		ptr++;
		i++;
	}

	// i is the total number of errors. We need 2 extra bytes for each rune
	char *safe_buf = xmalloc(strlen(buf) + i * 2 + 1);
	char *safe_ptr = NULL;
	memcpy(safe_buf, buf, strlen(buf));

	// Fix exactly i errors
	for (uint32_t j = 0; j < i; j++) {
		// Always operate on the working buffer
		safe_ptr =
		    (char *)u8_check((uint8_t *)safe_buf, strlen(safe_buf));

		// This implies we had less errors than we should.
		// This is temporary debug code.
		if (safe_ptr == NULL) {
			log_warn(
			    "fieldset",
			    "UTF8 Sanitization issue. %u errors, fell through iter %u. Orig: %s new: %s",
			    i, j, buf, safe_buf);
			i = j;
			break;
		}

		// XXX Uncomment when we remove above log_warn.
		// assert(safe_ptr != NULL);
		assert(safe_ptr >= safe_buf);
		assert(safe_ptr < safe_buf + strlen(safe_buf));

		// Shift the rest of the string by 2 bytes
		if (strlen(safe_ptr) > 1) {
			memcpy(safe_ptr + 3, safe_ptr + 1,
			       strlen(safe_ptr + 1));
		}

		// UTF8 replacement rune
		safe_ptr[0] = (char)0xef;
		safe_ptr[1] = (char)0xbf;
		safe_ptr[2] = (char)0xbd;
	}

	// We now have a valid utf8 string
	assert(u8_check((uint8_t *)safe_buf, strlen(safe_buf)) == NULL);
	// We should be null terminated
	assert(safe_buf[strlen(buf) + i * 2] == '\0');
	// We should be the right length
	assert(strlen(safe_buf) == (strlen(buf) + i * 2));

	return safe_buf;
}

void fs_add_null(fieldset_t *fs, const char *name)
{
	field_val_t val = {.ptr = NULL};
	fs_add_word(fs, name, FS_NULL, 0, 0, val);
}

void fs_add_string(fieldset_t *fs, const char *name, char *value, int free_)
{
	field_val_t val = {.ptr = value};
	fs_add_word(fs, name, FS_STRING, free_, strlen(value), val);
}

void fs_add_unsafe_string(fieldset_t *fs, const char *name, char *value,
			  int free_)
{
	if (u8_check((uint8_t *)value, strlen(value)) == NULL) {
		field_val_t val = {.ptr = value};
		fs_add_word(fs, name, FS_STRING, free_, strlen(value), val);
	} else {
		char *safe_value = sanitize_utf8(value);

		if (free_) {
			free(value);
		}

		field_val_t val = {.ptr = safe_value};
		fs_add_word(fs, name, FS_STRING, 1, strlen(safe_value), val);
	}
}

void fs_chkadd_string(fieldset_t *fs, const char *name, char *value, int free_)
{
	if (value) {
		fs_add_string(fs, name, value, free_);
	} else {
		fs_add_null(fs, name);
	}
}

void fs_chkadd_unsafe_string(fieldset_t *fs, const char *name, char *value,
			     int free_)
{
	if (value) {
		fs_add_unsafe_string(fs, name, value, free_);
	} else {
		fs_add_null(fs, name);
	}
}

void fs_add_constchar(fieldset_t *fs, const char *name, const char *value)
{
	field_val_t val = {.ptr = (char *)value};
	fs_add_word(fs, name, FS_STRING, 0, strlen(value), val);
}

void fs_add_uint64(fieldset_t *fs, const char *name, uint64_t value)
{
	field_val_t val = {.num = value};
	fs_add_word(fs, name, FS_UINT64, 0, sizeof(uint64_t), val);
}

void fs_add_bool(fieldset_t *fs, const char *name, int value)
{
	field_val_t val = {.num = value};
	fs_add_word(fs, name, FS_BOOL, 0, sizeof(int), val);
}

void fs_add_binary(fieldset_t *fs, const char *name, size_t len, void *value,
		   int free_)
{
	field_val_t val = {.ptr = value};
	fs_add_word(fs, name, FS_BINARY, free_, len, val);
}

void fs_add_fieldset(fieldset_t *fs, const char *name, fieldset_t *child)
{
	field_val_t val = {.ptr = child};
	fs_add_word(fs, name, FS_FIELDSET, 1, sizeof(void *), val);
}

void fs_add_repeated(fieldset_t *fs, const char *name, fieldset_t *child)
{
	field_val_t val = {.ptr = child};
	fs_add_word(fs, name, FS_REPEATED, 1, sizeof(void *), val);
}

// Modify
void fs_modify_null(fieldset_t *fs, const char *name)
{
	field_val_t val = {.ptr = NULL};
	fs_modify_word(fs, name, FS_NULL, 0, 0, val);
}

void fs_modify_string(fieldset_t *fs, const char *name, char *value, int free_)
{
	field_val_t val = {.ptr = value};
	fs_modify_word(fs, name, FS_STRING, free_, strlen(value), val);
}

void fs_modify_constchar(fieldset_t *fs, const char *name, const char *value)
{
	field_val_t val = {.ptr = (char *)value};
	fs_modify_word(fs, name, FS_STRING, 0, strlen(value), val);
}

void fs_modify_uint64(fieldset_t *fs, const char *name, uint64_t value)
{
	field_val_t val = {.num = value};
	fs_modify_word(fs, name, FS_UINT64, 0, sizeof(uint64_t), val);
}

void fs_modify_bool(fieldset_t *fs, const char *name, int value)
{
	field_val_t val = {.num = value};
	fs_modify_word(fs, name, FS_BOOL, 0, sizeof(int), val);
}

void fs_modify_binary(fieldset_t *fs, const char *name, size_t len, void *value,
		      int free_)
{
	field_val_t val = {.ptr = value};
	fs_modify_word(fs, name, FS_BINARY, free_, len, val);
}

uint64_t fs_get_uint64_by_index(fieldset_t *fs, int index)
{
	return (uint64_t)fs->fields[index].value.num;
}

char *fs_get_string_by_index(fieldset_t *fs, int index)
{
	return (char *)fs->fields[index].value.ptr;
}

int fds_get_index_by_name(fielddefset_t *fds, const char *name)
{
	for (int i = 0; i < fds->len; i++) {
		if (!strcmp(fds->fielddefs[i].name, name)) {
			return i;
		}
	}
	return -1;
}

void field_free(field_t *f)
{
	if (f->type == FS_FIELDSET || f->type == FS_REPEATED) {
		fs_free((fieldset_t *)f->value.ptr);
	} else if (f->free_) {
		free(f->value.ptr);
	}
}

void fs_free(fieldset_t *fs)
{
	if (!fs) {
		return;
	}
	for (int i = 0; i < fs->len; i++) {
		field_t *f = &(fs->fields[i]);
		field_free(f);
	}
	free(fs);
}

void fs_generate_fieldset_translation(translation_t *t, fielddefset_t *avail,
				      const char **req, int reqlen)
{
	memset(t, 0, sizeof(translation_t));
	if (!t) {
		log_fatal("fieldset",
			  "unable to allocate memory for translation");
	}
	for (int i = 0; i < reqlen; i++) {
		int l = fds_get_index_by_name(avail, req[i]);
		if (l < 0) {
			log_fatal("fieldset",
				  "specified field (%s) not "
				  "available in selected "
				  "probe module.",
				  req[i]);
		}
		t->translation[t->len++] = l;
	}
}

void fs_generate_full_fieldset_translation(translation_t *t,
					   fielddefset_t *avail)
{
	memset(t, 0, sizeof(translation_t));
	if (!t) {
		log_fatal("fieldset",
			  "unable to allocate memory for translation");
	}
	t->len = avail->len;
	for (int i = 0; i < avail->len; i++) {
		t->translation[i] = i;
	}
}

fieldset_t *translate_fieldset(fieldset_t *fs, translation_t *t)
{
	fieldset_t *retv = fs_new_fieldset(NULL);
	if (!retv) {
		log_fatal("fieldset",
			  "unable to allocate space for translated field set");
	}
	for (int i = 0; i < t->len; i++) {
		int o = t->translation[i];
		memcpy(&(retv->fields[i]), &(fs->fields[o]), sizeof(field_t));
	}
	retv->len = t->len;
	return retv;
}