/*
* ipq berkeley db daemon emitting verdicts - ibd-judge
* written by ale in milano on 10 sep 2008

Copyright (C) 2008-2023 Alessandro Vesely

This file is part of Ipqbdb.

Ipqbdb is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Ipqbdb is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Ipqbdb.  If not, see <http://www.gnu.org/licenses/>.

*/
// #define _GNU_SOURCE 1  needed?
#include <errno.h>
#include <ctype.h>
#include <limits.h>
#include <netinet/in.h>

#include <libmnl/libmnl.h>
#include <linux/netfilter.h> // NF_DROP, NF_ACCEPT
// #include <linux/netfilter/nfnetlink.h> not necessary

// #include <linux/types.h> not necessary
// #include <linux/netfilter/nfnetlink_queue.h> not necessary
#include <libnetfilter_queue/libnetfilter_queue.h> // nfq_* functions
#include <linux/netfilter/nfnetlink_conntrack.h>  // CTA_MARK

// Berkeley DB v4.4
#include <db.h>

#include <popt.h>

// format of data packet
#include "config_names.h"
#include "dbstruct.h"

#include "percent_prob.h"
#include "rehabilitated_prob.h"
#include "in_range_ip6.h"

#include <assert.h>

// Application state passed to report_error() and to the database helpers;
// ap.mode also gates the TRACE macro below.
static app_private ap;
// Program name: used as message prefix, popt context name and syslog ident.
static char const err_prefix[] = "ibd-judge";
// NOTE(review): included here rather than with the other headers,
// presumably because it relies on the two definitions above -- confirm.
#include "setsig_func.h"

// visible for TESTjudge
// Verbosity level.  Starts at -1 and is incremented once in main() after
// option parsing, so it is 0 when -v is not given.
int verbose = -1;

// options for netfilter libraries
// netlink_bufsize is only referenced from disabled (#if 0) code in
// daemon_loop(); netlink_no_enobufs enables NETLINK_NO_ENOBUFS there.
static int netlink_bufsize, netlink_no_enobufs;

// #if !defined NDEBUG
/*
* Debug trace to stderr.  The message is printed only while ap.mode is 0
* and the global verbose level is at least v; the remaining arguments are
* the fprintf format and its values.  The while/break pair acts as a
* one-shot "if", so the expansion is a single statement that callers
* terminate with a semicolon.
*/
#define TRACE(v, x...) \
	while (ap.mode == 0 && verbose >= (v)) \
		{ fprintf(stderr, ## x); break; }
// #else
// #define TRACE(x...)
// #endif


/*
* Description of one netfilter queue served by the daemon, as given by a
* queue argument on the command line (see parse_queue_descr).
*/
typedef struct queue_descr
{
	// values set by parse_queue_descr
	uint32_t mark_value;  // host order; meaningful only when mark is set
	unsigned int queue_maxlen; // 0 = leave the kernel default untouched
	uint16_t queue_num;   // netfilter queue number
	unsigned char dest;   // 1 = look up destination address, 0 = source
	unsigned char mark;   // 1 = mark guilty packets instead of dropping
	unsigned char nu[4];  // NOTE(review): never referenced in this file, presumably padding
} queue_descr;


static int parse_queue_descr(char const *descr, queue_descr* out)
/*
* Parse a queue argument such as "Q2M16DL10240".
*
* The argument is a sequence of case-insensitive specifiers with no blanks:
*   Q<n>  queue number (0..UINT16_MAX)
*   M<n>  mark value (0..INT32_MAX), also enables marking
*   L<n>  maximum queue length (0..UINT_MAX)
*   S     use the source address (default)
*   D     use the destination address
* S and D are mutually exclusive.  Numbers are read by strtoul with base 0
* and must start with a digit, so a missing number or a sign is an error.
*
* If out is not NULL it receives the parsed values, even on failure.
* Return 0 if the whole string was parsed, -1 otherwise.
*/
{
	queue_descr q;
	int ch, rtc = 0, source = 0;

	memset(&q, 0, sizeof q);

	while (rtc == 0 && isalpha(ch = *(unsigned char const*)descr++))
	{
		int const spec = tolower(ch);
		unsigned long l = 0;

		if (spec == 'l' || spec == 'm' || spec == 'q')
		{
			/*
			* These specifiers require a number.  Insist on a leading
			* digit: strtoul alone would take "Q" as 0 and would wrap
			* negative input around to a huge unsigned value.
			*/
			if (!isdigit(*(unsigned char const*)descr))
				rtc = -1;
			else
			{
				char *t = NULL;
				errno = 0;
				l = strtoul(descr, &t, 0);
				if (errno == ERANGE)
					rtc = -1;
				descr = t;
			}
		}

		if (rtc)
			break; // ch is still the offending letter, hence non-zero

		switch (spec)
		{
			case 'd': // dest
				if (source)
					rtc = -1;
				q.dest = 1;
				break;
			case 'l': //length
				if (l <= UINT_MAX)
					q.queue_maxlen = (unsigned int)l;
				else
					rtc = -1;
				break;
			case 'm': // mark
				/*
				* The verdict code carries the mark in an int where
				* negative values mean "drop", so larger marks cannot
				* be honoured and are rejected here.
				*/
				if (l <= INT32_MAX)
				{
					q.mark_value = (uint32_t)l; // was htonl in 0.4
					q.mark = 1;
				}
				else
					rtc = -1;
				break;
			case 'q': // queue
				if (l <= UINT16_MAX)
					q.queue_num = (uint16_t)l;
				else
					rtc = -1;
				break;
			case 's': // source
				if (q.dest)
					rtc = -1;
				source = 1;
				break;
			default:
				rtc = -1;
				break;
		}
	}
	if (ch) // should be 0-terminated
		rtc = -1;

	if (out)
		*out = q;

	return rtc;
}

/*
Possible verdicts and guessed behavior:

#define NF_DROP 0
the packet is discarded

#define NF_ACCEPT 1
the packet passes, continue iterations

#define NF_STOLEN 2
gone away,

#define NF_QUEUE 3
inject into a different queue
(the queue number is in the high 16 bits of verdict)

#define NF_REPEAT 4
iterate the same cycle once more

#define NF_STOP 5
accept, but don't continue iteration

*/

#define SEND_KERNEL_BUFSIZE 256
static int
send_kernel(struct mnl_socket *nl, struct nlmsghdr *nlh,
	int queue_num, char const *what)
/*
* Send an already built netlink message to the kernel.  The message is
* expected to live in a buffer of SEND_KERNEL_BUFSIZE bytes.  what and
* queue_num are only used for the error report.
* Return 0 on success, -1 if sending failed (the error is logged).
*/
{
	assert(nlh->nlmsg_len < SEND_KERNEL_BUFSIZE);

	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) >= 0)
		return 0;

	report_error(&ap, LOG_CRIT,
		"send_kernel fail on %s, queue %d: %s (%0x)\n",
		what, queue_num, strerror(errno), errno);
	return -1;
}

static int create_queue(struct mnl_socket *nl, int queue_num)
/* Bind this socket to the given queue: build a config message carrying
 * the BIND command and send it.  The ruleset has to use the same queue
 * number to deliver packets to userspace.
 * Return the result of send_kernel (0 on success, -1 on failure).
 */
{
	char msg[SEND_KERNEL_BUFSIZE];
	struct nlmsghdr *hdr;

	hdr = nfq_nlmsg_put(msg, NFQNL_MSG_CONFIG, queue_num);
	nfq_nlmsg_cfg_put_cmd(hdr, AF_INET, NFQNL_CFG_CMD_BIND);

	int const rc = send_kernel(nl, hdr, queue_num, __func__);
	return rc;
}

static int destroy_queue(struct mnl_socket *nl, int queue_num)
/* Release the given queue: build a config message carrying the UNBIND
 * command and send it.  Counterpart of create_queue.
 * Return the result of send_kernel (0 on success, -1 on failure).
 */
{
	char msg[SEND_KERNEL_BUFSIZE];
	struct nlmsghdr *hdr;

	hdr = nfq_nlmsg_put(msg, NFQNL_MSG_CONFIG, queue_num);
	nfq_nlmsg_cfg_put_cmd(hdr, AF_INET, NFQNL_CFG_CMD_UNBIND);

	int const rc = send_kernel(nl, hdr, queue_num, __func__);
	return rc;
}

static int set_queue_copy_size(struct mnl_socket *nl, int queue_num, int range)
/* Ask the kernel to copy packet data to userspace for this queue
 * (NFQNL_COPY_PACKET), limited to range bytes per packet.
 * Return the result of send_kernel (0 on success, -1 on failure).
 */
{
	// Zero-filled on purpose: the original author found the message
	// to contain uninitialized bytes otherwise.
	char msg[SEND_KERNEL_BUFSIZE] = { 0 };
	struct nlmsghdr *hdr;

	hdr = nfq_nlmsg_put(msg, NFQNL_MSG_CONFIG, queue_num);
	nfq_nlmsg_cfg_put_params(hdr, NFQNL_COPY_PACKET, range);

	int const rc = send_kernel(nl, hdr, queue_num, __func__);
	return rc;
}

static int set_queue_maxlen(struct mnl_socket *nl, int queue_num, uint32_t maxlen)
/* Tell the kernel how many packets the queue may hold: build a config
 * message carrying maxlen and send it.
 * Return the result of send_kernel (0 on success, -1 on failure).
 */
{
	char msg[SEND_KERNEL_BUFSIZE];
	struct nlmsghdr *hdr;

	hdr = nfq_nlmsg_put(msg, NFQNL_MSG_CONFIG, queue_num);
	nfq_nlmsg_cfg_put_qmaxlen(hdr, maxlen);

	int const rc = send_kernel(nl, hdr, queue_num, __func__);
	return rc;
}

static int set_queue_flags(struct mnl_socket *nl, int queue_num, uint32_t flags)
/* Turn on queue flags: the same value is sent both as the flag set and
 * as the mask selecting which flags to change.
 * Possible flags values are:
 *
 * NFQA_CFG_F_FAIL_OPEN
 * NFQA_CFG_F_CONNTRACK
 * NFQA_CFG_F_GSO
 *   Kernel has aggregated several packets into one single packet via
 *   Generic Segmentation Offload (GSO).
 * NFQA_CFG_F_UID_GID
 * NFQA_CFG_F_SECCTX
 *
 * Return the result of send_kernel (0 on success, -1 on failure).
 */
{
	char msg[SEND_KERNEL_BUFSIZE];
	uint32_t const wire_flags = htonl(flags); // attributes travel in network order
	struct nlmsghdr *hdr = nfq_nlmsg_put(msg, NFQNL_MSG_CONFIG, queue_num);

	mnl_attr_put_u32(hdr, NFQA_CFG_FLAGS, wire_flags);
	mnl_attr_put_u32(hdr, NFQA_CFG_MASK, wire_flags);

	int const rc = send_kernel(nl, hdr, queue_num, __func__);
	return rc;
}

static int set_packet_verdict(struct mnl_socket *nl, int queue_num, uint32_t id, int what)
/*
* Issue the verdict for packet id on the given queue.  The what argument
* selects the behavior:
*  what < 0   drop the packet (callers pass -1)
*  what == 0  plain accept
*  what > 0   accept, and set what as the connection mark (CTA_MARK
*             nested inside NFQA_CT)
* Return the result of send_kernel (0 on success, -1 on failure).
*/
{
	char msg[SEND_KERNEL_BUFSIZE];
	int const nf_verdict = what < 0? NF_DROP: NF_ACCEPT;
	struct nlmsghdr *hdr = nfq_nlmsg_put(msg, NFQNL_MSG_VERDICT, queue_num);

	nfq_nlmsg_verdict_put(hdr, id, nf_verdict);

	if (what > 0)
	{
		struct nlattr *ct = mnl_attr_nest_start(hdr, NFQA_CT);
		mnl_attr_put_u32(hdr, CTA_MARK, htonl(what));
		mnl_attr_nest_end(hdr, ct);
	}

	int const rc = send_kernel(nl, hdr, queue_num, __func__);
	return rc;
}

/*
* Context handed to queue_cb through mnl_cb_run; filled in by daemon_loop.
*/
typedef struct data_cb_t
{
	struct mnl_socket *nl;     // socket used to send verdicts back
	queue_descr *queue_data;   // array of configured queues
	DB *db, *db6;              // IPv4 and IPv6 databases
	unsigned queue_data_cnt;   // number of elements in queue_data
	unsigned martian_packets;  // count of logged non-IPv4/IPv6 packets (capped at 100)
	uint32_t deadbeef;         // set to 0xdeadbeef; asserted in queue_cb as a sanity check
} data_cb_t;


//enum nfqnl_attr_type {
	//NFQA_UNSPEC,
	//NFQA_PACKET_HDR,
	//NFQA_VERDICT_HDR,		/* nfqnl_msg_verdict_hrd */
	//NFQA_MARK,			/* __u32 nfmark */
	//NFQA_TIMESTAMP,			/* nfqnl_msg_packet_timestamp */
	//NFQA_IFINDEX_INDEV,		/* __u32 ifindex */
	//NFQA_IFINDEX_OUTDEV,		/* __u32 ifindex */
	//NFQA_IFINDEX_PHYSINDEV,		/* __u32 ifindex */
	//NFQA_IFINDEX_PHYSOUTDEV,	/* __u32 ifindex */
	//NFQA_HWADDR,			/* nfqnl_msg_packet_hw */
	//NFQA_PAYLOAD,			/* opaque data payload */
	//NFQA_CT,			/* nf_conntrack_netlink.h */
	//NFQA_CT_INFO,			/* enum ip_conntrack_info */
	//NFQA_CAP_LEN,			/* __u32 length of captured packet */
	//NFQA_SKB_INFO,			/* __u32 skb meta information */
	//NFQA_EXP,			/* nf_conntrack_netlink.h */
	//NFQA_UID,			/* __u32 sk uid */
	//NFQA_GID,			/* __u32 sk gid */
	//NFQA_SECCTX,			/* security context string */
	//NFQA_VLAN,			/* nested attribute: packet vlan info */
	//NFQA_L2HDR,			/* full L2 header */

	//__NFQA_MAX
//};

static void
dump(void const *buffer, unsigned length)
/*
* Hex dump of length bytes at buffer, written to stderr.
*
* Each row shows up to 16 bytes as two-digit hex values, with an extra
* blank between the 8th and the 9th, followed by the same bytes as
* characters (non-printable ones shown as '.').  Short rows are padded so
* that the character column stays aligned with full rows.
*/
{
	unsigned char const *buf = (unsigned char const*)buffer;
	unsigned char const *const ebuf = buf + length;

	while (buf < ebuf)
	{
		int top = 16;
		if (ebuf - buf < 16)
			top = (int)(ebuf - buf);

		for (int i = 0; i < 16; ++i)
		{
			if (i == 8)
				putc(' ', stderr); // everything goes to stderr, never stdout
			if (i < top)
				fprintf(stderr, " %02x", buf[i]);
			else
				fputs("   ", stderr); // same width as one " %02x" cell
		}
		putc(' ', stderr);
		for (int i = 0; i < top; ++i)
			putc(isprint(buf[i])? buf[i]: '.', stderr);
		putc('\n', stderr);
		buf += top;
	}
}

static const char*dump_buf(char *out, char const *buf, uint32_t size)
/*
* Write the size bytes at buf as lowercase hex digits into out, followed
* by a terminating 0.  out must have room for 2*size + 1 chars.
* Return out.
*/
{
	static char const digits[] = "0123456789abcdef";
	unsigned char const *src = (unsigned char const*)buf;
	char *dst = out;

	for (uint32_t i = 0; i < size; ++i)
	{
		unsigned const byte = src[i];
		*dst++ = digits[byte >> 4];   // high nibble first
		*dst++ = digits[byte & 0xf];
	}
	*dst = 0;
	return out;
}

static inline const char*dump_key(DBT const *key, char *out)
/*
* Hex representation of a database key, via dump_buf.
* out must be at least 2*key->size + 1 chars.  Return out.
*/
{
	char const *const bytes = key->data;
	uint32_t const len = key->size;

	return dump_buf(out, bytes, len);
}

static int queue_cb(const struct nlmsghdr *nlh, void *data)
/*
* Callback run by mnl_cb_run for each message received from the kernel;
* data is the data_cb_t prepared by daemon_loop.
*
* The packet's source or destination address (as configured for the queue
* it arrived on) is looked up in the IPv4 or IPv6 database.  If a record
* is found, it decides whether the packet is accepted or blocked; a
* blocked packet is dropped or, if the queue has a mark, accepted with
* that mark.  Packets with no record, no payload, or an unusable header
* are accepted.  The record is written back when a block is ruled.
*
* Return MNL_CB_OK after the verdict has been sent, MNL_CB_ERROR if the
* message cannot be parsed, belongs to an unknown queue, or the verdict
* cannot be sent.
*/
{
	data_cb_t *data_cb = data;
	struct nlattr *attr[NFQA_MAX+4] = {};

	assert(data_cb);
	assert(data_cb->deadbeef == 0xdeadbeef);

	/* Parse netlink message received from the kernel, the array of
	 * attributes is set up to store metadata and the actual packet.
	 */
	if (nfq_nlmsg_parse(nlh, attr) < 0)
	{
		report_error(&ap, LOG_CRIT, "problems parsing");
		return MNL_CB_ERROR;
	}

	if (attr[NFQA_PACKET_HDR] == NULL)
	{
		report_error(&ap, LOG_CRIT, "metaheader not set");
		return MNL_CB_ERROR;
	}

#if 0
	char attr_flag[__NFQA_MAX];
	for (int i = 0; i < __NFQA_MAX; ++i)
		attr_flag[i] = attr[i]? '*': '_';
	TRACE(4, "%s ", attr_flag);
#endif

	int verdict = 0;  // 0 = accept, -1 or mark
	struct nfgenmsg *nfg = mnl_nlmsg_get_payload(nlh);
	uint16_t queue_num = ntohs(nfg->res_id);

	// find the descriptor of the queue this packet arrived on
	queue_descr *qu = NULL;
	for (unsigned i = 0; i < data_cb->queue_data_cnt; ++i)
	{
		qu = &data_cb->queue_data[i];
		if (qu->queue_num == queue_num)
			break;
	}
	if (qu == NULL || qu->queue_num != queue_num)
	{
		report_error(&ap, LOG_CRIT,
			"Received data for non-existent queue %d", queue_num);
		return MNL_CB_ERROR;
	}

	/* Access packet metadata, which provides unique packet ID, hook number
	 * and ethertype. See struct nfqnl_msg_packet_hdr for details.
	 */
	struct nfqnl_msg_packet_hdr *ph = mnl_attr_get_payload(attr[NFQA_PACKET_HDR]);

	/* Access actual packet data length. */
	uint16_t plen = attr[NFQA_PAYLOAD]?
		mnl_attr_get_payload_len(attr[NFQA_PAYLOAD]): 0;
	uint32_t id = ntohl(ph->packet_id);

	// packet time rounded to the nearest second, or current time
	time_t now;
	if (attr[NFQA_TIMESTAMP])
	{
		struct nfqnl_msg_packet_timestamp* ts = mnl_attr_get_payload(attr[NFQA_TIMESTAMP]);
		struct timeval tv;
		tv.tv_sec = be64toh(ts->sec);
		tv.tv_usec = be64toh(ts->usec);
		now = tv.tv_sec + (tv.tv_usec > 500000);
	}
	else
		now = time(NULL);

#if 0
	uint32_t old_mark = 0;
	if (attr[NFQA_MARK])
	{
		uint32_t *p = mnl_attr_get_payload(attr[NFQA_MARK]);
		old_mark = ntohl(*p);
	}
#endif

	/* Access actual packet data */
	char *payload = attr[NFQA_PAYLOAD]?
		mnl_attr_get_payload(attr[NFQA_PAYLOAD]): NULL;
	if (payload && plen > 0)
	{
		int const ip_version = (payload[0] & 0xf0) >> 4;
		/*
		* Addresses are read at fixed offsets: up to byte 20 for IPv4
		* and up to byte 40 for IPv6.  If fewer bytes were captured,
		* reading them would run past the attribute, so such packets
		* are handled as martians (af = 0).
		*/
		int const af =
			(ip_version == 4 && plen >= 20) ||
			(ip_version == 6 && plen >= 40)? ip_version: 0;
		char source[INET6_ADDRSTRLEN], dest[INET6_ADDRSTRLEN];
		char const *ip_addr_str = "???"; // should never appear
		if (ap.mode == 0 && verbose)
		{
			char const *s, *d;
			if (af == 4)
			{
				s = inet_ntop(AF_INET, payload + 12, source, sizeof source);
				d = inet_ntop(AF_INET, payload + 16, dest, sizeof dest);
			}
			else if (af == 6)
			{
				s = inet_ntop(AF_INET6, payload + 8, source, sizeof source);
				d = inet_ntop(AF_INET6, payload + 24, dest, sizeof dest);
			}
			else // martians exist?
				s = d = "unknown";

			if (verbose >= 3)
			{
				fprintf(stderr,
					"RECV in queue %d, af=%d, %s %d, len %d, id %u: %s -> %s\n",
					queue_num, ip_version,
					ph? "hook": "_!*!BAD!*!_", ph? ph->hook: -1,
					plen, id, s, d);
				if (verbose >= 5)
					dump(payload, plen);
			}

			ip_addr_str = qu->dest? d: s;
		}

		DBT key, data;
		DB *db = NULL;
		DBC *dbc6 = NULL;
		ip_data_t ip_data;

		memset(&key, 0, sizeof key);
		memset(&data, 0, sizeof data);
		memset(&ip_data, 0, sizeof ip_data);
		data.ulen = data.size = sizeof ip_data;
		data.data = &ip_data;
		key.flags = data.flags = DB_DBT_USERMEM;

		int rc = -1; // return code;
		int found = 0;
		int need_write = 0;

		if (af == 4) // IPv4
		{
			db = data_cb->db;
			key.data = payload + (qu->dest? 16: 12);
			key.ulen = key.size = 4;

			rc = db->get(db, NULL, &key, &data, 0);
			if (rc == 0 &&
				data.size == sizeof ip_data &&
				ip_data.chk == IPQBDB_CHK_SIGNATURE)
					found = 1;
		}
		else if (af == 6) // IPv6
		/*
		* Although this is the only write cursor, we must open and
		* close it every time.  Any attempt to create a second write
		* cursor or to perform a non-cursor write operation while a
		* write cursor is open will block until that write cursor is
		* closed.
		*/
		{
			char *address = payload + (qu->dest? 24: 8);

			static const unsigned char ten_zeroes[10] = {0};
			if (memcmp(address, ten_zeroes, 10) == 0)
			/*
			* IPv4 mapped addresses are stored as IPv4.
			* NOTE(review): only the ten leading zero bytes are tested,
			* not the ff:ff marker, so any address starting with 80 zero
			* bits takes this branch -- confirm that is intended.
			*/
			{
				db = data_cb->db;
				key.data = &address[12];
				key.ulen = key.size = 4;

				rc = db->get(db, NULL, &key, &data, 0);
				if (rc == 0 &&
					data.size == sizeof ip_data &&
					ip_data.chk == IPQBDB_CHK_SIGNATURE)
						found = 1;
			}
			else
			{
				DB *db6 = data_cb->db6;
				rc = db6->cursor(db6, NULL, &dbc6, DB_WRITECURSOR);
				if (rc == 0)
				{
					// keep the packet's address: the lookup below uses
					// the same buffer for the key it returns
					unsigned char save[16];
					memcpy(save, address, 16);

					key.data = address;
					key.ulen = key.size = 16;
					rc = dbc6->get(dbc6, &key, &data,  DB_SET_RANGE);
					if (rc == 0)
					{
						if (in_range_ip6(save, key.data, ip_data.plen))
							found = 1;
						else
							rc = DB_NOTFOUND;
					}
				}
			}
		}
		else
		{
			// log the first 100 martians only, to avoid flooding the log
			if (data_cb->martian_packets < 100)
			{
				char buf[33];
				report_error(&ap, LOG_CRIT,
					"%s martian packet on queue %d: %s\n",
					data_cb->martian_packets < 99?
						"Received": "Last message on",
					queue_num,
					dump_buf(buf, payload, plen < 16? plen: 16));
				data_cb->martian_packets += 1;
			}

			rc = DB_NOTFOUND;
		}

		if (found)
		{
			if (ip_data.probability <= 0)
			{
				TRACE(2, "ACCEPT %s: probability=%d\n",
					ip_addr_str, ip_data.probability);
			}
			else if ((ip_data.last_block + IPQBDB_RECENT_DROP_TIME >= now &&
				ip_data.rep_ruling < IPQBDB_REPETITION_MAX) ||
				ip_data.decay >= IPQBDB_NEVER_DECAY) // static block
			{
				// blocked recently (or forever): repeat the ruling
				// without tossing again
				verdict = -1;
				ip_data.last_block = now;
				ip_data.block_cnt += 1;
				ip_data.rep_ruling += 1;
				need_write = 1;
				TRACE(2, "BLOCK  %s/%d %s\n", ip_addr_str,
					af == 4? 32: ip_data.plen,
					ip_data.decay >= IPQBDB_NEVER_DECAY? "forever": "again");
			}
			else
			{
				// rehabilitate
				time_t delta = now - ip_data.last_update;
				if (IPQBDB_UPDATE_TICK_TIME < delta && ip_data.decay > 0.0)
				{
					unsigned int const prob =
						rehabilitated_prob(ip_data.probability,
							delta, ip_data.decay);
					if (prob < (unsigned) ip_data.probability)
					{
						ip_data.probability = prob;
						ip_data.last_update = now;
						/*
						* Was setting need_write = 1 in version 1.x,
						* but rehabilitation works the same if it is
						* upgraded in steps or all in one (up to
						* rounding.)  Omitting this write implies
						* write_block() must also rehabilitate.
						*/
					}
				}

				// rule
				int const toss = rand();
				if (toss < ip_data.probability)
				{
					verdict = -1;
					ip_data.last_block = now;
					ip_data.block_cnt += 1;
					ip_data.rep_ruling = 0;
					need_write = 1;
				}
				// else possibly allow rep_ruling to stay above max

				TRACE(2, "%s %s/%d: probability=%.2f%%: toss=%.2f, delta=%ld, reason=%d\n",
					verdict < 0? "BLOCK ": "ACCEPT",
					ip_addr_str, af == 4? 32: ip_data.plen,
					percent_prob(ip_data.probability),
					percent_prob(toss),
					(long)delta, ip_data.reason_id);
			}

			/*
			* update the record (re-insert it if it was deleted meanwhile)
			*/
			if (need_write)
			{
				if (dbc6)
					rc = dbc6->put(dbc6, &key, &data,  DB_CURRENT);
				else if (db)
					rc = db->put(db, NULL, &key, &data, 0);
				if (rc)
				{
					char key_dump[34];
					report_error(&ap, LOG_CRIT,
						"cannot write record for %s: %s\n",
							dump_key(&key, key_dump), db_strerror(rc));
				}
			}
		}
		else if (rc != DB_NOTFOUND)
		{
			char key_dump[34];
			report_error(&ap, LOG_CRIT, "cannot read key %s: %s\n",
				dump_key(&key, key_dump),
				rc? db_strerror(rc): "bad data record");
		}
		else
		{
			TRACE(2, "ACCEPT %s: not in db\n",
				ip_addr_str);
		}

		// a block becomes "accept with mark" on queues that have a mark
		if (verdict >= 0 || qu->mark == 0)
		{
			TRACE(1, "%s %s\n",
				verdict < 0? "BLOCK ": "ACCEPT", ip_addr_str);
		}
		else
		{
			verdict = qu->mark_value;
			TRACE(1, "MARK %#x %s\n", verdict, ip_addr_str);
		}

		if (dbc6)
			dbc6->close(dbc6);
	}
	else // no payload
		TRACE(3, "RECV in queue %d, %s %d, len %d, NO PAYLOAD\n",
			qu->queue_num, ph? "hook": "_!*!BAD!*!_", ph? ph->hook: -1,
			plen);

	if (set_packet_verdict(data_cb->nl, queue_num, id, verdict))
		return MNL_CB_ERROR;

	return MNL_CB_OK;
}

// Queues served by the daemon.  Without queue arguments this is the
// single zero-initialized default_queue entry (queue 0, source address,
// no mark); otherwise main() replaces it with a malloc'ed array.
static queue_descr default_queue[1], *queue_data = default_queue;
// Number of elements in queue_data.
static size_t queue_data_cnt = 1;

static int
daemon_loop(struct mnl_socket *nl, DB *db, DB *db6)
/*
* Bind and configure every queue in queue_data, then receive messages
* from the kernel and hand them to queue_cb until a signal is caught or
* an error occurs.  The queues are unbound before returning, except when
* their setup failed.
*
* Return value, used by main() as follows:
*   1   a queue could not be set up; exit
*   0   terminated by a signal other than SIGHUP; exit
*  -1   SIGHUP or a (hopefully transient) error; reopen and run again
*/
{
	char buf[8192];

#if 0
// Leftover from version 1.  Can this be configured with libmnl?
	ipaddr_cnt cb_cnt;
	memset(&cb_cnt, 0, sizeof cb_cnt);


	if (verbose)
	{
		int bufsize = get_buffer_size(fd);
		if (bufsize >= 0)
			report_error(&ap, LOG_INFO, "netlink socket recv size %0x\n",
				bufsize);
	}

	if (netlink_bufsize)
	{
		socklen_t bufsize = netlink_bufsize;
		if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &bufsize, sizeof bufsize))
			report_error(&ap, LOG_CRIT,
				"setsockopt fail for fd %d size = %0x (%s)\n",
					fd, netlink_bufsize, strerror(errno));
		else if (verbose)
		{
			int newsize = get_buffer_size(fd);
			if (newsize >= 0)
				report_error(&ap, LOG_INFO,
					"netlink socket recv size set to %0x, now %0x\n",
					netlink_bufsize, newsize);
		}
	}
#endif

	if (netlink_no_enobufs)
	{
		int on = 1;
		if (mnl_socket_setsockopt(nl, NETLINK_NO_ENOBUFS, &on, sizeof on) != 0)
		{
			report_error(&ap, LOG_ERR, "cannot suppress ENOBUFS: %s\n",
				strerror(errno));
			// ignore the error
		}
	}

	for (size_t i = 0; i < queue_data_cnt; ++i)
	{
		queue_descr *const qu = &queue_data[i];

		if (create_queue(nl, qu->queue_num))
			return 1;

		// 40 bytes should be enough to get both IPv6 addresses
		if (set_queue_copy_size(nl, qu->queue_num, 40))
			return 1;

		// Receive one large GSO packet instead of many original
		// packets of MTU size
		if (set_queue_flags(nl, qu->queue_num, NFQA_CFG_F_GSO))
			return 1;

		if (qu->queue_maxlen &&
			set_queue_maxlen(nl, qu->queue_num, qu->queue_maxlen))
				return 1;
	}

	unsigned int portid = mnl_socket_get_portid(nl);
	data_cb_t data_cb;
	memset(&data_cb, 0, sizeof data_cb);
	data_cb.nl = nl;
	data_cb.queue_data = queue_data;
	data_cb.db = db;
	data_cb.db6 = db6;
	data_cb.queue_data_cnt = queue_data_cnt;
	data_cb.deadbeef = 0xdeadbeef;

	while (caught_signal == 0)
	{
		ssize_t const nbytes = mnl_socket_recvfrom(nl, buf, sizeof buf);
		if (nbytes == -1)
		{
			if (errno == EINTR)
			{
				// Interrupted by a signal: not a failure.  The loop
				// condition picks up caught_signal if it was set.
				continue;
			}

			if (errno == ENOBUFS)
			{
				report_error(&ap, LOG_ERR, "packet(s) lost\n");
				continue;
			}

			if (errno == ENOSPC)
			{
				// Nothing usable was received; don't hand a length
				// of -1 to mnl_cb_run.
				report_error(&ap, LOG_ERR, "packet truncated\n");
				continue;
			}

			report_error(&ap, LOG_ERR,
				"mnl_socket_recvfrom failed: %s (%0x)\n",
				strerror(errno), errno);
			break;
		}

		int rtc = mnl_cb_run(buf, nbytes, 0, portid, queue_cb, &data_cb);
		/*
		* Your callback may return three possible values:
		* - MNL_CB_ERROR (<=-1): an error has occurred. Stop callback runqueue.
		* - MNL_CB_STOP (=0): stop callback runqueue.
		* - MNL_CB_OK (>=1): no problems has occurred.
		*/
		if (rtc <= 0)
			break;
	}

	int rv;
	/*
	* The loop ends either because a signal was caught or because of an
	* error in receiving or in the callback.
	*/
	if (caught_signal)
	{
		if (caught_signal == SIGHUP)
		{
			report_error(&ap, LOG_INFO, "reopening on SIGHUP\n");
			caught_signal = 0;
			rv = -1; // enter daemon_loop again
		}
		else
		{
			report_error(&ap, LOG_INFO, "exiting on signal %s\n",
				strsignal(caught_signal));
			rv = 0;  // program exit code
		}
	}
	else
	{
		// Logging should be done by failing callback.
		rv = -1; // enter daemon_loop again, hopefully the error is transient
	}

	for (unsigned i = 0; i < queue_data_cnt; ++i)
	{
		queue_descr *const qu = &queue_data[i];
		destroy_queue(nl, qu->queue_num);
	}

	return rv;
}

// Database file name; default can be overridden with -b/--db-block.
static char *db_name = IPQBDB_DATABASE_NAME;
// Flags set by popt when the corresponding option is given.
static int version_opt, help_opt, no_daemon_opt, no_cleanup_opt;
/*
* Command line options.  Every entry stores its value directly into a
* variable (val is 0), so poptGetNextOpt returns only at the end of the
* options or on error.
*/
static struct poptOption opttab[] =
{
	{"db-block", 'b', POPT_ARG_STRING|POPT_ARGFLAG_SHOW_DEFAULT, &db_name, 0,
	"The database where IPv4 addresses are looked up, and stem for IPv6", "filename"},
	{"no-daemon", 'D', POPT_ARG_NONE, &no_daemon_opt, 0,
	"Stay foreground and use stderr", NULL},
	// optional argument: see how main() adjusts verbose after parsing
	{"verbose", 'v', POPT_ARG_INT|POPT_ARGFLAG_OPTIONAL, &verbose, 0,
	"Be verbose", "level"},
	{"no-db-cleanup", '\0', POPT_ARG_NONE, &no_cleanup_opt, 0,
	"On exit don't cleanup environment (__db.00? files) if not still busy", NULL},
	// NOTE(review): netlink_bufsize is only used by disabled code in daemon_loop
	{"netlink-bufsize", '\0', POPT_ARG_INT, &netlink_bufsize, 0,
	"Set nfnetlink buffer size", "bytes"},
	{"netlink-no-enobufs", '\0', POPT_ARG_NONE, &netlink_no_enobufs, 0,
	"Suppress error notifications for dropped packets", NULL},
	{"version", 'V', POPT_ARG_NONE, &version_opt, 0,
	"Print version number and exit", NULL},
	{"help", 'h', POPT_ARG_NONE, &help_opt, 0,
	"This help.", NULL},
	POPT_TABLEEND
};

int main(int argc, char const *argv[])
/*
* Parse options and queue arguments, optionally become a daemon, then
* keep opening the database and a netlink socket and running daemon_loop
* until it returns a non-negative value.
*
* Exit status: 0 after a terminating signal, 1 on usage errors, after
* --help/--version, or on setup failures, 2 if the netlink socket cannot
* be opened or bound.
*/
{
	static const char optaliases[] = IPQBDB_OPTION_FILE;
	// errs: 1 = usage error (print usage), 2 = help/version shown,
	// 3 = other error; any non-zero value prevents running the daemon
	int rtc = 0, errs = 0;

	poptContext opt = poptGetContext(err_prefix, argc, argv, opttab, 0);

	if (access(optaliases, F_OK) == 0 &&
		(rtc = poptReadConfigFile(opt, optaliases)) < 0)
	{
		fprintf(stderr, "%s: cannot read %s: %s\n",
			err_prefix, optaliases, poptStrerror(rtc));
		errs = 3;
	}

	rtc = poptGetNextOpt(opt);
	if (rtc != -1)
	{
		fprintf(stderr, "%s: %s\n",
			err_prefix, poptStrerror(rtc));
		errs = 1;
	}
	else
	{
		char const **q_argv = poptGetArgs(opt);
		size_t q_argc = 0;

		// first pass: count the queue arguments and validate their syntax
		if (q_argv)
			while (q_argv[q_argc] != NULL)
			{
				if (parse_queue_descr(q_argv[q_argc], NULL))
				{
					fprintf(stderr, "%s: invalid queue description: %s\n",
						err_prefix, q_argv[q_argc]);
					errs = 1;
				}
				++q_argc;
			}

		if (version_opt)
		{
			fprintf(stdout, "%s: version " PACKAGE_VERSION "\n", err_prefix);
			errs = 2;
		}

		// popt sets verbose to 0 if no arg is given
		// otherwise it stays -1
		verbose += 1;

		if (help_opt)
		{
			// popt bug: cannot access opt->otherHelp
			printf("Usage: %s [OPTION...] [QUEUEARG...]\n", err_prefix);
			poptPrintHelp(opt, stdout, 0);
			fputs_database_help();
			fputs("\n"
"Queue arguments can be specified as Qn[Mn][S|D][Ln], with no blanks, where\n"
"QMSDL are (case insensitive) specifiers, and n are suitable decimal integers.\n"
"Only one of S, for source, or D, for destination address, must be specified;\n"
"S is assumed by default. When the selected address is found to deserve being\n"
"blocked, if M, for mark, is specified, the packet is marked and accepted, else\n"
"it is dropped. L specifies the configured max length of the queue, in packets.\n"
"\n"
"For example, \"Q2M16DL10240\" will set queue_maxlen to 10240 for queue #2, and\n"
"then check the destination addresses of those packets, marking guilty packets\n"
"with the value 16, which can be tested with -m mark --mark 16 on a subsequent\n"
"iptables table.\n"
"\n"
"Zero or more queue arguments can be specified. \"Q0S\" is assumed by default.\n",
				stdout);
			errs = 2;
		}

		// second pass: build the queue table and reject duplicates
		if (errs == 0 && q_argc > 0)
		{
			queue_descr *queue =
				(queue_descr*)malloc(q_argc * sizeof(queue_descr));
			if (queue == NULL)
				errs = 3;
			else
			{
				for (size_t i = 0; i < q_argc; ++i)
					parse_queue_descr(q_argv[i], &queue[i]);

				// check duplicates
				for (size_t i = 0; i < q_argc; ++i)
					for (size_t j = 0; j < i; ++j)
						if (queue[j].queue_num == queue[i].queue_num)
						{
							fprintf(stderr,
								"%s: queues %s [%zu] and %s [%zu]"
								" have the same queue number %d\n",
								err_prefix, q_argv[j], j, q_argv[i], i, queue[i].queue_num);
							errs = 1;
						}
			}

			if (errs == 1)
			{
				free(queue);
				fputs("queue arg: Q<queue-num>[M<mark-num>][S|D]\n", stderr);
				errs = 3;
			}
			else
			{
				// on malloc failure this stores NULL, but errs is set
				// so the table is never used, only freed at exit
				queue_data = queue;
				queue_data_cnt = q_argc;
			}
		}
	}

	if (errs == 1)
		poptPrintUsage(opt, stderr, 0);
	poptFreeContext(opt);
	rtc = 0;

	if (errs)
	{
		rtc = 1;
	}
	else
	{
		ap.mode = error_report_stderr; // 0
		ap.err_prefix = err_prefix;

		switchable_fname *fname = database_fname(db_name, &ap);
		if (fname == NULL)
			rtc = 1;
		else
		{
			// name of the step that failed, if any
			char const *what = NULL;
			if (no_daemon_opt == 0)
			{
				// detach, then move to the directory of the database
				char *p = strrchr(fname->fname, '/');
				if (what == NULL && daemon(p != NULL /* nochdir */, 0))
					what = "daemon";
				if (what == NULL && setsigs())
					what = "sigaction";
				if (p)
				{
					// temporarily cut the file name off the path
					*p = 0;
					if (chdir(fname->fname))
						what = "chdir";
					*p = '/';
				}
				// from now on errors go to syslog
				openlog(err_prefix, LOG_PID, LOG_DAEMON);
				ap.mode = LOG_DAEMON;
			}
			else if (setsigs())
				what = "sigaction";

			if (what)
			{
				report_error(&ap, LOG_CRIT, "cannot %s: %s - exiting\n",
					what, strerror(errno));
				rtc = 1;
			}
		}

		while (rtc == 0)
		{
			DB_ENV *db_env = NULL;
			DB *db = NULL, *db6 = NULL;
			TRACE(3, "Open database %s\n", fname->fname);
			rtc = open_database(fname, &ap, &db_env, &db, &db6);

			if (rtc == 0)
			{
				// exit status if the netlink socket cannot be set up
				rtc = 2;

				// printf("opening library handle\n");
				struct mnl_socket *nl = mnl_socket_open(NETLINK_NETFILTER);
				if (!nl)
				{
					report_error(&ap, LOG_CRIT, "error during mnl_socket_open()\n");
				}
				else
				{
					if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0)
					{
						report_error(&ap, LOG_CRIT, "error during mnl_socket_bind()\n");
					}
					else
					{
						rtc = daemon_loop(nl, db, db6);
						TRACE(2, "Daemon loop exited with rtc=%d\n", rtc);
					}

					mnl_socket_close(nl);
				}


				close_db(db);
				close_db(db6);
				close_dbenv(db_env, !no_cleanup_opt);
				TRACE(3, "Database closed\n");
				if (rtc >= 0)
				{
					free(fname);
					break;
				}

				/*
				* when daemon_loop returns a negative number,
				* enter the loop again.
				*/
				TRACE(2, "Restarting loop in 2 secs...\n");
				rtc = 0;
				sleep(2);
			}
		}
	}

	if (queue_data != default_queue)
		free(queue_data);

	TRACE(2, "Exit with status %d\n", rtc);
	return rtc;
}
