Marco Davids suggested on Twitter that I should try using mdig for bulk lookups. I have previously used adns for this (it can handily manage 20kqps single threaded), and I have briefly tried out zdns (which isn't as efficient).

I tried feeding mdig the Alexa top 1 million list:

$ sed '1d;s|^[0-9]*,||;s|/.*||' top-1m.csv | time mdig +noall +answer -f - A @::1

and it failed:

mdig: dns_request_createvia4 failed with quota reached

Looking at the code, it tries to send all the queries at once, which is obviously not going to work for big jobs! (In fact mdig sets a dispatch quota of 100 which restricts the maximum job size.)

I hacked around a bit to try adding a concurrency limit, along the lines I have previously done for adns - see the patch below. This seems to work reasonably OK, e.g.

$ sed '1d;s|^[0-9]*,||;s|/.*||' ~/work/dns/adns/top-1m.csv | head -100000 | time ./mdig +continue +noall -C 1000 -f - A @::1
9.74user 0.95system 0:14.41elapsed 74%CPU

A couple of problems, though:

- mdig sometimes goes into a spin if I send it a SIGINT and/or fails with the seemingly spurious error "dns_name_fromtext failed with label too long". I don't know why - signal handling in BIND is still mysterious to me :-)

- It slurps in the entire query list before starting to talk to the DNS server, which can use a lot of RAM (about 1 kB per query).

- I suspect there may be a better way to do this concurrency limit stuff, making more effective use of the dispatch quota - at the moment the code is enforcing the concurrency limit in two places, in mdig and in the dns library.

If I give mdig more than a few hundred thousand queries it seems to hang at the end of the job. On further investigation, there is something accidentally quadratic in the cleanup code!
100k 0:11.45 200k 0:24.01 300k 0:40.44 400k 1:01.51 500k 1:27.97 If I add an _exit() after isc_app_run() returns, before the query list is free()d, I get: 100k 0:10.23 200k 0:15.87 300k 0:20.08 400k 0:26.08 500k 0:29.89 1M 0:59.96 (adns can resolve this list in about 46s) If I _exit() just after the query list is free()d, the times are similar to the first list. Looks like something slooow in the memory manager. --- bin/tools/mdig.c | 44 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/bin/tools/mdig.c b/bin/tools/mdig.c index eaf477c..034bf91 100644 --- a/bin/tools/mdig.c +++ b/bin/tools/mdig.c @@ -51,6 +51,12 @@ #include +#if 0 +#define TRACE(...) printf(__VA_ARGS__) +#else +#define TRACE(...) +#endif + #define CHECK(str, x) { \ if ((x) != ISC_R_SUCCESS) { \ fprintf(stderr, "mdig: %s failed with %s\n", \ @@ -110,6 +116,7 @@ static in_port_t port = 53; static isc_dscp_t dscp = -1; static unsigned char cookie_secret[33]; static int onfly = 0; +static int concurrency = 100; static char hexcookie[81]; struct query { @@ -140,6 +147,9 @@ struct query { }; static struct query default_query; static ISC_LIST(struct query) queries; +static struct query *pending; + +static void sendqueries(isc_task_t *task, isc_event_t *event); #define EDNSOPTS 100U /*% opcode text */ @@ -477,8 +487,18 @@ cleanup: dns_request_destroy(&reqev->request); isc_event_free(&event); - if (--onfly == 0) + --onfly; + if (pending != NULL && (onfly < concurrency * 9/10 || onfly < 10)) { + /* XXX is this the right event type? copied from app.c ... 
*/ + event = isc_event_allocate(mctx, task, ISC_APPEVENT_SHUTDOWN, + sendqueries, pending, + sizeof(*event)); + if (event == NULL) + CHECK("isc_event_allocate", ISC_R_NOMEMORY); + isc_task_send(task, &event); + } else if (onfly == 0) isc_app_shutdown(); + return; } @@ -695,19 +715,23 @@ sendquery(struct query *query, isc_task_t *task) static void sendqueries(isc_task_t *task, isc_event_t *event) { - struct query *query = (struct query *)event->ev_arg; + struct query *query = pending; isc_event_free(&event); - while (query != NULL) { + while (query != NULL && onfly < concurrency) { struct query *next = ISC_LIST_NEXT(query, link); sendquery(query, task); query = next; } + TRACE("onfly = %d pending = %d\n", onfly, query != NULL); - if (onfly == 0) + if (query == NULL && onfly == 0) isc_app_shutdown(); + else + pending = query; + return; } @@ -740,6 +764,7 @@ help(void) { " global opt is one of:\n" " -4 (use IPv4 query transport only)\n" " -6 (use IPv6 query transport only)\n" +" -C count (concurrency limit, default 100)\n" " -b address[#port] (bind to source address/port)\n" " -p port (specify port number)\n" " +[no]dscp[=###] (Set the DSCP value to ### [0..63])\n" @@ -1624,6 +1649,11 @@ dash_option(const char *option, char *next, struct query *query, *hash = '#'; have_src = ISC_TRUE; return (value_from_next); + case 'C': + concurrency = atoi(value); + if (concurrency < 1) + fatal("concurrency must be greater than zero"); + return (value_from_next); case 'c': tr.base = value; tr.length = strlen(value); @@ -1931,7 +1961,7 @@ isc_mem_debugging = ISC_MEM_DEBUGRECORD; dispatchvx = NULL; RUNCHECK(dns_dispatch_getudp(dispatchmgr, socketmgr, taskmgr, have_src ? 
&srcaddr : &bind_any, - 4096, 100, 100, 17, 19, + 4096, concurrency, concurrency, 17, 19, attrs, attrmask, &dispatchvx)); requestmgr = NULL; RUNCHECK(dns_requestmgr_create(mctx, timermgr, socketmgr, @@ -1943,8 +1973,8 @@ isc_mem_debugging = ISC_MEM_DEBUGRECORD; view = NULL; RUNCHECK(dns_view_create(mctx, 0, "_test", &view)); - query = ISC_LIST_HEAD(queries); - RUNCHECK(isc_app_onrun(mctx, task, sendqueries, query)); + pending = ISC_LIST_HEAD(queries); + RUNCHECK(isc_app_onrun(mctx, task, sendqueries, pending)); (void)isc_app_run(); -- 2.10.1.445.g3cdd5d1 _______________________________________________ bind-workers mailing list bind-workers@lists.isc.org https://lists.isc.org/mailman/listinfo/bind-workers