master
  1#include <sys/socket.h>
  2#include <netinet/in.h>
  3#include <netinet/tcp.h>
  4#include <netdb.h>
  5#include <arpa/inet.h>
  6#include <stdint.h>
  7#include <string.h>
  8#include <poll.h>
  9#include <time.h>
 10#include <ctype.h>
 11#include <unistd.h>
 12#include <errno.h>
 13#include <pthread.h>
 14#include "stdio_impl.h"
 15#include "syscall.h"
 16#include "lookup.h"
 17
 18static void cleanup(void *p)
 19{
 20	struct pollfd *pfd = p;
 21	for (int i=0; pfd[i].fd >= -1; i++)
 22		if (pfd[i].fd >= 0) __syscall(SYS_close, pfd[i].fd);
 23}
 24
 25static unsigned long mtime()
 26{
 27	struct timespec ts;
 28	if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0 && errno == ENOSYS)
 29		clock_gettime(CLOCK_REALTIME, &ts);
 30	return (unsigned long)ts.tv_sec * 1000
 31		+ ts.tv_nsec / 1000000;
 32}
 33
 34static int start_tcp(struct pollfd *pfd, int family, const void *sa, socklen_t sl, const unsigned char *q, int ql)
 35{
 36	struct msghdr mh = {
 37		.msg_name = (void *)sa,
 38		.msg_namelen = sl,
 39		.msg_iovlen = 2,
 40		.msg_iov = (struct iovec [2]){
 41			{ .iov_base = (uint8_t[]){ ql>>8, ql }, .iov_len = 2 },
 42			{ .iov_base = (void *)q, .iov_len = ql } }
 43	};
 44	int r;
 45	int fd = socket(family, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
 46	pfd->fd = fd;
 47	pfd->events = POLLOUT;
 48	if (!setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT,
 49	    &(int){1}, sizeof(int))) {
 50		r = sendmsg(fd, &mh, MSG_FASTOPEN|MSG_NOSIGNAL);
 51		if (r == ql+2) pfd->events = POLLIN;
 52		if (r >= 0) return r;
 53		if (errno == EINPROGRESS) return 0;
 54	}
 55	r = connect(fd, sa, sl);
 56	if (!r || errno == EINPROGRESS) return 0;
 57	close(fd);
 58	pfd->fd = -1;
 59	return -1;
 60}
 61
 62static void step_mh(struct msghdr *mh, size_t n)
 63{
 64	/* Adjust iovec in msghdr to skip first n bytes. */
 65	while (mh->msg_iovlen && n >= mh->msg_iov->iov_len) {
 66		n -= mh->msg_iov->iov_len;
 67		mh->msg_iov++;
 68		mh->msg_iovlen--;
 69	}
 70	if (!mh->msg_iovlen) return;
 71	mh->msg_iov->iov_base = (char *)mh->msg_iov->iov_base + n;
 72	mh->msg_iov->iov_len -= n;
 73}
 74
 75/* Internal contract for __res_msend[_rc]: asize must be >=512, nqueries
 76 * must be sufficiently small to be safe as VLA size. In practice it's
 77 * either 1 or 2, anyway. */
 78
 79int __res_msend_rc(int nqueries, const unsigned char *const *queries,
 80	const int *qlens, unsigned char *const *answers, int *alens, int asize,
 81	const struct resolvconf *conf)
 82{
 83	int fd;
 84	int timeout, attempts, retry_interval, servfail_retry;
 85	union {
 86		struct sockaddr_in sin;
 87		struct sockaddr_in6 sin6;
 88	} sa = {0}, ns[MAXNS] = {{0}};
 89	socklen_t sl = sizeof sa.sin;
 90	int nns = 0;
 91	int family = AF_INET;
 92	int rlen;
 93	int next;
 94	int i, j;
 95	int cs;
 96	struct pollfd pfd[nqueries+2];
 97	int qpos[nqueries], apos[nqueries];
 98	unsigned char alen_buf[nqueries][2];
 99	int r;
100	unsigned long t0, t1, t2;
101
102	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
103
104	timeout = 1000*conf->timeout;
105	attempts = conf->attempts;
106
107	for (nns=0; nns<conf->nns; nns++) {
108		const struct address *iplit = &conf->ns[nns];
109		if (iplit->family == AF_INET) {
110			memcpy(&ns[nns].sin.sin_addr, iplit->addr, 4);
111			ns[nns].sin.sin_port = htons(53);
112			ns[nns].sin.sin_family = AF_INET;
113		} else {
114			sl = sizeof sa.sin6;
115			memcpy(&ns[nns].sin6.sin6_addr, iplit->addr, 16);
116			ns[nns].sin6.sin6_port = htons(53);
117			ns[nns].sin6.sin6_scope_id = iplit->scopeid;
118			ns[nns].sin6.sin6_family = family = AF_INET6;
119		}
120	}
121
122	/* Get local address and open/bind a socket */
123	fd = socket(family, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
124
125	/* Handle case where system lacks IPv6 support */
126	if (fd < 0 && family == AF_INET6 && errno == EAFNOSUPPORT) {
127		for (i=0; i<nns && conf->ns[nns].family == AF_INET6; i++);
128		if (i==nns) {
129			pthread_setcancelstate(cs, 0);
130			return -1;
131		}
132		fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
133		family = AF_INET;
134		sl = sizeof sa.sin;
135	}
136
137	/* Convert any IPv4 addresses in a mixed environment to v4-mapped */
138	if (fd >= 0 && family == AF_INET6) {
139		setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &(int){0}, sizeof 0);
140		for (i=0; i<nns; i++) {
141			if (ns[i].sin.sin_family != AF_INET) continue;
142			memcpy(ns[i].sin6.sin6_addr.s6_addr+12,
143				&ns[i].sin.sin_addr, 4);
144			memcpy(ns[i].sin6.sin6_addr.s6_addr,
145				"\0\0\0\0\0\0\0\0\0\0\xff\xff", 12);
146			ns[i].sin6.sin6_family = AF_INET6;
147			ns[i].sin6.sin6_flowinfo = 0;
148			ns[i].sin6.sin6_scope_id = 0;
149		}
150	}
151
152	sa.sin.sin_family = family;
153	if (fd < 0 || bind(fd, (void *)&sa, sl) < 0) {
154		if (fd >= 0) close(fd);
155		pthread_setcancelstate(cs, 0);
156		return -1;
157	}
158
159	/* Past this point, there are no errors. Each individual query will
160	 * yield either no reply (indicated by zero length) or an answer
161	 * packet which is up to the caller to interpret. */
162
163	for (i=0; i<nqueries; i++) pfd[i].fd = -1;
164	pfd[nqueries].fd = fd;
165	pfd[nqueries].events = POLLIN;
166	pfd[nqueries+1].fd = -2;
167
168	pthread_cleanup_push(cleanup, pfd);
169	pthread_setcancelstate(cs, 0);
170
171	memset(alens, 0, sizeof *alens * nqueries);
172
173	retry_interval = timeout / attempts;
174	next = 0;
175	t0 = t2 = mtime();
176	t1 = t2 - retry_interval;
177
178	for (; t2-t0 < timeout; t2=mtime()) {
179		/* This is the loop exit condition: that all queries
180		 * have an accepted answer. */
181		for (i=0; i<nqueries && alens[i]>0; i++);
182		if (i==nqueries) break;
183
184		if (t2-t1 >= retry_interval) {
185			/* Query all configured namservers in parallel */
186			for (i=0; i<nqueries; i++)
187				if (!alens[i])
188					for (j=0; j<nns; j++)
189						sendto(fd, queries[i],
190							qlens[i], MSG_NOSIGNAL,
191							(void *)&ns[j], sl);
192			t1 = t2;
193			servfail_retry = 2 * nqueries;
194		}
195
196		/* Wait for a response, or until time to retry */
197		if (poll(pfd, nqueries+1, t1+retry_interval-t2) <= 0) continue;
198
199		while (next < nqueries) {
200			struct msghdr mh = {
201				.msg_name = (void *)&sa,
202				.msg_namelen = sl,
203				.msg_iovlen = 1,
204				.msg_iov = (struct iovec []){
205					{ .iov_base = (void *)answers[next],
206					  .iov_len = asize }
207				}
208			};
209			rlen = recvmsg(fd, &mh, 0);
210			if (rlen < 0) break;
211
212			/* Ignore non-identifiable packets */
213			if (rlen < 4) continue;
214
215			/* Ignore replies from addresses we didn't send to */
216			for (j=0; j<nns && memcmp(ns+j, &sa, sl); j++);
217			if (j==nns) continue;
218
219			/* Find which query this answer goes with, if any */
220			for (i=next; i<nqueries && (
221				answers[next][0] != queries[i][0] ||
222				answers[next][1] != queries[i][1] ); i++);
223			if (i==nqueries) continue;
224			if (alens[i]) continue;
225
226			/* Only accept positive or negative responses;
227			 * retry immediately on server failure, and ignore
228			 * all other codes such as refusal. */
229			switch (answers[next][3] & 15) {
230			case 0:
231			case 3:
232				break;
233			case 2:
234				if (servfail_retry && servfail_retry--)
235					sendto(fd, queries[i],
236						qlens[i], MSG_NOSIGNAL,
237						(void *)&ns[j], sl);
238			default:
239				continue;
240			}
241
242			/* Store answer in the right slot, or update next
243			 * available temp slot if it's already in place. */
244			alens[i] = rlen;
245			if (i == next)
246				for (; next<nqueries && alens[next]; next++);
247			else
248				memcpy(answers[i], answers[next], rlen);
249
250			/* Ignore further UDP if all slots full or TCP-mode */
251			if (next == nqueries) pfd[nqueries].events = 0;
252
253			/* If answer is truncated (TC bit), fallback to TCP */
254			if ((answers[i][2] & 2) || (mh.msg_flags & MSG_TRUNC)) {
255				alens[i] = -1;
256				pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, 0);
257				r = start_tcp(pfd+i, family, ns+j, sl, queries[i], qlens[i]);
258				pthread_setcancelstate(cs, 0);
259				if (r >= 0) {
260					qpos[i] = r;
261					apos[i] = 0;
262				}
263				continue;
264			}
265		}
266
267		for (i=0; i<nqueries; i++) if (pfd[i].revents & POLLOUT) {
268			struct msghdr mh = {
269				.msg_iovlen = 2,
270				.msg_iov = (struct iovec [2]){
271					{ .iov_base = (uint8_t[]){ qlens[i]>>8, qlens[i] }, .iov_len = 2 },
272					{ .iov_base = (void *)queries[i], .iov_len = qlens[i] } }
273			};
274			step_mh(&mh, qpos[i]);
275			r = sendmsg(pfd[i].fd, &mh, MSG_NOSIGNAL);
276			if (r < 0) goto out;
277			qpos[i] += r;
278			if (qpos[i] == qlens[i]+2)
279				pfd[i].events = POLLIN;
280		}
281
282		for (i=0; i<nqueries; i++) if (pfd[i].revents & POLLIN) {
283			struct msghdr mh = {
284				.msg_iovlen = 2,
285				.msg_iov = (struct iovec [2]){
286					{ .iov_base = alen_buf[i], .iov_len = 2 },
287					{ .iov_base = answers[i], .iov_len = asize } }
288			};
289			step_mh(&mh, apos[i]);
290			r = recvmsg(pfd[i].fd, &mh, 0);
291			if (r <= 0) goto out;
292			apos[i] += r;
293			if (apos[i] < 2) continue;
294			int alen = alen_buf[i][0]*256 + alen_buf[i][1];
295			if (alen < 13) goto out;
296			if (apos[i] < alen+2 && apos[i] < asize+2)
297				continue;
298			int rcode = answers[i][3] & 15;
299			if (rcode != 0 && rcode != 3)
300				goto out;
301
302			/* Storing the length here commits the accepted answer.
303			 * Immediately close TCP socket so as not to consume
304			 * resources we no longer need. */
305			alens[i] = alen;
306			__syscall(SYS_close, pfd[i].fd);
307			pfd[i].fd = -1;
308		}
309	}
310out:
311	pthread_cleanup_pop(1);
312
313	/* Disregard any incomplete TCP results */
314	for (i=0; i<nqueries; i++) if (alens[i]<0) alens[i] = 0;
315
316	return 0;
317}
318
319int __res_msend(int nqueries, const unsigned char *const *queries,
320	const int *qlens, unsigned char *const *answers, int *alens, int asize)
321{
322	struct resolvconf conf;
323	if (__get_resolv_conf(&conf, 0, 0) < 0) return -1;
324	return __res_msend_rc(nqueries, queries, qlens, answers, alens, asize, &conf);
325}