/* master */
  1#include <stdio.h>
  2#include <stdlib.h>
  3#include <stdarg.h>
  4#include <ctype.h>
  5#include <wchar.h>
  6#include <wctype.h>
  7#include <limits.h>
  8#include <string.h>
  9
 10#include "stdio_impl.h"
 11#include "shgetc.h"
 12#include "intscan.h"
 13#include "floatscan.h"
 14
 15#define SIZE_hh -2
 16#define SIZE_h  -1
 17#define SIZE_def 0
 18#define SIZE_l   1
 19#define SIZE_L   2
 20#define SIZE_ll  3
 21
 22static void store_int(void *dest, int size, unsigned long long i)
 23{
 24	if (!dest) return;
 25	switch (size) {
 26	case SIZE_hh:
 27		*(char *)dest = i;
 28		break;
 29	case SIZE_h:
 30		*(short *)dest = i;
 31		break;
 32	case SIZE_def:
 33		*(int *)dest = i;
 34		break;
 35	case SIZE_l:
 36		*(long *)dest = i;
 37		break;
 38	case SIZE_ll:
 39		*(long long *)dest = i;
 40		break;
 41	}
 42}
 43
 44static void *arg_n(va_list ap, unsigned int n)
 45{
 46	void *p;
 47	unsigned int i;
 48	va_list ap2;
 49	va_copy(ap2, ap);
 50	for (i=n; i>1; i--) va_arg(ap2, void *);
 51	p = va_arg(ap2, void *);
 52	va_end(ap2);
 53	return p;
 54}
 55
 56static int in_set(const wchar_t *set, int c)
 57{
 58	int j;
 59	const wchar_t *p = set;
 60	if (*p == '-') {
 61		if (c=='-') return 1;
 62		p++;
 63	} else if (*p == ']') {
 64		if (c==']') return 1;
 65		p++;
 66	}
 67	for (; *p && *p != ']'; p++) {
 68		if (*p=='-' && p[1] && p[1] != ']')
 69			for (j=p++[-1]; j<*p; j++)
 70				if (c==j) return 1;
 71		if (c==*p) return 1;
 72	}
 73	return 0;
 74}
 75
 76#if 1
 77#undef getwc
 78#define getwc(f) \
 79	((f)->rpos != (f)->rend && *(f)->rpos < 128 ? *(f)->rpos++ : (getwc)(f))
 80
 81#undef ungetwc
 82#define ungetwc(c,f) \
 83	((f)->rend && (c)<128U ? *--(f)->rpos : ungetwc((c),(f)))
 84#endif
 85
 86int vfwscanf(FILE *restrict f, const wchar_t *restrict fmt, va_list ap)
 87{
 88	int width;
 89	int size;
 90	int alloc;
 91	const wchar_t *p;
 92	int c, t;
 93	char *s;
 94	wchar_t *wcs;
 95	void *dest=NULL;
 96	int invert;
 97	int matches=0;
 98	off_t pos = 0, cnt;
 99	static const char size_pfx[][3] = { "hh", "h", "", "l", "L", "ll" };
100	char tmp[3*sizeof(int)+10];
101	const wchar_t *set;
102	size_t i, k;
103
104	FLOCK(f);
105
106	fwide(f, 1);
107
108	for (p=fmt; *p; p++) {
109
110		alloc = 0;
111
112		if (iswspace(*p)) {
113			while (iswspace(p[1])) p++;
114			while (iswspace((c=getwc(f)))) pos++;
115			ungetwc(c, f);
116			continue;
117		}
118		if (*p != '%' || p[1] == '%') {
119			if (*p == '%') {
120				p++;
121				while (iswspace((c=getwc(f)))) pos++;
122			} else {
123				c = getwc(f);
124			}
125			if (c!=*p) {
126				ungetwc(c, f);
127				if (c<0) goto input_fail;
128				goto match_fail;
129			}
130			pos++;
131			continue;
132		}
133
134		p++;
135		if (*p=='*') {
136			dest = 0; p++;
137		} else if (iswdigit(*p) && p[1]=='$') {
138			dest = arg_n(ap, *p-'0'); p+=2;
139		} else {
140			dest = va_arg(ap, void *);
141		}
142
143		for (width=0; iswdigit(*p); p++) {
144			width = 10*width + *p - '0';
145		}
146
147		if (*p=='m') {
148			wcs = 0;
149			s = 0;
150			alloc = !!dest;
151			p++;
152		} else {
153			alloc = 0;
154		}
155
156		size = SIZE_def;
157		switch (*p++) {
158		case 'h':
159			if (*p == 'h') p++, size = SIZE_hh;
160			else size = SIZE_h;
161			break;
162		case 'l':
163			if (*p == 'l') p++, size = SIZE_ll;
164			else size = SIZE_l;
165			break;
166		case 'j':
167			size = SIZE_ll;
168			break;
169		case 'z':
170		case 't':
171			size = SIZE_l;
172			break;
173		case 'L':
174			size = SIZE_L;
175			break;
176		case 'd': case 'i': case 'o': case 'u': case 'x':
177		case 'a': case 'e': case 'f': case 'g':
178		case 'A': case 'E': case 'F': case 'G': case 'X':
179		case 's': case 'c': case '[':
180		case 'S': case 'C':
181		case 'p': case 'n':
182			p--;
183			break;
184		default:
185			goto fmt_fail;
186		}
187
188		t = *p;
189
190		/* Transform S,C -> ls,lc */
191		if ((t&0x2f)==3) {
192			size = SIZE_l;
193			t |= 32;
194		}
195
196		if (t != 'n') {
197			if (t != '[' && (t|32) != 'c')
198				while (iswspace((c=getwc(f)))) pos++;
199			else
200				c=getwc(f);
201			if (c < 0) goto input_fail;
202			ungetwc(c, f);
203		}
204
205		switch (t) {
206		case 'n':
207			store_int(dest, size, pos);
208			/* do not increment match count, etc! */
209			continue;
210
211		case 's':
212		case 'c':
213		case '[':
214			if (t == 'c') {
215				if (width<1) width = 1;
216				invert = 1;
217				set = L"";
218			} else if (t == 's') {
219				invert = 1;
220				static const wchar_t spaces[] = {
221					' ', '\t', '\n', '\r', 11, 12,  0x0085,
222					0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
223					0x2006, 0x2008, 0x2009, 0x200a,
224					0x2028, 0x2029, 0x205f, 0x3000, 0 };
225				set = spaces;
226			} else {
227				if (*++p == '^') p++, invert = 1;
228				else invert = 0;
229				set = p;
230				if (*p==']') p++;
231				while (*p!=']') {
232					if (!*p) goto fmt_fail;
233					p++;
234				}
235			}
236
237			s = (size == SIZE_def) ? dest : 0;
238			wcs = (size == SIZE_l) ? dest : 0;
239
240			int gotmatch = 0;
241
242			if (width < 1) width = -1;
243
244			i = 0;
245			if (alloc) {
246				k = t=='c' ? width+1U : 31;
247				if (size == SIZE_l) {
248					wcs = malloc(k*sizeof(wchar_t));
249					if (!wcs) goto alloc_fail;
250				} else {
251					s = malloc(k);
252					if (!s) goto alloc_fail;
253				}
254			}
255			while (width) {
256				if ((c=getwc(f))<0) break;
257				if (in_set(set, c) == invert)
258					break;
259				if (wcs) {
260					wcs[i++] = c;
261					if (alloc && i==k) {
262						k += k+1;
263						wchar_t *tmp = realloc(wcs, k*sizeof(wchar_t));
264						if (!tmp) goto alloc_fail;
265						wcs = tmp;
266					}
267				} else if (size != SIZE_l) {
268					int l = wctomb(s?s+i:tmp, c);
269					if (l<0) goto input_fail;
270					i += l;
271					if (alloc && i > k-4) {
272						k += k+1;
273						char *tmp = realloc(s, k);
274						if (!tmp) goto alloc_fail;
275						s = tmp;
276					}
277				}
278				pos++;
279				width-=(width>0);
280				gotmatch=1;
281			}
282			if (width) {
283				ungetwc(c, f);
284				if (t == 'c' || !gotmatch) goto match_fail;
285			}
286
287			if (alloc) {
288				if (size == SIZE_l) *(wchar_t **)dest = wcs;
289				else *(char **)dest = s;
290			}
291			if (t != 'c') {
292				if (wcs) wcs[i] = 0;
293				if (s) s[i] = 0;
294			}
295			break;
296
297		case 'd': case 'i': case 'o': case 'u': case 'x':
298		case 'a': case 'e': case 'f': case 'g':
299		case 'A': case 'E': case 'F': case 'G': case 'X':
300		case 'p':
301			if (width < 1) width = 0;
302			snprintf(tmp, sizeof tmp, "%.*s%.0d%s%c%%lln",
303				1+!dest, "%*", width, size_pfx[size+2], t);
304			cnt = 0;
305			if (fscanf(f, tmp, dest?dest:&cnt, &cnt) == -1)
306				goto input_fail;
307			else if (!cnt)
308				goto match_fail;
309			pos += cnt;
310			break;
311		default:
312			goto fmt_fail;
313		}
314
315		if (dest) matches++;
316	}
317	if (0) {
318fmt_fail:
319alloc_fail:
320input_fail:
321		if (!matches) matches--;
322match_fail:
323		if (alloc) {
324			free(s);
325			free(wcs);
326		}
327	}
328	FUNLOCK(f);
329	return matches;
330}
331
/* weak_alias is defined outside this file; presumably this exposes
 * vfwscanf under the additional name __isoc99_vfwscanf as a weak
 * symbol -- confirm against the macro's definition. */
weak_alias(vfwscanf,__isoc99_vfwscanf);