master
  1//! Finds the end of an HTTP head in a stream.
  2
  3state: State = .start,
  4
  5pub const State = enum {
  6    start,
  7    seen_n,
  8    seen_r,
  9    seen_rn,
 10    seen_rnr,
 11    finished,
 12};
 13
 14/// Returns the number of bytes consumed by headers. This is always less
 15/// than or equal to `bytes.len`.
 16///
 17/// If the amount returned is less than `bytes.len`, the parser is in a
 18/// content state and the first byte of content is located at
 19/// `bytes[result]`.
 20pub fn feed(p: *HeadParser, bytes: []const u8) usize {
 21    const vector_len: comptime_int = @max(std.simd.suggestVectorLength(u8) orelse 1, 8);
 22    var index: usize = 0;
 23
 24    while (true) {
 25        switch (p.state) {
 26            .finished => return index,
 27            .start => switch (bytes.len - index) {
 28                0 => return index,
 29                1 => {
 30                    switch (bytes[index]) {
 31                        '\r' => p.state = .seen_r,
 32                        '\n' => p.state = .seen_n,
 33                        else => {},
 34                    }
 35
 36                    return index + 1;
 37                },
 38                2 => {
 39                    const b16 = int16(bytes[index..][0..2]);
 40                    const b8 = intShift(u8, b16);
 41
 42                    switch (b8) {
 43                        '\r' => p.state = .seen_r,
 44                        '\n' => p.state = .seen_n,
 45                        else => {},
 46                    }
 47
 48                    switch (b16) {
 49                        int16("\r\n") => p.state = .seen_rn,
 50                        int16("\n\n") => p.state = .finished,
 51                        else => {},
 52                    }
 53
 54                    return index + 2;
 55                },
 56                3 => {
 57                    const b24 = int24(bytes[index..][0..3]);
 58                    const b16 = intShift(u16, b24);
 59                    const b8 = intShift(u8, b24);
 60
 61                    switch (b8) {
 62                        '\r' => p.state = .seen_r,
 63                        '\n' => p.state = .seen_n,
 64                        else => {},
 65                    }
 66
 67                    switch (b16) {
 68                        int16("\r\n") => p.state = .seen_rn,
 69                        int16("\n\n") => p.state = .finished,
 70                        else => {},
 71                    }
 72
 73                    switch (b24) {
 74                        int24("\r\n\r") => p.state = .seen_rnr,
 75                        else => {},
 76                    }
 77
 78                    return index + 3;
 79                },
 80                4...vector_len - 1 => {
 81                    const b32 = int32(bytes[index..][0..4]);
 82                    const b24 = intShift(u24, b32);
 83                    const b16 = intShift(u16, b32);
 84                    const b8 = intShift(u8, b32);
 85
 86                    switch (b8) {
 87                        '\r' => p.state = .seen_r,
 88                        '\n' => p.state = .seen_n,
 89                        else => {},
 90                    }
 91
 92                    switch (b16) {
 93                        int16("\r\n") => p.state = .seen_rn,
 94                        int16("\n\n") => p.state = .finished,
 95                        else => {},
 96                    }
 97
 98                    switch (b24) {
 99                        int24("\r\n\r") => p.state = .seen_rnr,
100                        else => {},
101                    }
102
103                    switch (b32) {
104                        int32("\r\n\r\n") => p.state = .finished,
105                        else => {},
106                    }
107
108                    index += 4;
109                    continue;
110                },
111                else => {
112                    const Vector = @Vector(vector_len, u8);
113                    // const BoolVector = @Vector(vector_len, bool);
114                    const BitVector = @Vector(vector_len, u1);
115                    const SizeVector = @Vector(vector_len, u8);
116
117                    const chunk = bytes[index..][0..vector_len];
118                    const v: Vector = chunk.*;
119                    // depends on https://github.com/ziglang/zig/issues/19755
120                    // const matches_r: BitVector = @bitCast(v == @as(Vector, @splat('\r')));
121                    // const matches_n: BitVector = @bitCast(v == @as(Vector, @splat('\n')));
122                    const matches_r: BitVector = @select(u1, v == @as(Vector, @splat('\r')), @as(Vector, @splat(1)), @as(Vector, @splat(0)));
123                    const matches_n: BitVector = @select(u1, v == @as(Vector, @splat('\n')), @as(Vector, @splat(1)), @as(Vector, @splat(0)));
124                    const matches_or: SizeVector = matches_r | matches_n;
125
126                    const matches = @reduce(.Add, matches_or);
127                    switch (matches) {
128                        0 => {},
129                        1 => switch (chunk[vector_len - 1]) {
130                            '\r' => p.state = .seen_r,
131                            '\n' => p.state = .seen_n,
132                            else => {},
133                        },
134                        2 => {
135                            const b16 = int16(chunk[vector_len - 2 ..][0..2]);
136                            const b8 = intShift(u8, b16);
137
138                            switch (b8) {
139                                '\r' => p.state = .seen_r,
140                                '\n' => p.state = .seen_n,
141                                else => {},
142                            }
143
144                            switch (b16) {
145                                int16("\r\n") => p.state = .seen_rn,
146                                int16("\n\n") => p.state = .finished,
147                                else => {},
148                            }
149                        },
150                        3 => {
151                            const b24 = int24(chunk[vector_len - 3 ..][0..3]);
152                            const b16 = intShift(u16, b24);
153                            const b8 = intShift(u8, b24);
154
155                            switch (b8) {
156                                '\r' => p.state = .seen_r,
157                                '\n' => p.state = .seen_n,
158                                else => {},
159                            }
160
161                            switch (b16) {
162                                int16("\r\n") => p.state = .seen_rn,
163                                int16("\n\n") => p.state = .finished,
164                                else => {},
165                            }
166
167                            switch (b24) {
168                                int24("\r\n\r") => p.state = .seen_rnr,
169                                else => {},
170                            }
171                        },
172                        4...vector_len => {
173                            inline for (0..vector_len - 3) |i_usize| {
174                                const i = @as(u32, @truncate(i_usize));
175
176                                const b32 = int32(chunk[i..][0..4]);
177                                const b16 = intShift(u16, b32);
178
179                                if (b32 == int32("\r\n\r\n")) {
180                                    p.state = .finished;
181                                    return index + i + 4;
182                                } else if (b16 == int16("\n\n")) {
183                                    p.state = .finished;
184                                    return index + i + 2;
185                                }
186                            }
187
188                            const b24 = int24(chunk[vector_len - 3 ..][0..3]);
189                            const b16 = intShift(u16, b24);
190                            const b8 = intShift(u8, b24);
191
192                            switch (b8) {
193                                '\r' => p.state = .seen_r,
194                                '\n' => p.state = .seen_n,
195                                else => {},
196                            }
197
198                            switch (b16) {
199                                int16("\r\n") => p.state = .seen_rn,
200                                int16("\n\n") => p.state = .finished,
201                                else => {},
202                            }
203
204                            switch (b24) {
205                                int24("\r\n\r") => p.state = .seen_rnr,
206                                else => {},
207                            }
208                        },
209                        else => unreachable,
210                    }
211
212                    index += vector_len;
213                    continue;
214                },
215            },
216            .seen_n => switch (bytes.len - index) {
217                0 => return index,
218                else => {
219                    switch (bytes[index]) {
220                        '\n' => p.state = .finished,
221                        else => p.state = .start,
222                    }
223
224                    index += 1;
225                    continue;
226                },
227            },
228            .seen_r => switch (bytes.len - index) {
229                0 => return index,
230                1 => {
231                    switch (bytes[index]) {
232                        '\n' => p.state = .seen_rn,
233                        '\r' => p.state = .seen_r,
234                        else => p.state = .start,
235                    }
236
237                    return index + 1;
238                },
239                2 => {
240                    const b16 = int16(bytes[index..][0..2]);
241                    const b8 = intShift(u8, b16);
242
243                    switch (b8) {
244                        '\r' => p.state = .seen_r,
245                        '\n' => p.state = .seen_rn,
246                        else => p.state = .start,
247                    }
248
249                    switch (b16) {
250                        int16("\r\n") => p.state = .seen_rn,
251                        int16("\n\r") => p.state = .seen_rnr,
252                        int16("\n\n") => p.state = .finished,
253                        else => {},
254                    }
255
256                    return index + 2;
257                },
258                else => {
259                    const b24 = int24(bytes[index..][0..3]);
260                    const b16 = intShift(u16, b24);
261                    const b8 = intShift(u8, b24);
262
263                    switch (b8) {
264                        '\r' => p.state = .seen_r,
265                        '\n' => p.state = .seen_n,
266                        else => p.state = .start,
267                    }
268
269                    switch (b16) {
270                        int16("\r\n") => p.state = .seen_rn,
271                        int16("\n\n") => p.state = .finished,
272                        else => {},
273                    }
274
275                    switch (b24) {
276                        int24("\n\r\n") => p.state = .finished,
277                        else => {},
278                    }
279
280                    index += 3;
281                    continue;
282                },
283            },
284            .seen_rn => switch (bytes.len - index) {
285                0 => return index,
286                1 => {
287                    switch (bytes[index]) {
288                        '\r' => p.state = .seen_rnr,
289                        '\n' => p.state = .seen_n,
290                        else => p.state = .start,
291                    }
292
293                    return index + 1;
294                },
295                else => {
296                    const b16 = int16(bytes[index..][0..2]);
297                    const b8 = intShift(u8, b16);
298
299                    switch (b8) {
300                        '\r' => p.state = .seen_rnr,
301                        '\n' => p.state = .seen_n,
302                        else => p.state = .start,
303                    }
304
305                    switch (b16) {
306                        int16("\r\n") => p.state = .finished,
307                        int16("\n\n") => p.state = .finished,
308                        else => {},
309                    }
310
311                    index += 2;
312                    continue;
313                },
314            },
315            .seen_rnr => switch (bytes.len - index) {
316                0 => return index,
317                else => {
318                    switch (bytes[index]) {
319                        '\n' => p.state = .finished,
320                        else => p.state = .start,
321                    }
322
323                    index += 1;
324                    continue;
325                },
326            },
327        }
328
329        return index;
330    }
331}
332
/// Reinterprets two bytes as a `u16` in the target's native byte order.
inline fn int16(array: *const [2]u8) u16 {
    const native_endian = @import("builtin").cpu.arch.endian();
    return std.mem.readInt(u16, array, native_endian);
}
336
/// Reinterprets three bytes as a `u24` in the target's native byte order.
inline fn int24(array: *const [3]u8) u24 {
    const native_endian = @import("builtin").cpu.arch.endian();
    return std.mem.readInt(u24, array, native_endian);
}
340
/// Reinterprets four bytes as a `u32` in the target's native byte order.
inline fn int32(array: *const [4]u8) u32 {
    const native_endian = @import("builtin").cpu.arch.endian();
    return std.mem.readInt(u32, array, native_endian);
}
344
/// Extracts from `x` the integer of type `T` formed by the bytes that come
/// last in memory, given that `x` was loaded in native byte order.
///
/// On little-endian targets those bytes are the most significant bits of
/// `x`, so they are shifted down first; on big-endian targets they are
/// already the least significant bits.
inline fn intShift(comptime T: type, x: anytype) T {
    const dropped_bits = @bitSizeOf(@TypeOf(x)) - @bitSizeOf(T);
    return switch (@import("builtin").cpu.arch.endian()) {
        .little => @as(T, @truncate(x >> dropped_bits)),
        .big => @as(T, @truncate(x)),
    };
}
351
352const HeadParser = @This();
353const std = @import("std");
354const builtin = @import("builtin");
355
// Splits a request at every offset up to and including the end of the head
// and checks that the two `feed` calls together consume exactly the head.
test feed {
    const head = "GET / HTTP/1.1\r\nHost: localhost\r\n\r\n";
    const data = head ++ "Hello";

    var split: usize = 0;
    while (split <= head.len) : (split += 1) {
        var parser: HeadParser = .{};
        try std.testing.expectEqual(split, parser.feed(data[0..split]));
        try std.testing.expectEqual(head.len - split, parser.feed(data[split..]));
    }
}
364}