Commit 67bd45f0cf
Changed files (8)
src-self-hosted
src-self-hosted/libc_installation.zig
@@ -57,10 +57,10 @@ pub const LibCInstallation = struct {
const contents = try std.io.readFileAlloc(allocator, libc_file);
defer allocator.free(contents);
- var it = std.mem.split(contents, "\n");
+ var it = std.mem.tokenize(contents, "\n");
while (it.next()) |line| {
if (line.len == 0 or line[0] == '#') continue;
- var line_it = std.mem.split(line, "=");
+ var line_it = std.mem.separate(line, "=");
const name = line_it.next() orelse {
try stderr.print("missing equal sign after field name\n");
return error.ParseError;
@@ -213,7 +213,7 @@ pub const LibCInstallation = struct {
},
}
- var it = std.mem.split(exec_result.stderr, "\n\r");
+ var it = std.mem.tokenize(exec_result.stderr, "\n\r");
var search_paths = std.ArrayList([]const u8).init(loop.allocator);
defer search_paths.deinit();
while (it.next()) |line| {
@@ -410,7 +410,7 @@ async fn ccPrintFileName(loop: *event.Loop, o_file: []const u8, want_dirname: bo
return error.CCompilerCrashed;
},
}
- var it = std.mem.split(exec_result.stdout, "\n\r");
+ var it = std.mem.tokenize(exec_result.stdout, "\n\r");
const line = it.next() orelse return error.LibCRuntimeNotFound;
const dirname = std.os.path.dirname(line) orelse return error.LibCRuntimeNotFound;
src-self-hosted/main.zig
@@ -351,7 +351,7 @@ fn buildOutputType(allocator: *Allocator, args: []const []const u8, out_type: Co
const root_name = if (provided_name) |n| n else blk: {
if (root_source_file) |file| {
const basename = os.path.basename(file);
- var it = mem.split(basename, ".");
+ var it = mem.separate(basename, ".");
break :blk it.next() orelse basename;
} else {
try stderr.write("--name [name] not provided and unable to infer\n");
std/os/child_process.zig
@@ -595,7 +595,7 @@ pub const ChildProcess = struct {
const PATH = try os.getEnvVarOwned(self.allocator, "PATH");
defer self.allocator.free(PATH);
- var it = mem.split(PATH, ";");
+ var it = mem.tokenize(PATH, ";");
while (it.next()) |search_path| {
const joined_path = try os.path.join(self.allocator, search_path, app_name);
defer self.allocator.free(joined_path);
std/os/index.zig
@@ -608,7 +608,7 @@ pub fn posixExecve(argv: []const []const u8, env_map: *const BufMap, allocator:
// +1 for the null terminating byte
const path_buf = try allocator.alloc(u8, PATH.len + exe_path.len + 2);
defer allocator.free(path_buf);
- var it = mem.split(PATH, ":");
+ var it = mem.tokenize(PATH, ":");
var seen_eacces = false;
var err: usize = undefined;
while (it.next()) |search_path| {
std/os/path.zig
@@ -184,7 +184,7 @@ pub fn windowsParsePath(path: []const u8) WindowsPath {
return relative_path;
}
- var it = mem.split(path, []u8{this_sep});
+ var it = mem.tokenize(path, []u8{this_sep});
_ = (it.next() orelse return relative_path);
_ = (it.next() orelse return relative_path);
return WindowsPath{
@@ -202,7 +202,7 @@ pub fn windowsParsePath(path: []const u8) WindowsPath {
return relative_path;
}
- var it = mem.split(path, []u8{this_sep});
+ var it = mem.tokenize(path, []u8{this_sep});
_ = (it.next() orelse return relative_path);
_ = (it.next() orelse return relative_path);
return WindowsPath{
@@ -264,8 +264,8 @@ fn networkShareServersEql(ns1: []const u8, ns2: []const u8) bool {
const sep1 = ns1[0];
const sep2 = ns2[0];
- var it1 = mem.split(ns1, []u8{sep1});
- var it2 = mem.split(ns2, []u8{sep2});
+ var it1 = mem.tokenize(ns1, []u8{sep1});
+ var it2 = mem.tokenize(ns2, []u8{sep2});
// TODO ASCII is wrong, we actually need full unicode support to compare paths.
return asciiEqlIgnoreCase(it1.next().?, it2.next().?);
@@ -285,8 +285,8 @@ fn compareDiskDesignators(kind: WindowsPath.Kind, p1: []const u8, p2: []const u8
const sep1 = p1[0];
const sep2 = p2[0];
- var it1 = mem.split(p1, []u8{sep1});
- var it2 = mem.split(p2, []u8{sep2});
+ var it1 = mem.tokenize(p1, []u8{sep1});
+ var it2 = mem.tokenize(p2, []u8{sep2});
// TODO ASCII is wrong, we actually need full unicode support to compare paths.
return asciiEqlIgnoreCase(it1.next().?, it2.next().?) and asciiEqlIgnoreCase(it1.next().?, it2.next().?);
@@ -337,6 +337,8 @@ pub fn resolveSlice(allocator: *Allocator, paths: []const []const u8) ![]u8 {
/// If all paths are relative it uses the current working directory as a starting point.
/// Each drive has its own current working directory.
/// Path separators are canonicalized to '\\' and drives are canonicalized to capital letters.
+/// Note: all usage of this function should be audited due to the existence of symlinks.
+/// Without performing actual syscalls, resolving `..` could be incorrect.
pub fn resolveWindows(allocator: *Allocator, paths: []const []const u8) ![]u8 {
if (paths.len == 0) {
assert(is_windows); // resolveWindows called on non windows can't use getCwd
@@ -416,7 +418,7 @@ pub fn resolveWindows(allocator: *Allocator, paths: []const []const u8) ![]u8 {
},
WindowsPath.Kind.NetworkShare => {
result = try allocator.alloc(u8, max_size);
- var it = mem.split(paths[first_index], "/\\");
+ var it = mem.tokenize(paths[first_index], "/\\");
const server_name = it.next().?;
const other_name = it.next().?;
@@ -483,7 +485,7 @@ pub fn resolveWindows(allocator: *Allocator, paths: []const []const u8) ![]u8 {
if (!correct_disk_designator) {
continue;
}
- var it = mem.split(p[parsed.disk_designator.len..], "/\\");
+ var it = mem.tokenize(p[parsed.disk_designator.len..], "/\\");
while (it.next()) |component| {
if (mem.eql(u8, component, ".")) {
continue;
@@ -516,6 +518,8 @@ pub fn resolveWindows(allocator: *Allocator, paths: []const []const u8) ![]u8 {
/// It resolves "." and "..".
/// The result does not have a trailing path separator.
/// If all paths are relative it uses the current working directory as a starting point.
+/// Note: all usage of this function should be audited due to the existence of symlinks.
+/// Without performing actual syscalls, resolving `..` could be incorrect.
pub fn resolvePosix(allocator: *Allocator, paths: []const []const u8) ![]u8 {
if (paths.len == 0) {
assert(!is_windows); // resolvePosix called on windows can't use getCwd
@@ -550,7 +554,7 @@ pub fn resolvePosix(allocator: *Allocator, paths: []const []const u8) ![]u8 {
errdefer allocator.free(result);
for (paths[first_index..]) |p, i| {
- var it = mem.split(p, "/");
+ var it = mem.tokenize(p, "/");
while (it.next()) |component| {
if (mem.eql(u8, component, ".")) {
continue;
@@ -937,8 +941,8 @@ pub fn relativeWindows(allocator: *Allocator, from: []const u8, to: []const u8)
return resolved_to;
}
- var from_it = mem.split(resolved_from, "/\\");
- var to_it = mem.split(resolved_to, "/\\");
+ var from_it = mem.tokenize(resolved_from, "/\\");
+ var to_it = mem.tokenize(resolved_to, "/\\");
while (true) {
const from_component = from_it.next() orelse return mem.dupe(allocator, u8, to_it.rest());
const to_rest = to_it.rest();
@@ -967,14 +971,12 @@ pub fn relativeWindows(allocator: *Allocator, from: []const u8, to: []const u8)
// shave off the trailing slash
result_index -= 1;
- if (to_rest.len > 0) {
- var rest_it = mem.split(to_rest, "/\\");
- while (rest_it.next()) |to_component| {
- result[result_index] = '\\';
- result_index += 1;
- mem.copy(u8, result[result_index..], to_component);
- result_index += to_component.len;
- }
+ var rest_it = mem.tokenize(to_rest, "/\\");
+ while (rest_it.next()) |to_component| {
+ result[result_index] = '\\';
+ result_index += 1;
+ mem.copy(u8, result[result_index..], to_component);
+ result_index += to_component.len;
}
return result[0..result_index];
@@ -990,8 +992,8 @@ pub fn relativePosix(allocator: *Allocator, from: []const u8, to: []const u8) ![
const resolved_to = try resolvePosix(allocator, [][]const u8{to});
defer allocator.free(resolved_to);
- var from_it = mem.split(resolved_from, "/");
- var to_it = mem.split(resolved_to, "/");
+ var from_it = mem.tokenize(resolved_from, "/");
+ var to_it = mem.tokenize(resolved_to, "/");
while (true) {
const from_component = from_it.next() orelse return mem.dupe(allocator, u8, to_it.rest());
const to_rest = to_it.rest();
std/build.zig
@@ -324,7 +324,7 @@ pub const Builder = struct {
fn processNixOSEnvVars(self: *Builder) void {
if (os.getEnvVarOwned(self.allocator, "NIX_CFLAGS_COMPILE")) |nix_cflags_compile| {
- var it = mem.split(nix_cflags_compile, " ");
+ var it = mem.tokenize(nix_cflags_compile, " ");
while (true) {
const word = it.next() orelse break;
if (mem.eql(u8, word, "-isystem")) {
@@ -342,7 +342,7 @@ pub const Builder = struct {
assert(err == error.EnvironmentVariableNotFound);
}
if (os.getEnvVarOwned(self.allocator, "NIX_LDFLAGS")) |nix_ldflags| {
- var it = mem.split(nix_ldflags, " ");
+ var it = mem.tokenize(nix_ldflags, " ");
while (true) {
const word = it.next() orelse break;
if (mem.eql(u8, word, "-rpath")) {
@@ -689,7 +689,7 @@ pub const Builder = struct {
if (os.path.isAbsolute(name)) {
return name;
}
- var it = mem.split(PATH, []u8{os.path.delimiter});
+ var it = mem.tokenize(PATH, []u8{os.path.delimiter});
while (it.next()) |path| {
const full_path = try os.path.join(self.allocator, path, self.fmt("{}{}", name, exe_extension));
if (os.path.real(self.allocator, full_path)) |real_path| {
std/mem.zig
@@ -689,58 +689,57 @@ pub fn eql_slice_u8(a: []const u8, b: []const u8) bool {
}
/// Returns an iterator that iterates over the slices of `buffer` that are not
-/// any of the bytes in `split_bytes`.
-/// split(" abc def ghi ", " ")
+/// any of the bytes in `delimiter_bytes`.
+/// tokenize(" abc def ghi ", " ")
/// Will return slices for "abc", "def", "ghi", null, in that order.
-/// If `split_bytes` does not exist in buffer,
+/// If `buffer` is empty, the iterator will return null.
+/// If `delimiter_bytes` does not exist in buffer,
/// the iterator will return `buffer`, null, in that order.
-pub fn split(buffer: []const u8, split_bytes: []const u8) SplitIterator {
- return SplitIterator{
+/// See also the related function `separate`.
+pub fn tokenize(buffer: []const u8, delimiter_bytes: []const u8) TokenIterator {
+ return TokenIterator{
.index = 0,
.buffer = buffer,
- .split_bytes = split_bytes,
- .glob = true,
- .spun = false,
+ .delimiter_bytes = delimiter_bytes,
};
}
-test "mem.split" {
- var it = split(" abc def ghi ", " ");
+test "mem.tokenize" {
+ var it = tokenize(" abc def ghi ", " ");
assert(eql(u8, it.next().?, "abc"));
assert(eql(u8, it.next().?, "def"));
assert(eql(u8, it.next().?, "ghi"));
assert(it.next() == null);
- it = split("..\\bob", "\\");
+ it = tokenize("..\\bob", "\\");
assert(eql(u8, it.next().?, ".."));
assert(eql(u8, "..", "..\\bob"[0..it.index]));
assert(eql(u8, it.next().?, "bob"));
assert(it.next() == null);
- it = split("//a/b", "/");
+ it = tokenize("//a/b", "/");
assert(eql(u8, it.next().?, "a"));
assert(eql(u8, it.next().?, "b"));
assert(eql(u8, "//a/b", "//a/b"[0..it.index]));
assert(it.next() == null);
- it = split("|", "|");
+ it = tokenize("|", "|");
assert(it.next() == null);
- it = split("", "|");
- assert(eql(u8, it.next().?, ""));
+ it = tokenize("", "|");
assert(it.next() == null);
- it = split("hello", "");
+ it = tokenize("hello", "");
assert(eql(u8, it.next().?, "hello"));
assert(it.next() == null);
- it = split("hello", " ");
+ it = tokenize("hello", " ");
assert(eql(u8, it.next().?, "hello"));
assert(it.next() == null);
}
-test "mem.split (multibyte)" {
- var it = split("a|b,c/d e", " /,|");
+test "mem.tokenize (multibyte)" {
+ var it = tokenize("a|b,c/d e", " /,|");
assert(eql(u8, it.next().?, "a"));
assert(eql(u8, it.next().?, "b"));
assert(eql(u8, it.next().?, "c"));
@@ -750,18 +749,21 @@ test "mem.split (multibyte)" {
}
/// Returns an iterator that iterates over the slices of `buffer` that
-/// seperates by bytes in `delimiter`.
+/// are separated by the full byte sequence `delimiter` (not by any single byte of it).
/// separate("abc|def||ghi", "|")
-/// Will return slices for "abc", "def", "", "ghi", null, in that order.
+/// will return slices for "abc", "def", "", "ghi", null, in that order.
/// If `delimiter` does not exist in buffer,
/// the iterator will return `buffer`, null, in that order.
+/// The delimiter length must not be zero.
+/// See also the related function `tokenize`.
+/// It is planned to rename this function to `split` before 1.0.0, like this:
+/// pub fn split(buffer: []const u8, delimiter: []const u8) SplitIterator {
pub fn separate(buffer: []const u8, delimiter: []const u8) SplitIterator {
+ assert(delimiter.len != 0);
return SplitIterator{
.index = 0,
.buffer = buffer,
- .split_bytes = delimiter,
- .glob = false,
- .spun = false,
+ .delimiter = delimiter,
};
}
@@ -782,19 +784,15 @@ test "mem.separate" {
assert(eql(u8, it.next().?, ""));
assert(it.next() == null);
- it = separate("hello", "");
- assert(eql(u8, it.next().?, "hello"));
- assert(it.next() == null);
-
it = separate("hello", " ");
assert(eql(u8, it.next().?, "hello"));
assert(it.next() == null);
}
test "mem.separate (multibyte)" {
- var it = separate("a|b,c/d e", " /,|");
+ var it = separate("a, b ,, c, d, e", ", ");
assert(eql(u8, it.next().?, "a"));
- assert(eql(u8, it.next().?, "b"));
+ assert(eql(u8, it.next().?, "b ,"));
assert(eql(u8, it.next().?, "c"));
assert(eql(u8, it.next().?, "d"));
assert(eql(u8, it.next().?, "e"));
@@ -819,49 +817,38 @@ test "mem.endsWith" {
assert(!endsWith(u8, "Bob", "Bo"));
}
-pub const SplitIterator = struct {
+pub const TokenIterator = struct {
buffer: []const u8,
- split_bytes: []const u8,
+ delimiter_bytes: []const u8,
index: usize,
- glob: bool,
- spun: bool,
- /// Iterates and returns null or optionally a slice the next split segment
- pub fn next(self: *SplitIterator) ?[]const u8 {
- if (self.spun) {
- if (self.index + 1 > self.buffer.len) return null;
- self.index += 1;
- }
-
- self.spun = true;
-
- if (self.glob) {
- while (self.index < self.buffer.len and self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {}
+ /// Returns a slice of the next token, or null if tokenization is complete.
+ pub fn next(self: *TokenIterator) ?[]const u8 {
+ // move to beginning of token
+ while (self.index < self.buffer.len and self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {}
+ const start = self.index;
+ if (start == self.buffer.len) {
+ return null;
}
- var cursor = self.index;
- while (cursor < self.buffer.len and !self.isSplitByte(self.buffer[cursor])) : (cursor += 1) {}
-
- defer self.index = cursor;
+ // move to end of token
+ while (self.index < self.buffer.len and !self.isSplitByte(self.buffer[self.index])) : (self.index += 1) {}
+ const end = self.index;
- if (cursor == self.buffer.len) {
- return if (self.glob and self.index == cursor and self.index > 0) null else self.buffer[self.index..];
- }
-
- return self.buffer[self.index..cursor];
+ return self.buffer[start..end];
}
/// Returns a slice of the remaining bytes. Does not affect iterator state.
- pub fn rest(self: *const SplitIterator) []const u8 {
+ pub fn rest(self: TokenIterator) []const u8 {
// move to beginning of token
var index: usize = self.index;
while (index < self.buffer.len and self.isSplitByte(self.buffer[index])) : (index += 1) {}
return self.buffer[index..];
}
- fn isSplitByte(self: *const SplitIterator, byte: u8) bool {
- for (self.split_bytes) |split_byte| {
- if (byte == split_byte) {
+ fn isSplitByte(self: TokenIterator, byte: u8) bool {
+ for (self.delimiter_bytes) |delimiter_byte| {
+ if (byte == delimiter_byte) {
return true;
}
}
@@ -869,6 +856,32 @@ pub const SplitIterator = struct {
}
};
+/// Iterator returned by `separate`: yields the fields of `buffer` that lie
+/// between occurrences of the whole `delimiter` sequence.
+pub const SplitIterator = struct {
+/// The input being split; every returned slice points into it.
+buffer: []const u8,
+/// Start of the next field, or null once the final field has been returned.
+index: ?usize,
+/// Byte sequence that separates fields.
+delimiter: []const u8,
+
+/// Returns a slice of the next field, or null if splitting is complete.
+pub fn next(self: *SplitIterator) ?[]const u8 {
+const start = self.index orelse return null;
+// NOTE(review): indexOfPos is defined outside this hunk; presumably it finds
+// the first occurrence of `delimiter` at or after `start` — confirm.
+const end = if (indexOfPos(u8, self.buffer, start, self.delimiter)) |delim_start| blk: {
+// Resume just past this delimiter on the next call.
+self.index = delim_start + self.delimiter.len;
+break :blk delim_start;
+} else blk: {
+// No further delimiter: this is the final field, so mark the iterator finished.
+self.index = null;
+break :blk self.buffer.len;
+};
+return self.buffer[start..end];
+}
+
+/// Returns a slice of the remaining bytes. Does not affect iterator state.
+/// Once the final field has been returned, this is an empty slice.
+pub fn rest(self: SplitIterator) []const u8 {
+const end = self.buffer.len;
+const start = self.index orelse end;
+return self.buffer[start..end];
+}
+};
+
/// Naively combines a series of strings with a separator.
/// Allocates memory for the result, which must be freed by the caller.
pub fn join(allocator: *Allocator, sep: u8, strings: ...) ![]u8 {
build.zig
@@ -189,14 +189,14 @@ fn findLLVM(b: *Builder, llvm_config_exe: []const u8) !LibraryDep {
const prefix_output = try b.exec([][]const u8{ llvm_config_exe, "--prefix" });
var result = LibraryDep{
- .prefix = mem.split(prefix_output, " \r\n").next().?,
+ .prefix = mem.tokenize(prefix_output, " \r\n").next().?,
.libs = ArrayList([]const u8).init(b.allocator),
.system_libs = ArrayList([]const u8).init(b.allocator),
.includes = ArrayList([]const u8).init(b.allocator),
.libdirs = ArrayList([]const u8).init(b.allocator),
};
{
- var it = mem.split(libs_output, " \r\n");
+ var it = mem.tokenize(libs_output, " \r\n");
while (it.next()) |lib_arg| {
if (mem.startsWith(u8, lib_arg, "-l")) {
try result.system_libs.append(lib_arg[2..]);
@@ -210,7 +210,7 @@ fn findLLVM(b: *Builder, llvm_config_exe: []const u8) !LibraryDep {
}
}
{
- var it = mem.split(includes_output, " \r\n");
+ var it = mem.tokenize(includes_output, " \r\n");
while (it.next()) |include_arg| {
if (mem.startsWith(u8, include_arg, "-I")) {
try result.includes.append(include_arg[2..]);
@@ -220,7 +220,7 @@ fn findLLVM(b: *Builder, llvm_config_exe: []const u8) !LibraryDep {
}
}
{
- var it = mem.split(libdir_output, " \r\n");
+ var it = mem.tokenize(libdir_output, " \r\n");
while (it.next()) |libdir| {
if (mem.startsWith(u8, libdir, "-L")) {
try result.libdirs.append(libdir[2..]);
@@ -233,7 +233,7 @@ fn findLLVM(b: *Builder, llvm_config_exe: []const u8) !LibraryDep {
}
pub fn installStdLib(b: *Builder, stdlib_files: []const u8) void {
- var it = mem.split(stdlib_files, ";");
+ var it = mem.tokenize(stdlib_files, ";");
while (it.next()) |stdlib_file| {
const src_path = os.path.join(b.allocator, "std", stdlib_file) catch unreachable;
const dest_path = os.path.join(b.allocator, "lib", "zig", "std", stdlib_file) catch unreachable;
@@ -242,7 +242,7 @@ pub fn installStdLib(b: *Builder, stdlib_files: []const u8) void {
}
pub fn installCHeaders(b: *Builder, c_header_files: []const u8) void {
- var it = mem.split(c_header_files, ";");
+ var it = mem.tokenize(c_header_files, ";");
while (it.next()) |c_header_file| {
const src_path = os.path.join(b.allocator, "c_headers", c_header_file) catch unreachable;
const dest_path = os.path.join(b.allocator, "lib", "zig", "include", c_header_file) catch unreachable;
@@ -277,7 +277,7 @@ fn configureStage2(b: *Builder, exe: var, ctx: Context) !void {
addCppLib(b, exe, ctx.cmake_binary_dir, "zig_cpp");
if (ctx.lld_include_dir.len != 0) {
exe.addIncludeDir(ctx.lld_include_dir);
- var it = mem.split(ctx.lld_libraries, ";");
+ var it = mem.tokenize(ctx.lld_libraries, ";");
while (it.next()) |lib| {
exe.addObjectFile(lib);
}
@@ -334,7 +334,7 @@ fn addCxxKnownPath(
ctx.cxx_compiler,
b.fmt("-print-file-name={}", objname),
});
- const path_unpadded = mem.split(path_padded, "\r\n").next().?;
+ const path_unpadded = mem.tokenize(path_padded, "\r\n").next().?;
if (mem.eql(u8, path_unpadded, objname)) {
if (errtxt) |msg| {
warn("{}", msg);