master
1/**
2 * This file has no copyright assigned and is placed in the Public Domain.
3 * This file is part of the mingw-w64 runtime package.
4 * No warranty is given; refer to the file DISCLAIMER.PD within this package.
5 */
6#ifndef WIN32_LEAN_AND_MEAN
7#define WIN32_LEAN_AND_MEAN
8#endif
9#include <stdlib.h>
10#include <libgen.h>
11#include <windows.h>
12
13/* A 'directory separator' is a byte that equals 0x2F ('solidus' or more
14 * commonly 'forward slash') or 0x5C ('reverse solidus' or more commonly
15 * 'backward slash'). The byte 0x5C may look different from a backward slash
16 * in some locales; for example, it looks the same as a Yen sign in Japanese
17 * locales and a Won sign in Korean locales. Despite its appearance, it still
18 * functions as a directory separator.
19 *
20 * A 'path' comprises an optional DOS drive letter with a colon, and then an
 * arbitrary number of possibly empty components, separated by non-empty
22 * sequences of directory separators (in other words, consecutive directory
23 * separators are treated as a single one). A path that comprises an empty
24 * component denotes the current working directory.
25 *
26 * An 'absolute path' comprises at least two components, the first of which
27 * is empty.
28 *
29 * A 'relative path' is a path that is not an absolute path. In other words,
30 * it either comprises an empty component, or begins with a non-empty
31 * component.
32 *
33 * POSIX doesn't have a concept about DOS drives. A path that does not have a
34 * drive letter starts from the same drive as the current working directory.
35 *
36 * For example:
37 * (Examples without drive letters match POSIX.)
38 *
 *   Argument                 dirname() returns        basename() returns
 *   --------                 -----------------        ------------------
 *   `` or NULL               `.`                      `.`
 *   `usr`                    `.`                      `usr`
 *   `usr\`                   `.`                      `usr`
 *   `\`                      `\`                      `\`
 *   `\usr`                   `\`                      `usr`
 *   `\usr\lib`               `\usr`                   `lib`
 *   `\home\\dwc\\test`       `\home\\dwc`             `test`
 *   `\\host\usr`             `\\host\usr\`            `\`
 *   `\\host\usr\lib`         `\\host\usr\`            `lib`
 *   `\\host\\usr`            `\\host\\usr\`           `\`
 *   `\\host\\usr\lib`        `\\host\\usr\`           `lib`
 *   `C:`                     `C:.`                    `.`
 *   `C:usr`                  `C:.`                    `usr`
 *   `C:usr\`                 `C:.`                    `usr`
 *   `C:\`                    `C:\`                    `\`
 *   `C:\\`                   `C:\`                    `\`
 *   `C:\\\`                  `C:\`                    `\`
 *   `C:\usr`                 `C:\`                    `usr`
 *   `C:\usr\lib`             `C:\usr`                 `lib`
 *   `C:\\usr\\lib\\`         `C:\\usr`                `lib`
 *   `C:\home\\dwc\\test`     `C:\home\\dwc`           `test`
62 */
63
/* Landmarks inside a path string. All members point into the string that
 * was analyzed; a landmark that does not occur in the path is NULL. */
struct path_info
  {
    /* End of the UNC prefix or of the drive letter and colon. NULL if the
     * path has neither. */
    char* prefix_end;

    /* Beginning and end of the directory separator sequence that
     * immediately precedes the last non-empty component. Both are NULL if
     * no non-empty component is preceded by a separator. */
    char* base_sep_begin;
    char* base_sep_end;

    /* Beginning of the trailing directory separator sequence, that is, a
     * sequence which is followed only by the end of the string. NULL if
     * the path does not end with a separator. */
    char* term_sep_begin;

    /* The null terminator of the string. */
    char* path_end;
  };
81
/* Returns non-zero if `c` is a directory separator, which is either a
 * forward slash (0x2F) or a backward slash (0x5C), and zero otherwise.
 *
 * This is a function rather than a plain macro expression so that the
 * argument is evaluated exactly once; an argument with side effects, such
 * as `*p++`, is therefore safe. */
static
int
is_dir_sep(int c)
  {
    return c == '/' || c == '\\';
  }

#define IS_DIR_SEP(c)  is_dir_sep(c)
83
84static
85void
86do_get_path_info(struct path_info* info, char* path)
87 {
88 char* pos = path;
89 int unc_ncoms = 0;
90 DWORD cp;
91 int dbcs_tb, prev_dir_sep, dir_sep;
92
93 /* Get the code page for paths in the same way as `fopen()`. */
94 cp = AreFileApisANSI() ? CP_ACP : CP_OEMCP;
95
96 /* Set the structure to 'no data'. */
97 info->prefix_end = NULL;
98 info->base_sep_begin = NULL;
99 info->base_sep_end = NULL;
100 info->term_sep_begin = NULL;
101
102 if(IS_DIR_SEP(pos[0]) && IS_DIR_SEP(pos[1])) {
103 /* The path is UNC. */
104 pos += 2;
105
106 /* Seek to the end of the share/device name. */
107 dbcs_tb = 0;
108 prev_dir_sep = 0;
109
110 while(*pos != 0) {
111 dir_sep = 0;
112
113 if(dbcs_tb)
114 dbcs_tb = 0;
115 else if(IsDBCSLeadByteEx(cp, *pos))
116 dbcs_tb = 1;
117 else
118 dir_sep = IS_DIR_SEP(*pos);
119
120 /* If a separator has been encountered and the previous character
121 * was not, mark this as the end of the current component. */
122 if(dir_sep && !prev_dir_sep) {
123 unc_ncoms ++;
124
125 /* The first component is the host name, and the second is the
126 * share name. So we stop at the end of the second component. */
127 if(unc_ncoms == 2)
128 break;
129 }
130
131 prev_dir_sep = dir_sep;
132 pos ++;
133 }
134
135 /* The UNC prefix terminates here. The terminating directory separator
136 * is not part of the prefix, and initiates a new absolute path. */
137 info->prefix_end = pos;
138 }
139 else if((pos[0] >= 'A' && pos[0] <= 'Z' && pos[1] == ':')
140 || (pos[0] >= 'a' && pos[0] <= 'z' && pos[1] == ':')) {
141 /* The path contains a DOS drive letter in the beginning. */
142 pos += 2;
143
144 /* The DOS drive prefix terminates here. Unlike UNC paths, the remaing
145 * part can be relative. For example, `C:foo` denotes `foo` in the
146 * working directory of drive `C:`. */
147 info->prefix_end = pos;
148 }
149
150 /* The remaining part of the path is almost the same as POSIX. */
151 dbcs_tb = 0;
152 prev_dir_sep = 0;
153
154 while(*pos != 0) {
155 dir_sep = 0;
156
157 if(dbcs_tb)
158 dbcs_tb = 0;
159 else if(IsDBCSLeadByteEx(cp, *pos))
160 dbcs_tb = 1;
161 else
162 dir_sep = IS_DIR_SEP(*pos);
163
164 /* If a separator has been encountered and the previous character
165 * was not, mark this as the beginning of the terminating separator
166 * sequence. */
167 if(dir_sep && !prev_dir_sep)
168 info->term_sep_begin = pos;
169
170 /* If a non-separator character has been encountered and a previous
171 * terminating separator sequence exists, start a new component. */
172 if(!dir_sep && prev_dir_sep) {
173 info->base_sep_begin = info->term_sep_begin;
174 info->base_sep_end = pos;
175 info->term_sep_begin = NULL;
176 }
177
178 prev_dir_sep = dir_sep;
179 pos ++;
180 }
181
182 /* Store the end of the path for convenience. */
183 info->path_end = pos;
184 }
185
186char*
187dirname(char* path)
188 {
189 struct path_info info;
190 char* upath;
191 const char* top;
192 static char* static_path_copy;
193
194 if(path == NULL || path[0] == 0)
195 return (char*) ".";
196
197 do_get_path_info(&info, path);
198 upath = info.prefix_end ? info.prefix_end : path;
199 top = (IS_DIR_SEP(path[0]) || IS_DIR_SEP(upath[0])) ? "\\" : ".";
200
201 /* If a non-terminating directory separator exists, it terminates the
202 * dirname. Truncate the path there. */
203 if(info.base_sep_begin) {
204 info.base_sep_begin[0] = 0;
205
206 /* If the unprefixed path has not been truncated to empty, it is now
207 * the dirname, so return it. */
208 if(upath[0])
209 return path;
210 }
211
212 /* The dirname is empty. In principle we return `<prefix>.` if the
213 * path is relative and `<prefix>\` if it is absolute. This can be
214 * optimized if there is no prefix. */
215 if(upath == path)
216 return (char*) top;
217
218 /* When there is a prefix, we must append a character to the prefix.
219 * If there is enough room in the original path, we just reuse its
220 * storage. */
221 if(upath != info.path_end) {
222 upath[0] = *top;
223 upath[1] = 0;
224 return path;
225 }
226
227 /* This is only the last resort. If there is no room, we have to copy
228 * the prefix elsewhere. */
229 upath = realloc(static_path_copy, info.prefix_end - path + 2);
230 if(!upath)
231 return (char*) top;
232
233 static_path_copy = upath;
234 memcpy(upath, path, info.prefix_end - path);
235 upath += info.prefix_end - path;
236 upath[0] = *top;
237 upath[1] = 0;
238 return static_path_copy;
239 }
240
241char*
242basename(char* path)
243 {
244 struct path_info info;
245 char* upath;
246
247 if(path == NULL || path[0] == 0)
248 return (char*) ".";
249
250 do_get_path_info(&info, path);
251 upath = info.prefix_end ? info.prefix_end : path;
252
253 /* If the path is non-UNC and empty, then it's relative. POSIX says '.'
254 * shall be returned. */
255 if(IS_DIR_SEP(path[0]) == 0 && upath[0] == 0)
256 return (char*) ".";
257
258 /* If a terminating separator sequence exists, it is not part of the
259 * name and shall be truncated. */
260 if(info.term_sep_begin)
261 info.term_sep_begin[0] = 0;
262
263 /* If some other separator sequence has been found, the basename
264 * immediately follows it. */
265 if(info.base_sep_end)
266 return info.base_sep_end;
267
268 /* If removal of the terminating separator sequence has caused the
269 * unprefixed path to become empty, it must have comprised only
270 * separators. POSIX says `/` shall be returned, but on Windows, we
271 * return `\` instead. */
272 if(upath[0] == 0)
273 return (char*) "\\";
274
275 /* Return the unprefixed path. */
276 return upath;
277 }