master
  1/**
  2 * This file has no copyright assigned and is placed in the Public Domain.
  3 * This file is part of the mingw-w64 runtime package.
  4 * No warranty is given; refer to the file DISCLAIMER.PD within this package.
  5 */
  6#ifndef WIN32_LEAN_AND_MEAN
  7#define WIN32_LEAN_AND_MEAN
  8#endif
#include <stdlib.h>
#include <string.h>
#include <libgen.h>
#include <windows.h>
 12
 13/* A 'directory separator' is a byte that equals 0x2F ('solidus' or more
 14 * commonly 'forward slash') or 0x5C ('reverse solidus' or more commonly
 15 * 'backward slash'). The byte 0x5C may look different from a backward slash
 16 * in some locales; for example, it looks the same as a Yen sign in Japanese
 17 * locales and a Won sign in Korean locales. Despite its appearance, it still
 18 * functions as a directory separator.
 19 *
 20 * A 'path' comprises an optional DOS drive letter with a colon, and then an
 * arbitrary number of possibly empty components, separated by non-empty
 22 * sequences of directory separators (in other words, consecutive directory
 23 * separators are treated as a single one). A path that comprises an empty
 24 * component denotes the current working directory.
 25 *
 26 * An 'absolute path' comprises at least two components, the first of which
 27 * is empty.
 28 *
 29 * A 'relative path' is a path that is not an absolute path. In other words,
 30 * it either comprises an empty component, or begins with a non-empty
 31 * component.
 32 *
 * POSIX has no concept of DOS drives. A path that does not have a
 34 * drive letter starts from the same drive as the current working directory.
 35 *
 36 * For example:
 37 * (Examples without drive letters match POSIX.)
 38 *
 39 *   Argument                 dirname() returns        basename() returns
 40 *   --------                 -----------------        ------------------
 41 *   `` or NULL               `.`                      `.`
 42 *   `usr`                    `.`                      `usr`
 43 *   `usr\`                   `.`                      `usr`
 44 *   `\`                      `\`                      `\`
 45 *   `\usr`                   `\`                      `usr`
 46 *   `\usr\lib`               `\usr`                   `lib`
 47 *   `\home\\dwc\\test`       `\home\\dwc`             `test`
 *   `\\host\usr`             `\\host\usr\`            `\`
 *   `\\host\usr\lib`         `\\host\usr\`            `lib`
 *   `\\host\\usr`            `\\host\\usr\`           `\`
 *   `\\host\\usr\lib`        `\\host\\usr\`           `lib`
 52 *   `C:`                     `C:.`                    `.`
 53 *   `C:usr`                  `C:.`                    `usr`
 54 *   `C:usr\`                 `C:.`                    `usr`
 55 *   `C:\`                    `C:\`                    `\`
 56 *   `C:\\`                   `C:\`                    `\`
 57 *   `C:\\\`                  `C:\`                    `\`
 58 *   `C:\usr`                 `C:\`                    `usr`
 59 *   `C:\usr\lib`             `C:\usr`                 `lib`
 60 *   `C:\\usr\\lib\\`         `C:\\usr`                `lib`
 61 *   `C:\home\\dwc\\test`     `C:\home\\dwc`           `test`
 62 */
 63
 64struct path_info
 65  {
 66    /* This points to end of the UNC prefix and drive letter, if any.  */
 67    char* prefix_end;
 68
 69    /* These point to the directory separator in front of the last non-empty
 70     * component.  */
 71    char* base_sep_begin;
 72    char* base_sep_end;
 73
 74    /* This points to the last directory separator sequence if no other
 75     * non-separator characters follow it.  */
 76    char* term_sep_begin;
 77
 78    /* This points to the end of the string.  */
 79    char* path_end;
 80  };
 81
 82#define IS_DIR_SEP(c)  ((c) == '/' || (c) == '\\')
 83
 84static
 85void
 86do_get_path_info(struct path_info* info, char* path)
 87  {
 88    char* pos = path;
 89    int unc_ncoms = 0;
 90    DWORD cp;
 91    int dbcs_tb, prev_dir_sep, dir_sep;
 92
 93    /* Get the code page for paths in the same way as `fopen()`.  */
 94    cp = AreFileApisANSI() ? CP_ACP : CP_OEMCP;
 95
 96    /* Set the structure to 'no data'.  */
 97    info->prefix_end = NULL;
 98    info->base_sep_begin = NULL;
 99    info->base_sep_end = NULL;
100    info->term_sep_begin = NULL;
101
102    if(IS_DIR_SEP(pos[0]) && IS_DIR_SEP(pos[1])) {
103      /* The path is UNC.  */
104      pos += 2;
105
106      /* Seek to the end of the share/device name.  */
107      dbcs_tb = 0;
108      prev_dir_sep = 0;
109
110      while(*pos != 0) {
111        dir_sep = 0;
112
113        if(dbcs_tb)
114          dbcs_tb = 0;
115        else if(IsDBCSLeadByteEx(cp, *pos))
116          dbcs_tb = 1;
117        else
118          dir_sep = IS_DIR_SEP(*pos);
119
120        /* If a separator has been encountered and the previous character
121         * was not, mark this as the end of the current component.  */
122        if(dir_sep && !prev_dir_sep) {
123          unc_ncoms ++;
124
125          /* The first component is the host name, and the second is the
126           * share name. So  we stop at the end of the second component.  */
127          if(unc_ncoms == 2)
128            break;
129        }
130
131        prev_dir_sep = dir_sep;
132        pos ++;
133      }
134
135      /* The UNC prefix terminates here. The terminating directory separator
136       * is not part of the prefix, and initiates a new absolute path.  */
137      info->prefix_end = pos;
138    }
139    else if((pos[0] >= 'A' && pos[0] <= 'Z' && pos[1] == ':')
140            || (pos[0] >= 'a' && pos[0] <= 'z' && pos[1] == ':')) {
141      /* The path contains a DOS drive letter in the beginning.  */
142      pos += 2;
143
144      /* The DOS drive prefix terminates here. Unlike UNC paths, the remaing
145       * part can be relative. For example, `C:foo` denotes `foo` in the
146       * working directory of drive `C:`.  */
147      info->prefix_end = pos;
148    }
149
150    /* The remaining part of the path is almost the same as POSIX.  */
151    dbcs_tb = 0;
152    prev_dir_sep = 0;
153
154    while(*pos != 0) {
155      dir_sep = 0;
156
157      if(dbcs_tb)
158        dbcs_tb = 0;
159      else if(IsDBCSLeadByteEx(cp, *pos))
160        dbcs_tb = 1;
161      else
162        dir_sep = IS_DIR_SEP(*pos);
163
164      /* If a separator has been encountered and the previous character
165       * was not, mark this as the beginning of the terminating separator
166       * sequence.  */
167      if(dir_sep && !prev_dir_sep)
168        info->term_sep_begin = pos;
169
170      /* If a non-separator character has been encountered and a previous
171       * terminating separator sequence exists, start a new component.  */
172      if(!dir_sep && prev_dir_sep) {
173        info->base_sep_begin = info->term_sep_begin;
174        info->base_sep_end = pos;
175        info->term_sep_begin = NULL;
176      }
177
178      prev_dir_sep = dir_sep;
179      pos ++;
180    }
181
182    /* Store the end of the path for convenience.  */
183    info->path_end = pos;
184  }
185
186char*
187dirname(char* path)
188  {
189    struct path_info info;
190    char* upath;
191    const char* top;
192    static char* static_path_copy;
193
194    if(path == NULL || path[0] == 0)
195      return (char*) ".";
196
197    do_get_path_info(&info, path);
198    upath = info.prefix_end ? info.prefix_end : path;
199    top = (IS_DIR_SEP(path[0]) || IS_DIR_SEP(upath[0])) ? "\\" : ".";
200
201    /* If a non-terminating directory separator exists, it terminates the
202     * dirname. Truncate the path there.  */
203    if(info.base_sep_begin) {
204      info.base_sep_begin[0] = 0;
205
206      /* If the unprefixed path has not been truncated to empty, it is now
207       * the dirname, so return it.  */
208      if(upath[0])
209        return path;
210    }
211
212    /* The dirname is empty. In principle we return `<prefix>.` if the
213     * path is relative and `<prefix>\` if it is absolute. This can be
214     * optimized if there is no prefix.  */
215    if(upath == path)
216      return (char*) top;
217
218    /* When there is a prefix, we must append a character to the prefix.
219     * If there is enough room in the original path, we just reuse its
220     * storage.  */
221    if(upath != info.path_end) {
222      upath[0] = *top;
223      upath[1] = 0;
224      return path;
225    }
226
227    /* This is only the last resort. If there is no room, we have to copy
228     * the prefix elsewhere.  */
229    upath = realloc(static_path_copy, info.prefix_end - path + 2);
230    if(!upath)
231      return (char*) top;
232
233    static_path_copy = upath;
234    memcpy(upath, path, info.prefix_end - path);
235    upath += info.prefix_end - path;
236    upath[0] = *top;
237    upath[1] = 0;
238    return static_path_copy;
239  }
240
241char*
242basename(char* path)
243  {
244    struct path_info info;
245    char* upath;
246
247    if(path == NULL || path[0] == 0)
248      return (char*) ".";
249
250    do_get_path_info(&info, path);
251    upath = info.prefix_end ? info.prefix_end : path;
252
253    /* If the path is non-UNC and empty, then it's relative. POSIX says '.'
254     * shall be returned.  */
255    if(IS_DIR_SEP(path[0]) == 0 && upath[0] == 0)
256      return (char*) ".";
257
258    /* If a terminating separator sequence exists, it is not part of the
259     * name and shall be truncated.  */
260    if(info.term_sep_begin)
261      info.term_sep_begin[0] = 0;
262
263    /* If some other separator sequence has been found, the basename
264     * immediately follows it.  */
265    if(info.base_sep_end)
266      return info.base_sep_end;
267
268    /* If removal of the terminating separator sequence has caused the
269     * unprefixed path to become empty, it must have comprised only
270     * separators. POSIX says `/` shall be returned, but on Windows, we
271     * return `\` instead.  */
272    if(upath[0] == 0)
273      return (char*) "\\";
274
275    /* Return the unprefixed path.  */
276    return upath;
277  }