//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
  8
  9#ifndef PATH_PARSER_H
 10#define PATH_PARSER_H
 11
 12#include <__config>
 13#include <__utility/unreachable.h>
 14#include <cstddef>
 15#include <filesystem>
 16#include <utility>
 17
 18#include "format_string.h"
 19
 20_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
 21
 22inline bool isSeparator(path::value_type C) {
 23  if (C == '/')
 24    return true;
 25#if defined(_LIBCPP_WIN32API)
 26  if (C == '\\')
 27    return true;
 28#endif
 29  return false;
 30}
 31
 32inline bool isDriveLetter(path::value_type C) { return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z'); }
 33
 34namespace parser {
 35
 36using string_view_t    = path::__string_view;
 37using string_view_pair = pair<string_view_t, string_view_t>;
 38using PosPtr           = path::value_type const*;
 39
 40struct PathParser {
 41  enum ParserState : unsigned char {
 42    // Zero is a special sentinel value used by default constructed iterators.
 43    PS_BeforeBegin   = path::iterator::_BeforeBegin,
 44    PS_InRootName    = path::iterator::_InRootName,
 45    PS_InRootDir     = path::iterator::_InRootDir,
 46    PS_InFilenames   = path::iterator::_InFilenames,
 47    PS_InTrailingSep = path::iterator::_InTrailingSep,
 48    PS_AtEnd         = path::iterator::_AtEnd
 49  };
 50
 51  const string_view_t Path;
 52  string_view_t RawEntry;
 53  ParserState State_;
 54
 55private:
 56  PathParser(string_view_t P, ParserState State) noexcept : Path(P), State_(State) {}
 57
 58public:
 59  PathParser(string_view_t P, string_view_t E, unsigned char S)
 60      : Path(P), RawEntry(E), State_(static_cast<ParserState>(S)) {
 61    // S cannot be '0' or PS_BeforeBegin.
 62  }
 63
 64  static PathParser CreateBegin(string_view_t P) noexcept {
 65    PathParser PP(P, PS_BeforeBegin);
 66    PP.increment();
 67    return PP;
 68  }
 69
 70  static PathParser CreateEnd(string_view_t P) noexcept {
 71    PathParser PP(P, PS_AtEnd);
 72    return PP;
 73  }
 74
 75  PosPtr peek() const noexcept {
 76    auto TkEnd = getNextTokenStartPos();
 77    auto End   = getAfterBack();
 78    return TkEnd == End ? nullptr : TkEnd;
 79  }
 80
 81  void increment() noexcept {
 82    const PosPtr End   = getAfterBack();
 83    const PosPtr Start = getNextTokenStartPos();
 84    if (Start == End)
 85      return makeState(PS_AtEnd);
 86
 87    switch (State_) {
 88    case PS_BeforeBegin: {
 89      PosPtr TkEnd = consumeRootName(Start, End);
 90      if (TkEnd)
 91        return makeState(PS_InRootName, Start, TkEnd);
 92    }
 93      [[__fallthrough__]];
 94    case PS_InRootName: {
 95      PosPtr TkEnd = consumeAllSeparators(Start, End);
 96      if (TkEnd)
 97        return makeState(PS_InRootDir, Start, TkEnd);
 98      else
 99        return makeState(PS_InFilenames, Start, consumeName(Start, End));
100    }
101    case PS_InRootDir:
102      return makeState(PS_InFilenames, Start, consumeName(Start, End));
103
104    case PS_InFilenames: {
105      PosPtr SepEnd = consumeAllSeparators(Start, End);
106      if (SepEnd != End) {
107        PosPtr TkEnd = consumeName(SepEnd, End);
108        if (TkEnd)
109          return makeState(PS_InFilenames, SepEnd, TkEnd);
110      }
111      return makeState(PS_InTrailingSep, Start, SepEnd);
112    }
113
114    case PS_InTrailingSep:
115      return makeState(PS_AtEnd);
116
117    case PS_AtEnd:
118      __libcpp_unreachable();
119    }
120  }
121
122  void decrement() noexcept {
123    const PosPtr REnd   = getBeforeFront();
124    const PosPtr RStart = getCurrentTokenStartPos() - 1;
125    if (RStart == REnd) // we're decrementing the begin
126      return makeState(PS_BeforeBegin);
127
128    switch (State_) {
129    case PS_AtEnd: {
130      // Try to consume a trailing separator or root directory first.
131      if (PosPtr SepEnd = consumeAllSeparators(RStart, REnd)) {
132        if (SepEnd == REnd)
133          return makeState(PS_InRootDir, Path.data(), RStart + 1);
134        PosPtr TkStart = consumeRootName(SepEnd, REnd);
135        if (TkStart == REnd)
136          return makeState(PS_InRootDir, RStart, RStart + 1);
137        return makeState(PS_InTrailingSep, SepEnd + 1, RStart + 1);
138      } else {
139        PosPtr TkStart = consumeRootName(RStart, REnd);
140        if (TkStart == REnd)
141          return makeState(PS_InRootName, TkStart + 1, RStart + 1);
142        TkStart = consumeName(RStart, REnd);
143        return makeState(PS_InFilenames, TkStart + 1, RStart + 1);
144      }
145    }
146    case PS_InTrailingSep:
147      return makeState(PS_InFilenames, consumeName(RStart, REnd) + 1, RStart + 1);
148    case PS_InFilenames: {
149      PosPtr SepEnd = consumeAllSeparators(RStart, REnd);
150      if (SepEnd == REnd)
151        return makeState(PS_InRootDir, Path.data(), RStart + 1);
152      PosPtr TkStart = consumeRootName(SepEnd ? SepEnd : RStart, REnd);
153      if (TkStart == REnd) {
154        if (SepEnd)
155          return makeState(PS_InRootDir, SepEnd + 1, RStart + 1);
156        return makeState(PS_InRootName, TkStart + 1, RStart + 1);
157      }
158      TkStart = consumeName(SepEnd, REnd);
159      return makeState(PS_InFilenames, TkStart + 1, SepEnd + 1);
160    }
161    case PS_InRootDir:
162      return makeState(PS_InRootName, Path.data(), RStart + 1);
163    case PS_InRootName:
164    case PS_BeforeBegin:
165      __libcpp_unreachable();
166    }
167  }
168
169  /// \brief Return a view with the "preferred representation" of the current
170  ///   element. For example trailing separators are represented as a '.'
171  string_view_t operator*() const noexcept {
172    switch (State_) {
173    case PS_BeforeBegin:
174    case PS_AtEnd:
175      return PATHSTR("");
176    case PS_InRootDir:
177      if (RawEntry[0] == '\\')
178        return PATHSTR("\\");
179      else
180        return PATHSTR("/");
181    case PS_InTrailingSep:
182      return PATHSTR("");
183    case PS_InRootName:
184    case PS_InFilenames:
185      return RawEntry;
186    }
187    __libcpp_unreachable();
188  }
189
190  explicit operator bool() const noexcept { return State_ != PS_BeforeBegin && State_ != PS_AtEnd; }
191
192  PathParser& operator++() noexcept {
193    increment();
194    return *this;
195  }
196
197  PathParser& operator--() noexcept {
198    decrement();
199    return *this;
200  }
201
202  bool atEnd() const noexcept { return State_ == PS_AtEnd; }
203
204  bool inRootDir() const noexcept { return State_ == PS_InRootDir; }
205
206  bool inRootName() const noexcept { return State_ == PS_InRootName; }
207
208  bool inRootPath() const noexcept { return inRootName() || inRootDir(); }
209
210private:
211  void makeState(ParserState NewState, PosPtr Start, PosPtr End) noexcept {
212    State_    = NewState;
213    RawEntry = string_view_t(Start, End - Start);
214  }
215  void makeState(ParserState NewState) noexcept {
216    State_    = NewState;
217    RawEntry = {};
218  }
219
220  PosPtr getAfterBack() const noexcept { return Path.data() + Path.size(); }
221
222  PosPtr getBeforeFront() const noexcept { return Path.data() - 1; }
223
224  /// \brief Return a pointer to the first character after the currently
225  ///   lexed element.
226  PosPtr getNextTokenStartPos() const noexcept {
227    switch (State_) {
228    case PS_BeforeBegin:
229      return Path.data();
230    case PS_InRootName:
231    case PS_InRootDir:
232    case PS_InFilenames:
233      return &RawEntry.back() + 1;
234    case PS_InTrailingSep:
235    case PS_AtEnd:
236      return getAfterBack();
237    }
238    __libcpp_unreachable();
239  }
240
241  /// \brief Return a pointer to the first character in the currently lexed
242  ///   element.
243  PosPtr getCurrentTokenStartPos() const noexcept {
244    switch (State_) {
245    case PS_BeforeBegin:
246    case PS_InRootName:
247      return &Path.front();
248    case PS_InRootDir:
249    case PS_InFilenames:
250    case PS_InTrailingSep:
251      return &RawEntry.front();
252    case PS_AtEnd:
253      return &Path.back() + 1;
254    }
255    __libcpp_unreachable();
256  }
257
258  // Consume all consecutive separators.
259  PosPtr consumeAllSeparators(PosPtr P, PosPtr End) const noexcept {
260    if (P == nullptr || P == End || !isSeparator(*P))
261      return nullptr;
262    const int Inc = P < End ? 1 : -1;
263    P += Inc;
264    while (P != End && isSeparator(*P))
265      P += Inc;
266    return P;
267  }
268
269  // Consume exactly N separators, or return nullptr.
270  PosPtr consumeNSeparators(PosPtr P, PosPtr End, int N) const noexcept {
271    PosPtr Ret = consumeAllSeparators(P, End);
272    if (Ret == nullptr)
273      return nullptr;
274    if (P < End) {
275      if (Ret == P + N)
276        return Ret;
277    } else {
278      if (Ret == P - N)
279        return Ret;
280    }
281    return nullptr;
282  }
283
284  PosPtr consumeName(PosPtr P, PosPtr End) const noexcept {
285    PosPtr Start = P;
286    if (P == nullptr || P == End || isSeparator(*P))
287      return nullptr;
288    const int Inc = P < End ? 1 : -1;
289    P += Inc;
290    while (P != End && !isSeparator(*P))
291      P += Inc;
292    if (P == End && Inc < 0) {
293      // Iterating backwards and consumed all the rest of the input.
294      // Check if the start of the string would have been considered
295      // a root name.
296      PosPtr RootEnd = consumeRootName(End + 1, Start);
297      if (RootEnd)
298        return RootEnd - 1;
299    }
300    return P;
301  }
302
303  PosPtr consumeDriveLetter(PosPtr P, PosPtr End) const noexcept {
304    if (P == End)
305      return nullptr;
306    if (P < End) {
307      if (P + 1 == End || !isDriveLetter(P[0]) || P[1] != ':')
308        return nullptr;
309      return P + 2;
310    } else {
311      if (P - 1 == End || !isDriveLetter(P[-1]) || P[0] != ':')
312        return nullptr;
313      return P - 2;
314    }
315  }
316
317  PosPtr consumeNetworkRoot(PosPtr P, PosPtr End) const noexcept {
318    if (P == End)
319      return nullptr;
320    if (P < End)
321      return consumeName(consumeNSeparators(P, End, 2), End);
322    else
323      return consumeNSeparators(consumeName(P, End), End, 2);
324  }
325
326  PosPtr consumeRootName(PosPtr P, PosPtr End) const noexcept {
327#if defined(_LIBCPP_WIN32API)
328    if (PosPtr Ret = consumeDriveLetter(P, End))
329      return Ret;
330    if (PosPtr Ret = consumeNetworkRoot(P, End))
331      return Ret;
332#endif
333    return nullptr;
334  }
335};
336
337inline string_view_pair separate_filename(string_view_t const& s) {
338  if (s == PATHSTR(".") || s == PATHSTR("..") || s.empty())
339    return string_view_pair{s, PATHSTR("")};
340  auto pos = s.find_last_of('.');
341  if (pos == string_view_t::npos || pos == 0)
342    return string_view_pair{s, string_view_t{}};
343  return string_view_pair{s.substr(0, pos), s.substr(pos)};
344}
345
346inline string_view_t createView(PosPtr S, PosPtr E) noexcept { return {S, static_cast<size_t>(E - S) + 1}; }
347
348} // namespace parser
349
350_LIBCPP_END_NAMESPACE_FILESYSTEM
351
352#endif // PATH_PARSER_H