diff --git a/source/blender/blenlib/BLI_path_util.h b/source/blender/blenlib/BLI_path_util.h index 1b7880eaf0e..a557f74f46c 100644 --- a/source/blender/blenlib/BLI_path_util.h +++ b/source/blender/blenlib/BLI_path_util.h @@ -321,12 +321,25 @@ void BLI_path_sequence_encode( char *string, const char *head, const char *tail, unsigned short numlen, int pic); /** - * Remove redundant characters from \a path and optionally make absolute. + * Remove redundant characters from \a path. * - * \param path: Can be any input, and this function converts it to a regular full path. - * Also removes garbage from directory paths, like `/../` or double slashes etc. + * The following operations are performed: + * - Redundant path components such as `//`, `/./` & `./` (prefix) are stripped. + * (with the exception of `//` prefix used for blend-file relative paths). + * - `..` are resolved so `<parent>/../<child>/` resolves to `<child>/`. + * Note that the resulting path may begin with `..` if it's relative. * - * \note \a path isn't protected for max string names. + * Details: + * - The slash direction is expected to be native (see #SEP). + * When calculating canonical paths you may need to run #BLI_path_slash_native first. + * #BLI_path_cmp_normalized can be used for canonical path comparison. + * - Trailing slashes are left intact (unlike Python which strips them). + * - Handling paths beginning with `..` depends on them being absolute or relative. + * For absolute paths they are removed (e.g. `/../path` becomes `/path`). + * For relative paths they are kept as it's valid to reference paths above a relative location + * such as `//../parent` or `../parent`. + * + * \param path: The path to a file or directory which can be absolute or relative.
*/ void BLI_path_normalize(char *path) ATTR_NONNULL(1); /** diff --git a/source/blender/blenlib/intern/path_util.c b/source/blender/blenlib/intern/path_util.c index b38c5d7fff0..ddf24ebd257 100644 --- a/source/blender/blenlib/intern/path_util.c +++ b/source/blender/blenlib/intern/path_util.c @@ -115,13 +115,12 @@ void BLI_path_sequence_encode( void BLI_path_normalize(char *path) { const char *path_orig = path; - int path_len; - - ptrdiff_t a; - char *start, *eind; - - path_len = strlen(path); + int path_len = strlen(path); + /* + * Skip absolute prefix. + * --------------------- + */ if (path[0] == '/' && path[1] == '/') { path = path + 2; /* Leave the initial `//` untouched. */ path_len -= 2; @@ -157,10 +156,14 @@ void BLI_path_normalize(char *path) } } #endif /* WIN32 */ - /* Works on WIN32 as well, because the drive component is skipped. */ const bool is_relative = path[0] && (path[0] != SEP); + /* + * Strip redundant path components. + * -------------------------------- + */ + /* NOTE(@ideasman42): * `memmove(start, eind, strlen(eind) + 1);` * is the same as @@ -189,7 +192,6 @@ void BLI_path_normalize(char *path) else { break; } - } while (i > 0); if (i < i_end) { @@ -200,8 +202,7 @@ void BLI_path_normalize(char *path) } } } - - /* Remove redundant `./` prefix, while it could be kept, it confuses the loop below. */ + /* Remove redundant `./` prefix as it's redundant & complicates collapsing directories. */ if (is_relative) { if ((path_len > 2) && (path[0] == '.') && (path[1] == SEP)) { memmove(path, path + 2, (path_len - 2) + 1); @@ -209,69 +210,127 @@ void BLI_path_normalize(char *path) } } - const ptrdiff_t a_start = is_relative ? 0 : 1; - start = path; - while ((start = strstr(start, SEP_STR ".."))) { - if (!ELEM(start[3], SEP, '\0')) { - start += 3; - continue; - } + /* + * Collapse Parent Directories. 
+ * ---------------------------- + * + * Example: `<parent>/<child>/../` -> `<parent>/` + * + * Notes: + * - Leading `../` are skipped as they cannot be collapsed (see `start_base`). + * - Multiple parent directories are handled at once to reduce the number of `memmove` calls. + */ - a = (start - path) - 1; - if (a >= a_start) { - /* `<prefix>/<parent>/../<postfix> => <prefix>/<postfix>`. */ - eind = start + (4 - 1) /* `strlen("/../") - 1` */; /* Strip "/.." and keep the char after. */ - while (a > 0 && path[a] != SEP) { /* Find start of `<parent>`. */ - a--; - } +#define IS_PARENT_DIR(p) ((p)[0] == '.' && (p)[1] == '.' && ELEM((p)[2], SEP, '\0')) - if (is_relative && (a == 0) && *eind) { - /* When the path does not start with a slash, don't copy the first `/` to the destination - * as it will make a relative path into an absolute path. */ - eind += 1; - } - const size_t eind_len = path_len - (eind - path); - BLI_assert(eind_len == strlen(eind)); - - /* Only remove the parent if it's not also a `..`. */ - if (is_relative && STRPREFIX(path + ((path[a] == SEP) ? a + 1 : a), ".." SEP_STR)) { - start += 3 /* `strlen("/..")` */; - } - else { - start = path + a; - BLI_assert(start < eind); - memmove(start, eind, eind_len + 1); - path_len -= (eind - start); - BLI_assert(strlen(path) == path_len); - BLI_assert(!is_relative || (path[0] != SEP)); - } - } - else { - /* Support for odd paths: eg `/../home/me` --> `/home/me` - * this is a valid path in blender but we can't handle this the usual way below - * simply strip this prefix then evaluate the path as usual. - * Python's `os.path.normpath()` does this. */ - - /* NOTE: previous version of following call used an offset of 3 instead of 4, - * which meant that the `/../home/me` example actually became `home/me`. - * Using offset of 3 gives behavior consistent with the aforementioned - * Python routine.
*/ - eind = start + 3; - const size_t eind_len = path_len - (eind - path); - memmove(start, eind, eind_len + 1); - path_len -= 3; - BLI_assert(strlen(path) == path_len); - BLI_assert(!is_relative || (path[0] != SEP)); - } + /* First non prefix path component. */ + char *path_first_non_slash_part = path; + while (*path_first_non_slash_part && *path_first_non_slash_part == SEP) { + path_first_non_slash_part++; } - if (is_relative && path_len == 0 && (path == path_orig)) { - path[0] = '.'; - path[1] = '\0'; - path_len += 1; + /* Maintain a pointer to the end of leading `..` components. + * Skip leading parent directories because logically they cannot be collapsed. */ + char *start_base = path_first_non_slash_part; + while (IS_PARENT_DIR(start_base)) { + start_base += 3; + } + + /* It's possible the entire path is made up of `../`, + * in this case there is nothing to do. */ + if (start_base < path + path_len) { + /* Step over directories, always starting out on the character after the slash. */ + char *start = start_base; + char *start_temp; + while (((start_temp = strstr(start, SEP_STR ".." SEP_STR)) || + /* Check if the string ends with `/..` & assign when found, else NULL. */ + (start_temp = ((start <= &path[path_len - 3]) && + STREQ(&path[path_len - 3], SEP_STR "..")) ? + &path[path_len - 3] : + NULL))) { + start = start_temp + 1; /* Skip the `/`. */ + BLI_assert(start_base != start); + + /* Step `end_all` forwards (over all `..`). */ + char *end_all = start; + do { + BLI_assert(IS_PARENT_DIR(end_all)); + end_all += 3; + BLI_assert(end_all <= path + path_len + 1); + } while (IS_PARENT_DIR(end_all)); + + /* Step `start` backwards (until `end` meets `end_all` or `start` meets `start_base`). */ + char *end = start; + do { + BLI_assert(start_base < start); + BLI_assert(*(start - 1) == SEP); + /* Step `start` backwards one. */ + do { + start--; + } while (start_base < start && *(start - 1) != SEP); + BLI_assert(*start != SEP); /* Ensure the loop ran at least once.
*/ + BLI_assert(!IS_PARENT_DIR(start)); /* Clamping by `start_base` prevents this. */ + end += 3; + } while ((start != start_base) && (end < end_all)); + + if (end > path + path_len) { + BLI_assert(*(end - 1) == '\0'); + end--; + end_all--; + } + BLI_assert(start < end && start >= start_base); + const size_t start_len = path_len - (end - path); + memmove(start, end, start_len + 1); + path_len -= end - start; + BLI_assert(strlen(path) == path_len); + /* Other `..` directories may have been moved to the front, step `start_base` past them. */ + if (UNLIKELY(start == start_base && (end != end_all))) { + start_base += (end_all - end); + start = (start_base < path + path_len) ? start_base : start_base - 1; + } + } } BLI_assert(strlen(path) == path_len); + /* Characters before the `start_base` must *only* be `../../../` (multiples of 3). */ + BLI_assert((start_base - path_first_non_slash_part) % 3 == 0); + /* All `..` ahead of `start_base` were collapsed (including trailing `/..`). */ + BLI_assert(!(start_base < path + path_len) || + (!strstr(start_base, SEP_STR ".." SEP_STR) && + !(path_len >= 3 && STREQ(&path[path_len - 3], SEP_STR "..")))); + + /* + * Final Prefix Cleanup. + * --------------------- + */ + if (is_relative) { + if (path_len == 0 && (path == path_orig)) { + path[0] = '.'; + path[1] = '\0'; + path_len = 1; + } + } + else { + /* Support for odd paths: eg `/../home/me` --> `/home/me` + * this is a valid path in blender but we can't handle this the usual way below + * simply strip this prefix then evaluate the path as usual. + * Python's `os.path.normpath()` does this. */ + if (start_base != path_first_non_slash_part) { + char *start = start_base > path + path_len ? start_base - 1 : start_base; + /* As long as `start` is set correctly, it should never begin with `../` + * as these directories are expected to be skipped. 
*/ + BLI_assert(!IS_PARENT_DIR(start)); + const size_t start_len = path_len - (start - path); + memmove(path_first_non_slash_part, start, start_len + 1); + BLI_assert(strlen(start) == start_len); + path_len -= start - path_first_non_slash_part; + BLI_assert(strlen(path) == path_len); + } + } + + BLI_assert(strlen(path) == path_len); + +#undef IS_PARENT_DIR } void BLI_path_normalize_dir(char *dir, size_t dir_maxlen) diff --git a/source/blender/blenlib/tests/BLI_path_util_test.cc b/source/blender/blenlib/tests/BLI_path_util_test.cc index a0263945a2a..fa5cd2572e8 100644 --- a/source/blender/blenlib/tests/BLI_path_util_test.cc +++ b/source/blender/blenlib/tests/BLI_path_util_test.cc @@ -84,11 +84,16 @@ TEST(path_util, Normalize_Dot) NORMALIZE("/a/./././b/", "/a/b/"); } /* #BLI_path_normalize: complex "/./" -> "/", "//" -> "/", "./path/../" -> "./". */ -TEST(path_util, Normalize_Complex) +TEST(path_util, Normalize_ComplexAbsolute) { NORMALIZE("/a/./b/./c/./.././.././", "/a/"); NORMALIZE("/a//.//b//.//c//.//..//.//..//.//", "/a/"); } +TEST(path_util, Normalize_ComplexRelative) +{ + NORMALIZE("a/b/c/d/e/f/g/../a/../b/../../c/../../../d/../../../..", "."); + NORMALIZE("a/b/c/d/e/f/g/../a/../../../../b/../../../c/../../d/..", "."); +} /* #BLI_path_normalize: "//" -> "/" */ TEST(path_util, Normalize_DoubleSlash) {