Refactor: path normalize now collapses multiple '..' directories at once
- Avoid a separate memmove call for each `..`.
- Avoid ambiguous path stepping, where separator literals needed to be checked to avoid fence-post errors.
- Correct & update the doc-string.
This commit is contained in:
@@ -321,12 +321,25 @@ void BLI_path_sequence_encode(
|
||||
char *string, const char *head, const char *tail, unsigned short numlen, int pic);
|
||||
|
||||
/**
|
||||
* Remove redundant characters from \a path and optionally make absolute.
|
||||
* Remove redundant characters from \a path.
|
||||
*
|
||||
* \param path: Can be any input, and this function converts it to a regular full path.
|
||||
* Also removes garbage from directory paths, like `/../` or double slashes etc.
|
||||
* The following operations are performed:
|
||||
* - Redundant path components such as `//`, `/./` & `./` (prefix) are stripped.
|
||||
* (with the exception of `//` prefix used for blend-file relative paths).
|
||||
* - `..` are resolved so `<parent>/../<child>/` resolves to `<child>/`.
|
||||
* Note that the resulting path may begin with `..` if it's relative.
|
||||
*
|
||||
* \note \a path isn't protected for max string names.
|
||||
* Details:
|
||||
* - The slash direction is expected to be native (see #SEP).
|
||||
* When calculating canonical paths you may need to run #BLI_path_slash_native first.
|
||||
* #BLI_path_cmp_normalized can be used for canonical path comparison.
|
||||
* - Trailing slashes are left intact (unlike Python which strips them).
|
||||
* - Handling paths beginning with `..` depends on them being absolute or relative.
|
||||
* For absolute paths they are removed (e.g. `/../path` becomes `/path`).
|
||||
* For relative paths they are kept as it's valid to reference paths above a relative location
|
||||
* such as `//../parent` or `../parent`.
|
||||
*
|
||||
* \param path: The path to a file or directory which can be absolute or relative.
|
||||
*/
|
||||
void BLI_path_normalize(char *path) ATTR_NONNULL(1);
|
||||
/**
|
||||
|
||||
@@ -115,13 +115,12 @@ void BLI_path_sequence_encode(
|
||||
void BLI_path_normalize(char *path)
|
||||
{
|
||||
const char *path_orig = path;
|
||||
int path_len;
|
||||
|
||||
ptrdiff_t a;
|
||||
char *start, *eind;
|
||||
|
||||
path_len = strlen(path);
|
||||
int path_len = strlen(path);
|
||||
|
||||
/*
|
||||
* Skip absolute prefix.
|
||||
* ---------------------
|
||||
*/
|
||||
if (path[0] == '/' && path[1] == '/') {
|
||||
path = path + 2; /* Leave the initial `//` untouched. */
|
||||
path_len -= 2;
|
||||
@@ -157,10 +156,14 @@ void BLI_path_normalize(char *path)
|
||||
}
|
||||
}
|
||||
#endif /* WIN32 */
|
||||
|
||||
/* Works on WIN32 as well, because the drive component is skipped. */
|
||||
const bool is_relative = path[0] && (path[0] != SEP);
|
||||
|
||||
/*
|
||||
* Strip redundant path components.
|
||||
* --------------------------------
|
||||
*/
|
||||
|
||||
/* NOTE(@ideasman42):
|
||||
* `memmove(start, eind, strlen(eind) + 1);`
|
||||
* is the same as
|
||||
@@ -189,7 +192,6 @@ void BLI_path_normalize(char *path)
|
||||
else {
|
||||
break;
|
||||
}
|
||||
|
||||
} while (i > 0);
|
||||
|
||||
if (i < i_end) {
|
||||
@@ -200,8 +202,7 @@ void BLI_path_normalize(char *path)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Remove redundant `./` prefix, while it could be kept, it confuses the loop below. */
|
||||
/* Remove the `./` prefix as it's redundant & complicates collapsing directories. */
|
||||
if (is_relative) {
|
||||
if ((path_len > 2) && (path[0] == '.') && (path[1] == SEP)) {
|
||||
memmove(path, path + 2, (path_len - 2) + 1);
|
||||
@@ -209,69 +210,127 @@ void BLI_path_normalize(char *path)
|
||||
}
|
||||
}
|
||||
|
||||
const ptrdiff_t a_start = is_relative ? 0 : 1;
|
||||
start = path;
|
||||
while ((start = strstr(start, SEP_STR ".."))) {
|
||||
if (!ELEM(start[3], SEP, '\0')) {
|
||||
start += 3;
|
||||
continue;
|
||||
}
|
||||
/*
|
||||
* Collapse Parent Directories.
|
||||
* ----------------------------
|
||||
*
|
||||
* Example: `<parent>/<child>/../` -> `<parent>/`
|
||||
*
|
||||
* Notes:
|
||||
* - Leading `../` are skipped as they cannot be collapsed (see `start_base`).
|
||||
* - Multiple parent directories are handled at once to reduce the number of `memmove` calls.
|
||||
*/
|
||||
|
||||
a = (start - path) - 1;
|
||||
if (a >= a_start) {
|
||||
/* `<prefix>/<parent>/../<postfix> => <prefix>/<postfix>`. */
|
||||
eind = start + (4 - 1) /* `strlen("/../") - 1` */; /* Strip "/.." and keep the char after. */
|
||||
while (a > 0 && path[a] != SEP) { /* Find start of `<parent>`. */
|
||||
a--;
|
||||
}
|
||||
#define IS_PARENT_DIR(p) ((p)[0] == '.' && (p)[1] == '.' && ELEM((p)[2], SEP, '\0'))
|
||||
|
||||
if (is_relative && (a == 0) && *eind) {
|
||||
/* When the path does not start with a slash, don't copy the first `/` to the destination
|
||||
* as it will make a relative path into an absolute path. */
|
||||
eind += 1;
|
||||
}
|
||||
const size_t eind_len = path_len - (eind - path);
|
||||
BLI_assert(eind_len == strlen(eind));
|
||||
|
||||
/* Only remove the parent if it's not also a `..`. */
|
||||
if (is_relative && STRPREFIX(path + ((path[a] == SEP) ? a + 1 : a), ".." SEP_STR)) {
|
||||
start += 3 /* `strlen("/..")` */;
|
||||
}
|
||||
else {
|
||||
start = path + a;
|
||||
BLI_assert(start < eind);
|
||||
memmove(start, eind, eind_len + 1);
|
||||
path_len -= (eind - start);
|
||||
BLI_assert(strlen(path) == path_len);
|
||||
BLI_assert(!is_relative || (path[0] != SEP));
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Support for odd paths: eg `/../home/me` --> `/home/me`
|
||||
* this is a valid path in blender but we can't handle this the usual way below
|
||||
* simply strip this prefix then evaluate the path as usual.
|
||||
* Python's `os.path.normpath()` does this. */
|
||||
|
||||
/* NOTE: previous version of following call used an offset of 4 instead of 3,
|
||||
* which meant that the `/../home/me` example actually became `home/me`.
|
||||
* Using offset of 3 gives behavior consistent with the aforementioned
|
||||
* Python routine. */
|
||||
eind = start + 3;
|
||||
const size_t eind_len = path_len - (eind - path);
|
||||
memmove(start, eind, eind_len + 1);
|
||||
path_len -= 3;
|
||||
BLI_assert(strlen(path) == path_len);
|
||||
BLI_assert(!is_relative || (path[0] != SEP));
|
||||
}
|
||||
/* First non prefix path component. */
|
||||
char *path_first_non_slash_part = path;
|
||||
while (*path_first_non_slash_part && *path_first_non_slash_part == SEP) {
|
||||
path_first_non_slash_part++;
|
||||
}
|
||||
|
||||
if (is_relative && path_len == 0 && (path == path_orig)) {
|
||||
path[0] = '.';
|
||||
path[1] = '\0';
|
||||
path_len += 1;
|
||||
/* Maintain a pointer to the end of leading `..` component.
|
||||
* Skip leading parent directories because logically they cannot be collapsed. */
|
||||
char *start_base = path_first_non_slash_part;
|
||||
while (IS_PARENT_DIR(start_base)) {
|
||||
start_base += 3;
|
||||
}
|
||||
|
||||
/* It's possible the entire path is made up of `../`,
|
||||
* in this case there is nothing to do. */
|
||||
if (start_base < path + path_len) {
|
||||
/* Step over directories, always starting out on the character after the slash. */
|
||||
char *start = start_base;
|
||||
char *start_temp;
|
||||
while (((start_temp = strstr(start, SEP_STR ".." SEP_STR)) ||
|
||||
/* Check if the string ends with `/..` & assign when found, else NULL. */
|
||||
(start_temp = ((start <= &path[path_len - 3]) &&
|
||||
STREQ(&path[path_len - 3], SEP_STR "..")) ?
|
||||
&path[path_len - 3] :
|
||||
NULL))) {
|
||||
start = start_temp + 1; /* Skip the `/`. */
|
||||
BLI_assert(start_base != start);
|
||||
|
||||
/* Step `end_all` forwards (over all `..`). */
|
||||
char *end_all = start;
|
||||
do {
|
||||
BLI_assert(IS_PARENT_DIR(end_all));
|
||||
end_all += 3;
|
||||
BLI_assert(end_all <= path + path_len + 1);
|
||||
} while (IS_PARENT_DIR(end_all));
|
||||
|
||||
/* Step `start` backwards (until `end` meets `end_all` or `start` meets `start_base`). */
|
||||
char *end = start;
|
||||
do {
|
||||
BLI_assert(start_base < start);
|
||||
BLI_assert(*(start - 1) == SEP);
|
||||
/* Step `start` backwards one. */
|
||||
do {
|
||||
start--;
|
||||
} while (start_base < start && *(start - 1) != SEP);
|
||||
BLI_assert(*start != SEP); /* Ensure the loop ran at least once. */
|
||||
BLI_assert(!IS_PARENT_DIR(start)); /* Clamping by `start_base` prevents this. */
|
||||
end += 3;
|
||||
} while ((start != start_base) && (end < end_all));
|
||||
|
||||
if (end > path + path_len) {
|
||||
BLI_assert(*(end - 1) == '\0');
|
||||
end--;
|
||||
end_all--;
|
||||
}
|
||||
BLI_assert(start < end && start >= start_base);
|
||||
const size_t start_len = path_len - (end - path);
|
||||
memmove(start, end, start_len + 1);
|
||||
path_len -= end - start;
|
||||
BLI_assert(strlen(path) == path_len);
|
||||
/* Other `..` directories may have been moved to the front, step `start_base` past them. */
|
||||
if (UNLIKELY(start == start_base && (end != end_all))) {
|
||||
start_base += (end_all - end);
|
||||
start = (start_base < path + path_len) ? start_base : start_base - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BLI_assert(strlen(path) == path_len);
|
||||
/* Characters before the `start_base` must *only* be `../../../` (multiples of 3). */
|
||||
BLI_assert((start_base - path_first_non_slash_part) % 3 == 0);
|
||||
/* All `..` ahead of `start_base` were collapsed (including trailing `/..`). */
|
||||
BLI_assert(!(start_base < path + path_len) ||
|
||||
(!strstr(start_base, SEP_STR ".." SEP_STR) &&
|
||||
!(path_len >= 3 && STREQ(&path[path_len - 3], SEP_STR ".."))));
|
||||
|
||||
/*
|
||||
* Final Prefix Cleanup.
|
||||
* ---------------------
|
||||
*/
|
||||
if (is_relative) {
|
||||
if (path_len == 0 && (path == path_orig)) {
|
||||
path[0] = '.';
|
||||
path[1] = '\0';
|
||||
path_len = 1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Support for odd paths: eg `/../home/me` --> `/home/me`
|
||||
* this is a valid path in blender but we can't handle this the usual way below
|
||||
* simply strip this prefix then evaluate the path as usual.
|
||||
* Python's `os.path.normpath()` does this. */
|
||||
if (start_base != path_first_non_slash_part) {
|
||||
char *start = start_base > path + path_len ? start_base - 1 : start_base;
|
||||
/* As long as `start` is set correctly, it should never begin with `../`
|
||||
* as these directories are expected to be skipped. */
|
||||
BLI_assert(!IS_PARENT_DIR(start));
|
||||
const size_t start_len = path_len - (start - path);
|
||||
memmove(path_first_non_slash_part, start, start_len + 1);
|
||||
BLI_assert(strlen(start) == start_len);
|
||||
path_len -= start - path_first_non_slash_part;
|
||||
BLI_assert(strlen(path) == path_len);
|
||||
}
|
||||
}
|
||||
|
||||
BLI_assert(strlen(path) == path_len);
|
||||
|
||||
#undef IS_PARENT_DIR
|
||||
}
|
||||
|
||||
void BLI_path_normalize_dir(char *dir, size_t dir_maxlen)
|
||||
|
||||
@@ -84,11 +84,16 @@ TEST(path_util, Normalize_Dot)
|
||||
NORMALIZE("/a/./././b/", "/a/b/");
|
||||
}
|
||||
/* #BLI_path_normalize: complex "/./" -> "/", "//" -> "/", "./path/../" -> "./". */
|
||||
TEST(path_util, Normalize_Complex)
|
||||
TEST(path_util, Normalize_ComplexAbsolute)
|
||||
{
|
||||
NORMALIZE("/a/./b/./c/./.././.././", "/a/");
|
||||
NORMALIZE("/a//.//b//.//c//.//..//.//..//.//", "/a/");
|
||||
}
|
||||
TEST(path_util, Normalize_ComplexRelative)
|
||||
{
|
||||
NORMALIZE("a/b/c/d/e/f/g/../a/../b/../../c/../../../d/../../../..", ".");
|
||||
NORMALIZE("a/b/c/d/e/f/g/../a/../../../../b/../../../c/../../d/..", ".");
|
||||
}
|
||||
/* #BLI_path_normalize: "//" -> "/" */
|
||||
TEST(path_util, Normalize_DoubleSlash)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user