about summary refs log tree commit diff stats
path: root/cwalk.c
diff options
context:
space:
mode:
author Sam Anthony <sam@samanthony.xyz> 2025-04-25 21:03:01 -0400
committer Sam Anthony <sam@samanthony.xyz> 2025-04-25 21:03:01 -0400
commit 134b79095ef69d4b5ba180b39f1177c011f40502 (patch)
tree fc78dc8818eba1913b355aa1c6f4fa40b6de76c6 /cwalk.c
parent 851def4babe5a987d2e88306173d5b6b9e102e68 (diff)
download volute-134b79095ef69d4b5ba180b39f1177c011f40502.zip
cwalk path library
Diffstat (limited to 'cwalk.c')
-rwxr-xr-x cwalk.c 1479
1 files changed, 1479 insertions, 0 deletions
diff --git a/cwalk.c b/cwalk.c
new file mode 100755
index 0000000..e4c9a49
--- /dev/null
+++ b/cwalk.c
@@ -0,0 +1,1479 @@
+#include <assert.h>
+#include <ctype.h>
+#include <cwalk.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+/**
+ * Selects the default path style for the operating system this file is
+ * compiled for. Any of the usual Windows macros selects Windows-style paths,
+ * unless we are compiling for Cygwin, in which case UNIX-style paths are used.
+ *
+ * The grouping is spelled out with parentheses on purpose: "&&" binds tighter
+ * than "||" in preprocessor expressions, so without them the Cygwin exclusion
+ * would only apply to the last macro of the list.
+ */
+#if (defined(WIN32) || defined(_WIN32) || defined(__WIN32)) && \
+  !defined(__CYGWIN__)
+static enum cwk_path_style path_style = CWK_STYLE_WINDOWS;
+#else
+static enum cwk_path_style path_style = CWK_STYLE_UNIX;
+#endif
+
+/**
+ * This is a list of the separators used by the different styles. The array is
+ * indexed with the active path style (see cwk_path_output_separator), and
+ * each entry is a string of all characters which act as a separator in that
+ * style. Windows can read multiple separators, but it generally outputs just
+ * a backslash. The output will always use the first character of the entry.
+ */
+static const char *separators[] = {
+ "\\/", // CWK_STYLE_WINDOWS
+ "/" // CWK_STYLE_UNIX
+};
+
+/**
+ * A joined path represents multiple path strings which are concatenated, but
+ * not (necessarily) stored in contiguous memory. The joined path allows to
+ * iterate over the segments as if it was one piece of path.
+ */
+struct cwk_segment_joined
+{
+ // The segment the iteration is currently positioned on.
+ struct cwk_segment segment;
+ // The path strings which are iterated. The array is terminated by a NULL
+ // entry.
+ const char **paths;
+ // The index into "paths" of the path the current segment belongs to.
+ size_t path_index;
+};
+
+/**
+ * Copies up to "length" bytes of "str" into "buffer", starting at "position".
+ * The copy is truncated so that nothing is ever written at or beyond
+ * "buffer_size". This function does not write a terminating '\0'; the output
+ * is terminated separately (see cwk_path_terminate_output).
+ *
+ * @param buffer The output buffer. It is not touched if nothing fits.
+ * @param buffer_size The total size of the output buffer in bytes.
+ * @param position The offset in the buffer at which the copy starts.
+ * @param str The bytes to copy. This may overlap with the buffer.
+ * @param length The number of bytes of "str" which should be copied.
+ * @return Returns "length", which is the amount that would have been written
+ * if everything had fit in the buffer.
+ */
+static size_t cwk_path_output_sized(char *buffer, size_t buffer_size,
+  size_t position, const char *str, size_t length)
+{
+  size_t available, amount_written;
+
+  // Determine how much room is left behind the position. We compute this by
+  // subtraction instead of adding position and length, since that sum could
+  // wrap around for very large values and would then pass the bounds check.
+  available = buffer_size > position ? buffer_size - position : 0;
+
+  // We write either the whole string or just the part of it which fits.
+  amount_written = length < available ? length : available;
+
+  // The source may overlap with the destination, since some callers write
+  // into the buffer they are reading from. That's why this is a memmove.
+  if (amount_written > 0) {
+    memmove(&buffer[position], str, amount_written);
+  }
+
+  // Return the theoretical length which would have been written when
+  // everything would have fit in the buffer.
+  return length;
+}
+
+/**
+ * Writes the "current directory" segment into the buffer at the given
+ * position. The segment is the same for all path styles.
+ *
+ * @return Returns the length of the segment, even if it did not fit.
+ */
+static size_t cwk_path_output_current(char *buffer, size_t buffer_size,
+  size_t position)
+{
+  static const char current_segment[] = ".";
+
+  // The size of the array includes the '\0', which we do not want to output.
+  return cwk_path_output_sized(buffer, buffer_size, position, current_segment,
+    sizeof(current_segment) - 1);
+}
+
+/**
+ * Writes the "back" segment, which has two characters, into the buffer at the
+ * given position. The segment is the same for all path styles.
+ *
+ * @return Returns the length of the segment, even if it did not fit.
+ */
+static size_t cwk_path_output_back(char *buffer, size_t buffer_size,
+  size_t position)
+{
+  static const char back_segment[] = "..";
+
+  // The size of the array includes the '\0', which we do not want to output.
+  return cwk_path_output_sized(buffer, buffer_size, position, back_segment,
+    sizeof(back_segment) - 1);
+}
+
+/**
+ * Writes a single separator into the buffer at the given position. The
+ * separator is the first character listed for the active path style.
+ *
+ * @return Returns the length of the separator, even if it did not fit.
+ */
+static size_t cwk_path_output_separator(char *buffer, size_t buffer_size,
+  size_t position)
+{
+  const char *style_separators;
+
+  // Only the first character of the list is used for output.
+  style_separators = separators[path_style];
+  return cwk_path_output_sized(buffer, buffer_size, position,
+    style_separators, 1);
+}
+
+/**
+ * Writes a single dot into the buffer at the given position. This is used
+ * for extensions.
+ *
+ * @return Returns the length of the dot, even if it did not fit.
+ */
+static size_t cwk_path_output_dot(char *buffer, size_t buffer_size,
+  size_t position)
+{
+  const char dot = '.';
+
+  // Exactly one byte is read from the address of the character.
+  return cwk_path_output_sized(buffer, buffer_size, position, &dot, 1);
+}
+
+/**
+ * Writes a null-terminated string into the buffer at the given position. The
+ * terminating '\0' of the string is not part of the output.
+ *
+ * @return Returns the length of the string, even if it did not fit.
+ */
+static size_t cwk_path_output(char *buffer, size_t buffer_size, size_t position,
+  const char *str)
+{
+  // Measure the string and hand it over to the sized output.
+  return cwk_path_output_sized(buffer, buffer_size, position, str,
+    strlen(str));
+}
+
+/**
+ * Places the terminating '\0' of the output. The terminator is written at
+ * "pos" if that is inside of the buffer, and on the last byte of the buffer
+ * otherwise. Nothing is written if the buffer size is zero.
+ */
+static void cwk_path_terminate_output(char *buffer, size_t buffer_size,
+  size_t pos)
+{
+  size_t terminator_index;
+
+  // Without any room in the buffer there is nothing we could terminate.
+  if (buffer_size == 0) {
+    return;
+  }
+
+  // Clamp the position to the last byte of the buffer, which truncates the
+  // output if it was too long.
+  terminator_index = pos < buffer_size ? pos : buffer_size - 1;
+  buffer[terminator_index] = '\0';
+}
+
+/**
+ * Compares two sized strings according to the rules of the active path
+ * style. With the UNIX style the comparison is case sensitive. Otherwise the
+ * comparison ignores the case, and any two separators are considered equal.
+ *
+ * @param first The first string.
+ * @param second The second string.
+ * @param first_size The number of characters of the first string to compare.
+ * @param second_size The number of characters of the second string to
+ * compare.
+ * @return Returns true if the sizes are the same and the compared characters
+ * are equal.
+ */
+static bool cwk_path_is_string_equal(const char *first, const char *second,
+  size_t first_size, size_t second_size)
+{
+  const char *style_separators;
+  unsigned char a, b;
+  size_t i;
+
+  // The two strings are not equal if the sizes are not equal.
+  if (first_size != second_size) {
+    return false;
+  }
+
+  // If the path style is UNIX, we will compare case sensitively. This can be
+  // done easily using strncmp.
+  if (path_style == CWK_STYLE_UNIX) {
+    return strncmp(first, second, first_size) == 0;
+  }
+
+  // However, if this is windows we will have to compare case insensitively.
+  // Since there is no standard method to do that we will have to do it on our
+  // own. The size is checked before a character is read, so we never look
+  // beyond the range we were asked to compare.
+  style_separators = separators[path_style];
+  for (i = 0; i < first_size; ++i) {
+    // The ctype functions require a value which is representable as an
+    // unsigned char, so we must not pass a (possibly negative) plain char.
+    a = (unsigned char)first[i];
+    b = (unsigned char)second[i];
+
+    // A terminator inside of the compared range ends the comparison, just
+    // like it does for strncmp. The strings are only equal if both of them
+    // end at this position.
+    if (a == '\0' || b == '\0') {
+      return a == b;
+    }
+
+    // The characters are equal if they are the same when ignoring the case.
+    if (tolower(a) == tolower(b)) {
+      continue;
+    }
+
+    // The two chars may also both be separators, which means they would be
+    // equal as well. Anything else is a mismatch.
+    if (strchr(style_separators, first[i]) == NULL ||
+        strchr(style_separators, second[i]) == NULL) {
+      return false;
+    }
+  }
+
+  // The string must be equal since they both have the same length and all the
+  // characters are the same.
+  return true;
+}
+
+/**
+ * Finds the next "stop" at or after the submitted position. A stop is either
+ * a separator or the '\0' at the end of the string.
+ *
+ * @return Returns a pointer to the stop.
+ */
+static const char *cwk_path_find_next_stop(const char *c)
+{
+  const char *stop;
+
+  // Walk forward over everything which is not the end of the string, and
+  // bail out on the first separator we run into.
+  for (stop = c; *stop != '\0'; ++stop) {
+    if (cwk_path_is_separator(stop)) {
+      break;
+    }
+  }
+
+  return stop;
+}
+
+/**
+ * Finds the previous "stop" at or before the submitted position, without
+ * moving in front of "begin".
+ *
+ * @return Returns a pointer to the first character after the separator which
+ * was found, or the position the search ended on if that is not a separator.
+ */
+static const char *cwk_path_find_previous_stop(const char *begin, const char *c)
+{
+  const char *cursor;
+
+  // Walk back until we either hit a separator or arrive at the beginning.
+  for (cursor = c; cursor > begin; --cursor) {
+    if (cwk_path_is_separator(cursor)) {
+      break;
+    }
+  }
+
+  // The caller wants the character behind the separator, not the separator
+  // itself. The beginning of the path may be a separator as well.
+  return cwk_path_is_separator(cursor) ? cursor + 1 : cursor;
+}
+
+/**
+ * Gets the first segment of "segments", treating the string as if it had no
+ * root. Leading separators are skipped.
+ *
+ * @param path The path which is remembered in the segment.
+ * @param segments The position at which the segments of the path start.
+ * @param segment The segment which will be filled. It is initialized to an
+ * empty segment at "segments" even if no segment is found.
+ * @return Returns true if a segment was found.
+ */
+static bool cwk_path_get_first_segment_without_root(const char *path,
+  const char *segments, struct cwk_segment *segment)
+{
+  const char *cursor;
+
+  // Start out with an empty segment, which is what the caller gets if there
+  // is nothing to be found.
+  segment->path = path;
+  segment->segments = segments;
+  segment->begin = segments;
+  segment->end = segments;
+  segment->size = 0;
+
+  // An empty string has no segment we could use.
+  cursor = segments;
+  if (*cursor == '\0') {
+    return false;
+  }
+
+  // Jump over the leading separators. If we run into the end of the string
+  // while doing that, there are only separators and therefore no segment.
+  while (cwk_path_is_separator(cursor)) {
+    if (*++cursor == '\0') {
+      return false;
+    }
+  }
+
+  // The segment starts here and lasts until the next stop. The size is the
+  // distance between the two.
+  segment->begin = cursor;
+  segment->end = cwk_path_find_next_stop(cursor);
+  segment->size = (size_t)(segment->end - segment->begin);
+
+  return true;
+}
+
+/**
+ * Gets the last segment of the path, treating the path as if it had no root.
+ *
+ * @return Returns true if the path has a segment, in which case "segment"
+ * holds the last one.
+ */
+static bool cwk_path_get_last_segment_without_root(const char *path,
+  struct cwk_segment *segment)
+{
+  bool found;
+
+  // Grab the first segment at the very beginning of the path, assuming that
+  // there is no root.
+  found = cwk_path_get_first_segment_without_root(path, path, segment);
+  if (found) {
+    // Advance until there is no segment left. This relies on the segment not
+    // being changed by the call which reaches the end.
+    for (;;) {
+      if (!cwk_path_get_next_segment(segment)) {
+        break;
+      }
+    }
+  }
+
+  return found;
+}
+
+/**
+ * Positions the joined segment on the first segment of the submitted paths.
+ * Paths without any segment are skipped.
+ *
+ * @param paths The NULL-terminated array of paths.
+ * @param sj The joined segment which will be initialized.
+ * @return Returns true if a segment was found. Otherwise the path index is
+ * left on the terminating NULL entry.
+ */
+static bool cwk_path_get_first_segment_joined(const char **paths,
+  struct cwk_segment_joined *sj)
+{
+  size_t index;
+
+  sj->paths = paths;
+
+  // Try one path after the other until one of them has a segment for us.
+  for (index = 0; paths[index] != NULL; ++index) {
+    sj->path_index = index;
+    if (cwk_path_get_first_segment(paths[index], &sj->segment)) {
+      return true;
+    }
+  }
+
+  // None of the paths had a segment.
+  sj->path_index = index;
+  return false;
+}
+
+/**
+ * Advances the joined segment to the next segment, moving on to the
+ * following paths if the current one has no segment left.
+ *
+ * @return Returns true if there was another segment.
+ */
+static bool cwk_path_get_next_segment_joined(struct cwk_segment_joined *sj)
+{
+  const char *next_path;
+
+  // There is nothing left if we already reached the end of all paths.
+  if (sj->paths[sj->path_index] == NULL) {
+    return false;
+  }
+
+  // The easy case is another segment on the path we are currently on.
+  if (cwk_path_get_next_segment(&sj->segment)) {
+    return true;
+  }
+
+  // The current path is exhausted, so we move forward by at least one path
+  // and keep going until we find one which has a segment.
+  for (;;) {
+    ++sj->path_index;
+    next_path = sj->paths[sj->path_index];
+
+    // We are done if there are no more paths left.
+    if (next_path == NULL) {
+      return false;
+    }
+
+    // Any path after the one we started on is treated as if it had no root,
+    // so whatever looks like a root becomes part of the segments.
+    if (cwk_path_get_first_segment_without_root(next_path, next_path,
+          &sj->segment)) {
+      return true;
+    }
+  }
+}
+
+/**
+ * Moves the joined segment to the previous segment, going back to the
+ * preceding paths if the current one has no segment left.
+ *
+ * @return Returns true if there was a previous segment.
+ */
+static bool cwk_path_get_previous_segment_joined(struct cwk_segment_joined *sj)
+{
+  const char *previous_path;
+  bool found;
+
+  // Without any paths there might not even be an initialized segment in the
+  // struct, and there certainly is no previous segment.
+  if (sj->paths[0] == NULL) {
+    return false;
+  }
+
+  // The easy case is a previous segment on the path we are currently on.
+  if (cwk_path_get_previous_segment(&sj->segment)) {
+    return true;
+  }
+
+  // Otherwise we go back one path at a time. Once we are on index 0 there is
+  // nothing left to inspect.
+  while (sj->path_index > 0) {
+    --sj->path_index;
+    previous_path = sj->paths[sj->path_index];
+
+    // Only the very first path might include a root. All the others are
+    // treated as plain segments.
+    if (sj->path_index == 0) {
+      found = cwk_path_get_last_segment(previous_path, &sj->segment);
+    } else {
+      found = cwk_path_get_last_segment_without_root(previous_path,
+        &sj->segment);
+    }
+
+    if (found) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/**
+ * Determines whether the back segment the joined segment is positioned on
+ * will be removed, because there is a normal segment in front of it which it
+ * neutralizes. The joined segment is moved while doing so.
+ *
+ * @return Returns true if the back segment will be removed.
+ */
+static bool cwk_path_segment_back_will_be_removed(struct cwk_segment_joined *sj)
+{
+  int balance;
+
+  // The balance goes up for each normal segment and down for each back
+  // segment we see in front of us. As soon as it is above zero there is a
+  // normal segment left which our back segment will pop, so it is removed.
+  balance = 0;
+
+  // Walk back through all previous segments. If we reach the beginning
+  // without a positive balance, the segment is kept.
+  while (cwk_path_get_previous_segment_joined(sj)) {
+    switch (cwk_path_get_segment_type(&sj->segment)) {
+    case CWK_NORMAL:
+      // A normal segment neutralizes one back segment.
+      if (++balance > 0) {
+        return true;
+      }
+      break;
+    case CWK_BACK:
+      // Another back segment needs a normal segment of its own before one is
+      // available for us.
+      --balance;
+      break;
+    default:
+      // Current-segments have no influence.
+      break;
+    }
+  }
+
+  return false;
+}
+
+/**
+ * Determines whether the normal segment the joined segment is positioned on
+ * will be removed by a back segment which follows it. The joined segment is
+ * moved while doing so.
+ *
+ * @return Returns true if the normal segment will be removed.
+ */
+static bool cwk_path_segment_normal_will_be_removed(
+  struct cwk_segment_joined *sj)
+{
+  int balance;
+
+  // The balance counts the normal segments after us which would be popped
+  // before we are. Each back segment takes one of them away, and once it
+  // drops below zero it is our segment which gets popped.
+  balance = 0;
+
+  // Walk forward through all following segments. If we reach the end without
+  // a negative balance, the segment is kept.
+  while (cwk_path_get_next_segment_joined(sj)) {
+    switch (cwk_path_get_segment_type(&sj->segment)) {
+    case CWK_NORMAL:
+      // This segment would be removed by a back segment before we are.
+      ++balance;
+      break;
+    case CWK_BACK:
+      if (--balance < 0) {
+        return true;
+      }
+      break;
+    default:
+      // Current-segments have no influence.
+      break;
+    }
+  }
+
+  return false;
+}
+
+/**
+ * Determines whether the segment the joined segment is positioned on will be
+ * dropped when the path is normalized. The submitted joined segment is not
+ * modified.
+ *
+ * @param sj The joined segment which is inspected.
+ * @param absolute Whether the path is absolute.
+ * @return Returns true if the segment will be removed.
+ */
+static bool cwk_path_segment_will_be_removed(
+  const struct cwk_segment_joined *sj, bool absolute)
+{
+  struct cwk_segment_joined scratch;
+
+  // The helpers below move the joined segment around, so they get a copy to
+  // work on.
+  scratch = *sj;
+
+  switch (cwk_path_get_segment_type(&sj->segment)) {
+  case CWK_CURRENT:
+    // A current-segment is always dropped.
+    return true;
+  case CWK_BACK:
+    // A back segment is always dropped if the path is absolute. Otherwise it
+    // depends on what is in front of it.
+    if (absolute) {
+      return true;
+    }
+    return cwk_path_segment_back_will_be_removed(&scratch);
+  default:
+    // Anything else depends on the segments which follow.
+    return cwk_path_segment_normal_will_be_removed(&scratch);
+  }
+}
+
+/**
+ * Advances the joined segment until it is positioned on a segment which will
+ * not be removed by the normalization.
+ *
+ * @return Returns true if such a segment was found, or false if the end of
+ * the paths was reached first.
+ */
+static bool cwk_path_segment_joined_skip_invisible(
+  struct cwk_segment_joined *sj, bool absolute)
+{
+  for (;;) {
+    // We are done as soon as we are on a segment which stays.
+    if (!cwk_path_segment_will_be_removed(sj, absolute)) {
+      return true;
+    }
+
+    // This one goes away, so try the next one - if there is any.
+    if (!cwk_path_get_next_segment_joined(sj)) {
+      return false;
+    }
+  }
+}
+
+/**
+ * Determines the length of the root of a windows style path and stores it in
+ * "length". The following roots are recognized:
+ *  - a single leading separator (length 1),
+ *  - a device path starting with two separators, a '?' or '.' and another
+ *    separator (length 4),
+ *  - any other path starting with two separators, which is treated as a
+ *    network path (UNC). The root covers the server name, the shared folder
+ *    name and one separator after it, if there is any,
+ *  - a character followed by a colon (length 2), optionally followed by a
+ *    separator (length 3). The character in front of the colon is not
+ *    validated.
+ * The length is zero for anything else, including the empty string.
+ */
+static void cwk_path_get_root_windows(const char *path, size_t *length)
+{
+ const char *c;
+ bool is_device_path;
+
+ // We can not determine the root if this is an empty string. So we set the
+ // length to zero and cancel the whole thing.
+ c = path;
+ *length = 0;
+ if (!*c) {
+ return;
+ }
+
+ // Now we have to verify whether this is a windows network path (UNC), which
+ // we will consider our root.
+ if (cwk_path_is_separator(c)) {
+ ++c;
+
+ // Check whether the path starts with a single backslash, which means this
+ // is not a network path - just a normal path starting with a backslash.
+ if (!cwk_path_is_separator(c)) {
+ // Okay, this is not a network path but we still use the backslash as a
+ // root.
+ ++(*length);
+ return;
+ }
+
+ // A device path is a path which starts with "\\." or "\\?". A device path
+ // can be a UNC path as well, in which case it will take up one more
+ // segment. So, this is a network or device path. Skip the previous
+ // separator. Now we need to determine whether this is a device path. We
+ // might advance one character here if the server name starts with a '?' or
+ // a '.', but that's fine since we will search for a separator afterwards
+ // anyway. Note that the increment only happens if the first part of the
+ // condition is true.
+ ++c;
+ is_device_path = (*c == '?' || *c == '.') && cwk_path_is_separator(++c);
+ if (is_device_path) {
+ // That's a device path, and the root must be either "\\.\" or "\\?\"
+ // which is 4 characters long. (at least that's how Windows
+ // GetFullPathName behaves.)
+ *length = 4;
+ return;
+ }
+
+ // We will grab anything up to the next stop. The next stop might be a '\0'
+ // or another separator. That will be the server name.
+ c = cwk_path_find_next_stop(c);
+
+ // If this is a separator and not the end of a string we will have to
+ // include it. However, if this is a '\0' we must not skip it.
+ while (cwk_path_is_separator(c)) {
+ ++c;
+ }
+
+ // We are now skipping the shared folder name, which will end after the
+ // next stop.
+ c = cwk_path_find_next_stop(c);
+
+ // Then there might be a separator at the end. We will include that as well,
+ // it will mark the path as absolute.
+ if (cwk_path_is_separator(c)) {
+ ++c;
+ }
+
+ // Finally, calculate the size of the root.
+ *length = (size_t)(c - path);
+ return;
+ }
+
+ // Move to the next and check whether this is a colon. Reading the second
+ // character is fine, since we know that the first one is not a '\0'.
+ if (*++c == ':') {
+ *length = 2;
+
+ // Now check whether this is a backslash (or slash). If it is not, we could
+ // assume that the next character is a '\0' if it is a valid path. However,
+ // we will not assume that - since ':' is not valid in a path it must be a
+ // mistake by the caller then. We will try to understand it anyway.
+ if (cwk_path_is_separator(++c)) {
+ *length = 3;
+ }
+ }
+}
+
+/**
+ * Determines the length of the root of a unix style path and stores it in
+ * "length". The root is the leading separator, so the length is either one
+ * or zero.
+ */
+static void cwk_path_get_root_unix(const char *path, size_t *length)
+{
+  // There is no root if the path does not start with a separator.
+  *length = cwk_path_is_separator(path) ? 1 : 0;
+}
+
+/**
+ * Determines whether a root makes its path absolute, which is the case if
+ * the root ends with a separator.
+ *
+ * @param path The path the root belongs to.
+ * @param length The length of the root.
+ * @return Returns true if the root is absolute.
+ */
+static bool cwk_path_is_root_absolute(const char *path, size_t length)
+{
+  const char *last_root_char;
+
+  // Without a root this can not be absolute.
+  if (length == 0) {
+    return false;
+  }
+
+  // Otherwise it all depends on the last character of the root.
+  last_root_char = &path[length - 1];
+  return cwk_path_is_separator(last_root_char);
+}
+
+/**
+ * Normalizes the separators of a root which was written to the buffer. This
+ * only does something for the windows style, where every separator within
+ * the root is replaced by the separator which is used for output.
+ *
+ * @param buffer The buffer which starts with the root.
+ * @param buffer_size The size of the buffer.
+ * @param length The length of the root.
+ */
+static void cwk_path_fix_root(char *buffer, size_t buffer_size, size_t length)
+{
+  size_t limit, i;
+  char *current;
+
+  // Other styles have nothing which needs to be fixed.
+  if (path_style != CWK_STYLE_WINDOWS) {
+    return;
+  }
+
+  // The root may not have fit into the buffer entirely, so we must not go
+  // further than the buffer allows.
+  limit = length < buffer_size ? length : buffer_size;
+
+  // There must not be any forward slashes in a windows root, so all of the
+  // separators become the first separator of the windows style.
+  for (i = 0; i < limit; ++i) {
+    current = &buffer[i];
+    if (cwk_path_is_separator(current)) {
+      *current = separators[CWK_STYLE_WINDOWS][0];
+    }
+  }
+}
+
+/**
+ * Joins the submitted paths and writes the normalized result to the buffer.
+ * The root is taken from the first path only. The output is always
+ * terminated, unless the buffer size is zero.
+ *
+ * @param paths The NULL-terminated array of paths to join.
+ * @param buffer The output buffer.
+ * @param buffer_size The size of the output buffer.
+ * @return Returns the total length of the normalized path, even if it did
+ * not fit in the buffer.
+ */
+static size_t cwk_path_join_and_normalize_multiple(const char **paths,
+  char *buffer, size_t buffer_size)
+{
+  struct cwk_segment_joined sj;
+  size_t pos;
+  bool absolute;
+  bool wrote_segment = false;
+
+  // The output starts with the root of the first path, so that is where our
+  // position begins.
+  cwk_path_get_root(paths[0], &pos);
+
+  // We need to know whether the path is absolute, since that decides whether
+  // superfluous "../" can be removed.
+  absolute = cwk_path_is_root_absolute(paths[0], pos);
+
+  // Copy the root over and normalize it afterwards.
+  cwk_path_output_sized(buffer, buffer_size, 0, paths[0], pos);
+  cwk_path_fix_root(buffer, buffer_size, pos);
+
+  // If there is no segment at all, the root is everything we output.
+  if (cwk_path_get_first_segment_joined(paths, &sj)) {
+    do {
+      // Only the segments which survive the normalization are written.
+      if (!cwk_path_segment_will_be_removed(&sj, absolute)) {
+        // Segments are divided by a separator, but there is none after the
+        // last one. The separator must be written in front of the segment,
+        // since writing it afterwards would override the terminator when
+        // the buffer is reused.
+        if (wrote_segment) {
+          pos += cwk_path_output_separator(buffer, buffer_size, pos);
+        }
+        wrote_segment = true;
+
+        // The output functions take care of the buffer size limitations.
+        pos += cwk_path_output_sized(buffer, buffer_size, pos,
+          sj.segment.begin, sj.segment.size);
+      }
+    } while (cwk_path_get_next_segment_joined(&sj));
+
+    // This happens if the path is relative and all of its segments have been
+    // removed. An empty output means we stay in the current directory, so we
+    // output a "." instead.
+    if (!wrote_segment && pos == 0) {
+      assert(absolute == false);
+      pos += cwk_path_output_current(buffer, buffer_size, pos);
+    }
+  }
+
+  // We must append a '\0' in any case, which is only skipped if the buffer
+  // size is zero.
+  cwk_path_terminate_output(buffer, buffer_size, pos);
+
+  // And finally let our caller know about the total size of the normalized
+  // path.
+  return pos;
+}
+
+/**
+ * Generates an absolute path from a base and a path, and writes the
+ * normalized result to the buffer. If the submitted path is absolute the
+ * base is ignored. If the base is not absolute, a separator is put in front
+ * of everything as a fake root.
+ *
+ * @return Returns the total length of the result, even if it did not fit in
+ * the buffer.
+ */
+size_t cwk_path_get_absolute(const char *base, const char *path, char *buffer,
+  size_t buffer_size)
+{
+  const char *paths[4];
+  size_t count;
+
+  count = 0;
+
+  // The base should be absolute if the API is used correctly. If it is not,
+  // we begin with a fake root.
+  if (!cwk_path_is_absolute(base)) {
+    paths[count++] = path_style == CWK_STYLE_WINDOWS ? "\\" : "/";
+  }
+
+  // The base only matters if the submitted path is relative. An absolute
+  // path is just normalized on its own.
+  if (!cwk_path_is_absolute(path)) {
+    paths[count++] = base;
+  }
+
+  paths[count++] = path;
+  paths[count] = NULL;
+
+  // Finally join everything together and normalize it.
+  return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size);
+}
+
+/**
+ * Advances both joined segments as long as their visible segments are equal.
+ * The paths diverge if two segments differ or if one of the paths ends.
+ *
+ * @param bsj The joined segment of the base path.
+ * @param osj The joined segment of the other path.
+ * @param absolute Whether the paths are absolute.
+ * @param base_available Receives whether the base path has a segment left.
+ * @param other_available Receives whether the other path has a segment left.
+ */
+static void cwk_path_skip_segments_until_diverge(struct cwk_segment_joined *bsj,
+  struct cwk_segment_joined *osj, bool absolute, bool *base_available,
+  bool *other_available)
+{
+  for (;;) {
+    // Jump over everything which is invisible in both paths. The caller
+    // wants to know which of the paths has something left afterwards.
+    *base_available = cwk_path_segment_joined_skip_invisible(bsj, absolute);
+    *other_available = cwk_path_segment_joined_skip_invisible(osj, absolute);
+
+    // We can not continue once at least one of the paths reached the end.
+    if (!(*base_available && *other_available)) {
+      return;
+    }
+
+    // The paths diverge here if the segments are not equal.
+    if (!cwk_path_is_string_equal(bsj->segment.begin, osj->segment.begin,
+          bsj->segment.size, osj->segment.size)) {
+      return;
+    }
+
+    // Move on to the next segments. They might be invisible, but that is
+    // taken care of at the beginning of the loop.
+    *base_available = cwk_path_get_next_segment_joined(bsj);
+    *other_available = cwk_path_get_next_segment_joined(osj);
+    if (!*base_available || !*other_available) {
+      return;
+    }
+  }
+}
+
+/**
+ * Generates a path which leads from the base directory to the submitted
+ * path, and writes it to the buffer. If the two paths do not have the same
+ * root, an empty string is written and zero is returned. If both paths point
+ * to the same location the result is ".".
+ *
+ * @return Returns the total length of the result, even if it did not fit in
+ * the buffer.
+ */
+size_t cwk_path_get_relative(const char *base_directory, const char *path,
+  char *buffer, size_t buffer_size)
+{
+  struct cwk_segment_joined bsj, osj;
+  const char *base_paths[2], *other_paths[2];
+  size_t pos, base_root_length, path_root_length;
+  bool absolute, base_available, other_available, more;
+  bool has_output;
+
+  pos = 0;
+
+  // There is no way to get from one root to a different one, so we can only
+  // continue if the roots are equal.
+  cwk_path_get_root(base_directory, &base_root_length);
+  cwk_path_get_root(path, &path_root_length);
+  if (base_root_length != path_root_length ||
+      !cwk_path_is_string_equal(base_directory, path, base_root_length,
+        path_root_length)) {
+    cwk_path_terminate_output(buffer, buffer_size, pos);
+    return pos;
+  }
+
+  // If the paths are absolute, all back-segments can be removed.
+  absolute = cwk_path_is_root_absolute(base_directory, base_root_length);
+
+  // Both paths are wrapped into a joined segment with a single path each, so
+  // we can use the internal functions on them.
+  base_paths[0] = base_directory;
+  base_paths[1] = NULL;
+  other_paths[0] = path;
+  other_paths[1] = NULL;
+  cwk_path_get_first_segment_joined(base_paths, &bsj);
+  cwk_path_get_first_segment_joined(other_paths, &osj);
+
+  // The segments which both paths have in common are of no interest.
+  cwk_path_skip_segments_until_diverge(&bsj, &osj, absolute, &base_available,
+    &other_available);
+
+  // We need to remember whether anything was written, either to remove the
+  // trailing separator or to output a current-segment instead.
+  has_output = false;
+
+  // Each visible segment which is left in the base path has to be left with
+  // a back segment. Every one of them is followed by a separator, the
+  // superfluous one is removed later on.
+  more = base_available;
+  while (more && cwk_path_segment_joined_skip_invisible(&bsj, absolute)) {
+    has_output = true;
+    pos += cwk_path_output_back(buffer, buffer_size, pos);
+    pos += cwk_path_output_separator(buffer, buffer_size, pos);
+    more = cwk_path_get_next_segment_joined(&bsj);
+  }
+
+  // Then we navigate into each visible segment which is left in the target
+  // path, again with a separator after every one of them.
+  more = other_available;
+  while (more && cwk_path_segment_joined_skip_invisible(&osj, absolute)) {
+    has_output = true;
+    pos += cwk_path_output_sized(buffer, buffer_size, pos, osj.segment.begin,
+      osj.segment.size);
+    pos += cwk_path_output_separator(buffer, buffer_size, pos);
+    more = cwk_path_get_next_segment_joined(&osj);
+  }
+
+  if (has_output) {
+    // Step back by one character, so the terminator replaces the trailing
+    // separator.
+    --pos;
+  } else {
+    // The target path points to the base path, which is expressed by a
+    // "current directory".
+    pos += cwk_path_output_current(buffer, buffer_size, pos);
+  }
+
+  // Finally, place the '\0' at the current position or at the end of the
+  // buffer.
+  cwk_path_terminate_output(buffer, buffer_size, pos);
+
+  return pos;
+}
+
+/**
+ * Joins two paths and writes the normalized result to the buffer.
+ *
+ * @return Returns the total length of the result, even if it did not fit in
+ * the buffer.
+ */
+size_t cwk_path_join(const char *path_a, const char *path_b, char *buffer,
+  size_t buffer_size)
+{
+  // The two paths go into a NULL-terminated array, and the join and
+  // normalize function does the hard work for us.
+  const char *paths[] = {path_a, path_b, NULL};
+
+  return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size);
+}
+
+/**
+ * Joins all paths of a NULL-terminated array and writes the normalized
+ * result to the buffer.
+ *
+ * @return Returns the total length of the result, even if it did not fit in
+ * the buffer.
+ */
+size_t cwk_path_join_multiple(const char **paths, char *buffer,
+  size_t buffer_size)
+{
+  size_t total_length;
+
+  // The internal function already handles an arbitrary amount of paths.
+  total_length = cwk_path_join_and_normalize_multiple(paths, buffer,
+    buffer_size);
+  return total_length;
+}
+
+/**
+ * Determines the length of the root of a path according to the active path
+ * style, and stores it in "length".
+ */
+void cwk_path_get_root(const char *path, size_t *length)
+{
+  // Everything which is not the windows style is handled like unix.
+  if (path_style != CWK_STYLE_WINDOWS) {
+    cwk_path_get_root_unix(path, length);
+    return;
+  }
+
+  cwk_path_get_root_windows(path, length);
+}
+
+/**
+ * Replaces the root of a path with a new root and writes the result to the
+ * buffer. The path and the buffer may overlap.
+ *
+ * @param path The original path.
+ * @param new_root The root which replaces the one of the path.
+ * @param buffer The output buffer.
+ * @param buffer_size The size of the output buffer.
+ * @return Returns the total length of the new path, even if it did not fit
+ * in the buffer.
+ */
+size_t cwk_path_change_root(const char *path, const char *new_root,
+  char *buffer, size_t buffer_size)
+{
+  size_t old_root_length, new_root_length, tail_length, total_length;
+
+  // The tail is whatever comes after the root which is being replaced, so we
+  // need the length of that root first.
+  cwk_path_get_root(path, &old_root_length);
+  new_root_length = strlen(new_root);
+  tail_length = strlen(path) - old_root_length;
+
+  // The tail is written first and the new root second. The source path and
+  // the buffer may be overlapping, and this way the root does not overwrite
+  // the tail before it has been moved.
+  cwk_path_output_sized(buffer, buffer_size, new_root_length,
+    path + old_root_length, tail_length);
+  cwk_path_output_sized(buffer, buffer_size, 0, new_root, new_root_length);
+
+  // Terminate the output behind the tail.
+  total_length = tail_length + new_root_length;
+  cwk_path_terminate_output(buffer, buffer_size, total_length);
+
+  return total_length;
+}
+
+/**
+ * Determines whether a path is absolute, which is the case if its root is
+ * absolute.
+ */
+bool cwk_path_is_absolute(const char *path)
+{
+  size_t root_length;
+
+  // Measure the root, which then tells us whether the path is absolute.
+  cwk_path_get_root(path, &root_length);
+  return cwk_path_is_root_absolute(path, root_length);
+}
+
+/**
+ * Determines whether a path is relative, which is every path that is not
+ * absolute.
+ */
+bool cwk_path_is_relative(const char *path)
+{
+  if (cwk_path_is_absolute(path)) {
+    return false;
+  }
+
+  return true;
+}
+
+/**
+ * Gets the basename of a path, which is its last segment.
+ *
+ * @param path The path which is inspected.
+ * @param basename Receives a pointer to the beginning of the basename within
+ * the path, or NULL if the path has no segments.
+ * @param length Receives the length of the basename, which is zero if there
+ * is none. This may be NULL if the length is not needed.
+ */
+void cwk_path_get_basename(const char *path, const char **basename,
+  size_t *length)
+{
+  struct cwk_segment last;
+  const char *found_begin;
+  size_t found_size;
+
+  // Assume that there is no basename, which is the case if the path has no
+  // segments.
+  found_begin = NULL;
+  found_size = 0;
+
+  // The last segment is our basename. Trailing separators behind it are not
+  // included in the size of the segment.
+  if (cwk_path_get_last_segment(path, &last)) {
+    found_begin = last.begin;
+    found_size = last.size;
+  }
+
+  *basename = found_begin;
+  if (length != NULL) {
+    *length = found_size;
+  }
+}
+
+/**
+ * Writes a copy of the path to the buffer in which the last segment is
+ * replaced by the submitted basename. If the path has no segments, the new
+ * basename is appended to the root of the path.
+ *
+ * @param path The path whose basename is exchanged.
+ * @param new_basename The new basename. Separators at its beginning and end
+ * are ignored.
+ * @param buffer The buffer receiving the result.
+ * @param buffer_size The size of the receiving buffer.
+ * @return The size reported for the resulting path.
+ */
+size_t cwk_path_change_basename(const char *path, const char *new_basename,
+  char *buffer, size_t buffer_size)
+{
+  struct cwk_segment last;
+  size_t written, root_length, name_length;
+  const char *name_end;
+
+  // The common case: there is a last segment, and replacing it is exactly
+  // what the segment replacement function does.
+  if (cwk_path_get_last_segment(path, &last)) {
+    return cwk_path_change_segment(&last, new_basename, buffer, buffer_size);
+  }
+
+  // From here on the path consists of nothing but a (possibly empty) root.
+  // The root is copied to the output unmodified.
+  cwk_path_get_root(path, &root_length);
+  written = cwk_path_output_sized(buffer, buffer_size, 0, path, root_length);
+
+  // Drop all separators in front of the new basename.
+  for (; cwk_path_is_separator(new_basename); ++new_basename) {
+    // Nothing to do, the loop header advances the pointer.
+  }
+
+  // Locate the terminating '\0' of the new basename...
+  name_end = new_basename;
+  while (*name_end != '\0') {
+    ++name_end;
+  }
+
+  // ... and walk back over all separators at the end of it.
+  while (name_end > new_basename && cwk_path_is_separator(name_end - 1)) {
+    --name_end;
+  }
+  name_length = (size_t)(name_end - new_basename);
+
+  // The trimmed basename goes right behind the root.
+  written += cwk_path_output_sized(buffer, buffer_size, written, new_basename,
+    name_length);
+
+  // Terminate the output and report the size of the path.
+  cwk_path_terminate_output(buffer, buffer_size, written);
+  return written;
+}
+
+/**
+ * Measures the part of the path in front of its last segment.
+ *
+ * @param path The path which is inspected.
+ * @param length Receives the distance from the beginning of the path to the
+ * beginning of the last segment, or 0 if the path has no segments.
+ */
+void cwk_path_get_dirname(const char *path, size_t *length)
+{
+  struct cwk_segment last;
+  bool has_segment;
+
+  // The last segment holds the basename, so the dirname is everything from the
+  // beginning of the string up to the beginning of that segment. A path
+  // without segments has no dirname.
+  has_segment = cwk_path_get_last_segment(path, &last);
+  *length = has_segment ? (size_t)(last.begin - path) : 0;
+}
+
+/**
+ * Finds the extension of the last segment of the path. The extension starts
+ * at the last dot of that segment and reaches to the end of the segment.
+ *
+ * @param path The path which is inspected.
+ * @param extension Receives a pointer to the dot which starts the extension.
+ * Only written if an extension was found.
+ * @param length Receives the size of the extension, including the dot. Only
+ * written if an extension was found.
+ * @return True if the last segment contains a dot, false if it does not or if
+ * the path has no segments.
+ */
+bool cwk_path_get_extension(const char *path, const char **extension,
+  size_t *length)
+{
+  struct cwk_segment segment;
+  const char *c;
+
+  // We get the last segment of the path. The last segment will contain the
+  // extension if there is any.
+  if (!cwk_path_get_last_segment(path, &segment)) {
+    return false;
+  }
+
+  // Now we search for a dot within the segment, from the end towards the
+  // beginning, since we want to find the last dot. The cursor always stays one
+  // position behind the inspected character. That way it never moves in front
+  // of the segment beginning, which may be the very first character of the
+  // path string - forming a pointer before that would be undefined behaviour.
+  for (c = segment.end; c > segment.begin; --c) {
+    if (c[-1] == '.') {
+      // Okay, we found an extension. We can stop looking now.
+      *extension = c - 1;
+      *length = (size_t)(segment.end - (c - 1));
+      return true;
+    }
+  }
+
+  // We couldn't find any extension.
+  return false;
+}
+
+/**
+ * Determines whether the path has an extension.
+ *
+ * @param path The path which is inspected.
+ * @return The result of cwk_path_get_extension() for the path.
+ */
+bool cwk_path_has_extension(const char *path)
+{
+  const char *ignored_extension;
+  size_t ignored_length;
+
+  // The extension lookup already answers the question. We are only interested
+  // in whether it succeeds, not in the extension it reports.
+  return cwk_path_get_extension(path, &ignored_extension, &ignored_length);
+}
+
+/**
+ * Writes a copy of the path to the buffer in which the extension of the last
+ * segment is replaced by the submitted extension. If the last segment has no
+ * extension, the new one is appended to it. If the path has no segments, the
+ * extension is written behind the root.
+ *
+ * @param path The path whose extension is exchanged.
+ * @param new_extension The new extension, with or without a leading dot.
+ * @param buffer The buffer receiving the result.
+ * @param buffer_size The size of the receiving buffer.
+ * @return The size reported for the resulting path.
+ */
+size_t cwk_path_change_extension(const char *path, const char *new_extension,
+  char *buffer, size_t buffer_size)
+{
+  struct cwk_segment last;
+  const char *cursor, *extension_begin;
+  size_t written, root_length, tail_length, extension_length;
+
+  if (!cwk_path_get_last_segment(path, &last)) {
+    // There is no segment which could carry an extension. We output the root
+    // as it is. Without a root the root length is 0 and nothing is written.
+    cwk_path_get_root(path, &root_length);
+    written = cwk_path_output_sized(buffer, buffer_size, 0, path, root_length);
+
+    // The submitted extension may or may not bring its own dot. If it doesn't
+    // we supply one.
+    if (new_extension[0] != '.') {
+      written += cwk_path_output_dot(buffer, buffer_size, written);
+    }
+
+    // Output the extension, terminate the output and report the size.
+    written += cwk_path_output(buffer, buffer_size, written, new_extension);
+    cwk_path_terminate_output(buffer, buffer_size, written);
+    return written;
+  }
+
+  // Locate the last dot of the segment, which is where the old extension
+  // starts. If the segment has no dot, the old extension is considered to be
+  // empty and located at the end of the segment.
+  extension_begin = last.end;
+  for (cursor = last.end; cursor > last.begin; --cursor) {
+    if (cursor[-1] == '.') {
+      extension_begin = cursor - 1;
+      break;
+    }
+  }
+
+  // Everything in front of the old extension stays untouched.
+  written = cwk_path_output_sized(buffer, buffer_size, 0, last.path,
+    (size_t)(extension_begin - last.path));
+
+  // Exactly one dot is written in front of the extension, so a leading dot of
+  // the submitted value is skipped. Any further dots are part of the
+  // extension itself.
+  if (*new_extension == '.') {
+    ++new_extension;
+  }
+
+  // The tail is whatever follows the last segment. It is written before the
+  // extension, at the position where it will end up (behind the dot and the
+  // new extension). The buffer may overlap with the submitted path, and a
+  // longer extension would overwrite the tail if it was written first.
+  extension_length = 1 + strlen(new_extension);
+  tail_length = cwk_path_output(buffer, buffer_size,
+    written + extension_length, last.end);
+
+  // Now the gap is filled with the dot followed by the new extension.
+  written += cwk_path_output_dot(buffer, buffer_size, written);
+  written += cwk_path_output(buffer, buffer_size, written, new_extension);
+
+  // The tail has been written already, so it only has to be accounted for
+  // before we terminate the output.
+  written += tail_length;
+  cwk_path_terminate_output(buffer, buffer_size, written);
+
+  return written;
+}
+
+/**
+ * Normalizes a single path by forwarding it to the function which joins and
+ * normalizes a list of paths.
+ *
+ * @param path The path which is normalized.
+ * @param buffer The buffer receiving the result.
+ * @param buffer_size The size of the receiving buffer.
+ * @return The value returned by cwk_path_join_and_normalize_multiple().
+ */
+size_t cwk_path_normalize(const char *path, char *buffer, size_t buffer_size)
+{
+  // The multi-path function expects a NULL terminated list. Ours contains just
+  // the one submitted path.
+  const char *single_path_list[2] = {path, NULL};
+
+  return cwk_path_join_and_normalize_multiple(single_path_list, buffer,
+    buffer_size);
+}
+
+/**
+ * Measures how much of the base path is shared with the other path.
+ *
+ * @param path_base The path the returned length refers to.
+ * @param path_other The path which is compared against the base path.
+ * @return 0 if the roots differ. The length of the root if one of the paths
+ * has no segments. Otherwise the offset within the base path up to the end of
+ * the last segment which was equal in both paths, or the length of the root
+ * if no segment was equal.
+ */
+size_t cwk_path_get_intersection(const char *path_base, const char *path_other)
+{
+  const char *base_list[2], *other_list[2];
+  struct cwk_segment_joined base, other;
+  size_t base_root, other_root;
+  const char *common_end;
+  bool is_absolute;
+
+  // Paths with different roots have nothing in common. This covers the case
+  // of one path being relative and the other one being absolute as well.
+  cwk_path_get_root(path_base, &base_root);
+  cwk_path_get_root(path_other, &other_root);
+  if (!cwk_path_is_string_equal(path_base, path_other, base_root,
+        other_root)) {
+    return 0;
+  }
+
+  // The joined segment iteration works on NULL terminated path lists. Each of
+  // our lists holds a single path.
+  base_list[0] = path_base;
+  base_list[1] = NULL;
+  other_list[0] = path_other;
+  other_list[1] = NULL;
+
+  // If one of the paths has no segments at all, the (equal) root is all they
+  // can share.
+  if (!cwk_path_get_first_segment_joined(base_list, &base)) {
+    return base_root;
+  }
+  if (!cwk_path_get_first_segment_joined(other_list, &other)) {
+    return base_root;
+  }
+
+  // Skipping invisible segments behaves differently for absolute paths.
+  // Checking the base path is sufficient, because the roots are equal.
+  is_absolute = cwk_path_is_root_absolute(path_base, base_root);
+
+  // This marks the end of what both paths are known to share. Before the
+  // first segment was compared that is just the root.
+  common_end = path_base + base_root;
+
+  for (;;) {
+    // Segments which would be removed anyway are not compared, since we are
+    // interested in the true path.
+    if (!cwk_path_segment_joined_skip_invisible(&base, is_absolute)) {
+      break;
+    }
+    if (!cwk_path_segment_joined_skip_invisible(&other, is_absolute)) {
+      break;
+    }
+
+    // The first pair of segments which differs ends the intersection.
+    if (!cwk_path_is_string_equal(base.segment.begin, other.segment.begin,
+          base.segment.size, other.segment.size)) {
+      break;
+    }
+
+    // Both segments are equal, so the intersection reaches at least up to the
+    // end of this one.
+    common_end = base.segment.end;
+
+    // Advance both paths. We are done as soon as one of them runs out of
+    // segments.
+    if (!cwk_path_get_next_segment_joined(&base)) {
+      break;
+    }
+    if (!cwk_path_get_next_segment_joined(&other)) {
+      break;
+    }
+  }
+
+  // The intersection is the base path up to the last shared position.
+  return (size_t)(common_end - path_base);
+}
+
+/**
+ * Finds the first segment of the path, not counting the root.
+ *
+ * @param path The path which is inspected.
+ * @param segment The segment structure handed to the lookup.
+ * @return The value returned by cwk_path_get_first_segment_without_root().
+ */
+bool cwk_path_get_first_segment(const char *path, struct cwk_segment *segment)
+{
+  size_t root_length;
+
+  // The root is an entity of its own and never part of a segment, so the
+  // search for the first segment starts right behind it.
+  cwk_path_get_root(path, &root_length);
+
+  return cwk_path_get_first_segment_without_root(path, path + root_length,
+    segment);
+}
+
+/**
+ * Finds the last segment of the path.
+ *
+ * @param path The path which is inspected.
+ * @param segment The segment structure which is advanced to the last segment.
+ * @return True if the path has at least one segment, false otherwise.
+ */
+bool cwk_path_get_last_segment(const char *path, struct cwk_segment *segment)
+{
+  bool advanced;
+
+  // A path without a first segment has no last segment either.
+  if (!cwk_path_get_first_segment(path, segment)) {
+    return false;
+  }
+
+  // We keep stepping forward until there is no next segment. The step
+  // function leaves the segment untouched when it fails, so the caller's
+  // structure holds the last segment afterwards.
+  do {
+    advanced = cwk_path_get_next_segment(segment);
+  } while (advanced);
+
+  return true;
+}
+
+/**
+ * Advances the segment to the segment following it.
+ *
+ * @param segment The current segment. Its begin, end and size are updated on
+ * success and left untouched on failure.
+ * @return True if a following segment was found, false if the string ends
+ * behind the current segment or behind the separators following it.
+ */
+bool cwk_path_get_next_segment(struct cwk_segment *segment)
+{
+  const char *cursor;
+
+  // We start right behind the current segment. If the string ends there,
+  // there is nothing to advance to.
+  cursor = segment->begin + segment->size;
+  if (*cursor == '\0') {
+    return false;
+  }
+
+  // Otherwise a segment can only be ended by a separator. We step over that
+  // one and over all separators which directly follow it.
+  assert(cwk_path_is_separator(cursor));
+  for (++cursor; cwk_path_is_separator(cursor); ++cursor) {
+    // Nothing to do, the loop header advances the cursor.
+  }
+
+  // The separators may have been trailing ones, in which case the string ends
+  // here and no further segment exists.
+  if (*cursor == '\0') {
+    return false;
+  }
+
+  // We are at the first character of the next segment. It reaches up to the
+  // next stop.
+  segment->begin = cursor;
+  segment->end = cwk_path_find_next_stop(cursor);
+  segment->size = (size_t)(segment->end - segment->begin);
+
+  return true;
+}
+
+/**
+ * Moves the segment back to the segment preceding it.
+ *
+ * @param segment The current segment. Its begin, end and size are updated on
+ * success and left untouched on failure.
+ * @return True if a preceding segment was found, false if the current segment
+ * is the first one.
+ */
+bool cwk_path_get_previous_segment(struct cwk_segment *segment)
+{
+  const char *cursor;
+
+  // If the current segment starts where the segments of the path start, there
+  // can be nothing in front of it.
+  cursor = segment->begin;
+  if (cursor <= segment->segments) {
+    return false;
+  }
+
+  // Walk backwards over the separators in front of the current segment.
+  for (;;) {
+    --cursor;
+
+    // Running past the beginning of the segments means there were only
+    // separators and therefore no previous segment. The caller's structure
+    // has not been modified at this point.
+    if (cursor < segment->segments) {
+      return false;
+    }
+
+    if (!cwk_path_is_separator(cursor)) {
+      break;
+    }
+  }
+
+  // The cursor rests on the last character of the previous segment now. The
+  // segment ends right behind it and begins at the previous stop.
+  segment->end = cursor + 1;
+  segment->begin = cwk_path_find_previous_stop(segment->segments, cursor);
+  segment->size = (size_t)(segment->end - segment->begin);
+
+  return true;
+}
+
+/**
+ * Classifies a segment as a reference to the current directory ("."), a
+ * reference to the parent directory ("..") or a normal segment.
+ *
+ * @param segment The segment which is classified.
+ * @return CWK_CURRENT, CWK_BACK or CWK_NORMAL.
+ */
+enum cwk_segment_type cwk_path_get_segment_type(
+  const struct cwk_segment *segment)
+{
+  const size_t compared_size = segment->size;
+
+  // The comparison is limited to the size of the segment. The check against
+  // "." has to come first: a segment of size one which holds a dot would
+  // compare equal to the first character of ".." as well.
+  if (strncmp(segment->begin, ".", compared_size) != 0) {
+    return strncmp(segment->begin, "..", compared_size) == 0 ? CWK_BACK
+                                                             : CWK_NORMAL;
+  }
+
+  return CWK_CURRENT;
+}
+
+/**
+ * Determines whether the first character of the string is a separator in the
+ * currently configured path style.
+ *
+ * @param str The string whose first character is inspected.
+ * @return True if the character matches one of the separators of the current
+ * style, false otherwise.
+ */
+bool cwk_path_is_separator(const char *str)
+{
+  const char *candidate;
+
+  // Compare the character against each separator of the active style.
+  for (candidate = separators[path_style]; *candidate != '\0'; ++candidate) {
+    if (*str == *candidate) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/**
+ * Writes a copy of the path the segment belongs to into the buffer, with the
+ * content of the segment replaced by the submitted value.
+ *
+ * @param segment The segment which is replaced.
+ * @param value The replacement. Separators at its beginning and end are
+ * ignored.
+ * @param buffer The buffer receiving the result.
+ * @param buffer_size The size of the receiving buffer.
+ * @return The size reported for the resulting path.
+ */
+size_t cwk_path_change_segment(struct cwk_segment *segment, const char *value,
+  char *buffer, size_t buffer_size)
+{
+  size_t written, replacement_length, tail_length;
+  const char *value_end;
+
+  // The head is everything in front of the segment. It is carried over
+  // without any changes.
+  written = cwk_path_output_sized(buffer, buffer_size, 0, segment->path,
+    (size_t)(segment->begin - segment->path));
+
+  // The value is trimmed on both sides. In front we simply move the pointer
+  // past all separators.
+  for (; cwk_path_is_separator(value); ++value) {
+    // Nothing to do, the loop header advances the pointer.
+  }
+
+  // For the other side we look up the terminating '\0' ...
+  value_end = value;
+  while (*value_end != '\0') {
+    ++value_end;
+  }
+
+  // ... and move back over all separators in front of it, as long as there
+  // are characters left.
+  while (value_end > value && cwk_path_is_separator(value_end - 1)) {
+    --value_end;
+  }
+  replacement_length = (size_t)(value_end - value);
+
+  // The tail is everything behind the segment and stays the same as well.
+  tail_length = strlen(segment->end);
+
+  // The tail is written first, at its final position. If the buffer overlaps
+  // with the source, a value which is longer than the old segment would
+  // otherwise overwrite the tail before it has been copied.
+  cwk_path_output_sized(buffer, buffer_size, written + replacement_length,
+    segment->end, tail_length);
+
+  // Now the trimmed value fits in between the head and the tail.
+  written += cwk_path_output_sized(buffer, buffer_size, written, value,
+    replacement_length);
+
+  // Account for the tail, which was written already, and terminate the
+  // output.
+  written += tail_length;
+  cwk_path_terminate_output(buffer, buffer_size, written);
+
+  return written;
+}
+
+/**
+ * Guesses the style of a path. The following checks are made in this order:
+ * a windows root longer than one character means windows, the first slash or
+ * backslash found decides between unix and windows, a last segment starting
+ * with a dot means unix, and a dot anywhere from the last segment onwards
+ * means windows. If none of the checks apply, unix is assumed.
+ *
+ * @param path The path whose style is guessed.
+ * @return CWK_STYLE_WINDOWS or CWK_STYLE_UNIX.
+ */
+enum cwk_path_style cwk_path_guess_style(const char *path)
+{
+  const char unix_separator = *separators[CWK_STYLE_UNIX];
+  const char windows_separator = *separators[CWK_STYLE_WINDOWS];
+  const char *cursor;
+  size_t windows_root_length;
+  struct cwk_segment last;
+
+  // Only windows knows roots which are longer than a single slash. If the
+  // path starts with something like "C:", it is a windows path.
+  cwk_path_get_root_windows(path, &windows_root_length);
+  if (windows_root_length > 1) {
+    return CWK_STYLE_WINDOWS;
+  }
+
+  // The first slash we encounter decides. Windows supports forward slashes
+  // too, but the best guess is that a forward slash belongs to a unix path
+  // and a backslash to a windows path.
+  for (cursor = path; *cursor != '\0'; ++cursor) {
+    if (*cursor == unix_separator) {
+      return CWK_STYLE_UNIX;
+    }
+    if (*cursor == windows_separator) {
+      return CWK_STYLE_WINDOWS;
+    }
+  }
+
+  // There are no slashes in this path. Without any segment nothing can be
+  // assumed and we fall back to unix. A segment which starts with a dot is a
+  // hidden file or folder in the unix world, so that is our guess then.
+  if (!cwk_path_get_last_segment(path, &last) || *last.begin == '.') {
+    return CWK_STYLE_UNIX;
+  }
+
+  // A dot further into the segment might start an extension. File names with
+  // extensions are more likely on windows.
+  cursor = last.begin;
+  while (*cursor != '\0') {
+    if (*cursor == '.') {
+      return CWK_STYLE_WINDOWS;
+    }
+    ++cursor;
+  }
+
+  // None of the checks gave a hint, so we return the default, which is unix.
+  return CWK_STYLE_UNIX;
+}
+
+/**
+ * Selects the path style used by this file.
+ *
+ * @param style The new style, either CWK_STYLE_WINDOWS or CWK_STYLE_UNIX.
+ */
+void cwk_path_set_style(enum cwk_path_style style)
+{
+  // The style lives in a file-level variable. Functions which consult it will
+  // pick up the new value from now on.
+  assert(style == CWK_STYLE_WINDOWS || style == CWK_STYLE_UNIX);
+  path_style = style;
+}
+
+/**
+ * Reports the path style which is currently selected.
+ *
+ * @return The value of the file-level style variable.
+ */
+enum cwk_path_style cwk_path_get_style(void)
+{
+  return path_style;
+}