fs-util.c 46.6 KB
Newer Older
1
/* SPDX-License-Identifier: LGPL-2.1+ */
2

3 4 5 6 7 8
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
9
#include <linux/falloc.h>
10
#include <linux/magic.h>
11 12 13
#include <time.h>
#include <unistd.h>

14 15 16 17
#include "alloc-util.h"
#include "dirent-util.h"
#include "fd-util.h"
#include "fs-util.h"
18
#include "locale-util.h"
19 20 21
#include "log.h"
#include "macro.h"
#include "missing.h"
22 23 24
#include "mkdir.h"
#include "parse-util.h"
#include "path-util.h"
25
#include "process-util.h"
26
#include "stat-util.h"
27
#include "stdio-util.h"
28 29
#include "string-util.h"
#include "strv.h"
30
#include "time-util.h"
31
#include "tmpfile-util.h"
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
#include "user-util.h"
#include "util.h"

int unlink_noerrno(const char *path) {
        PROTECT_ERRNO;
        int r;

        r = unlink(path);
        if (r < 0)
                return -errno;

        return 0;
}

int rmdir_parents(const char *path, const char *stop) {
        size_t l;
        int r = 0;

        assert(path);
        assert(stop);

        l = strlen(path);

        /* Skip trailing slashes */
        while (l > 0 && path[l-1] == '/')
                l--;

        while (l > 0) {
                char *t;

                /* Skip last component */
                while (l > 0 && path[l-1] != '/')
                        l--;

                /* Skip trailing slashes */
                while (l > 0 && path[l-1] == '/')
                        l--;

                if (l <= 0)
                        break;

                t = strndup(path, l);
                if (!t)
                        return -ENOMEM;

                if (path_startswith(stop, t)) {
                        free(t);
                        return 0;
                }

                r = rmdir(t);
                free(t);

                if (r < 0)
                        if (errno != ENOENT)
                                return -errno;
        }

        return 0;
}

int rename_noreplace(int olddirfd, const char *oldpath, int newdirfd, const char *newpath) {
94
        int r;
95

96 97
        /* Try the ideal approach first */
        if (renameat2(olddirfd, oldpath, newdirfd, newpath, RENAME_NOREPLACE) >= 0)
98 99
                return 0;

100 101 102
        /* renameat2() exists since Linux 3.15, btrfs and FAT added support for it later. If it is not implemented,
         * fall back to a different method. */
        if (!IN_SET(errno, EINVAL, ENOSYS, ENOTTY))
103 104
                return -errno;

105 106 107 108 109 110 111 112 113 114 115 116
        /* Let's try to use linkat()+unlinkat() as fallback. This doesn't work on directories and on some file systems
         * that do not support hard links (such as FAT, most prominently), but for files it's pretty close to what we
         * want — though not atomic (i.e. for a short period both the new and the old filename will exist). */
        if (linkat(olddirfd, oldpath, newdirfd, newpath, 0) >= 0) {

                if (unlinkat(olddirfd, oldpath, 0) < 0) {
                        r = -errno; /* Backup errno before the following unlinkat() alters it */
                        (void) unlinkat(newdirfd, newpath, 0);
                        return r;
                }

                return 0;
117 118
        }

119
        if (!IN_SET(errno, EINVAL, ENOSYS, ENOTTY, EPERM)) /* FAT returns EPERM on link()… */
120 121
                return -errno;

122 123 124 125 126 127 128 129 130
        /* OK, neither RENAME_NOREPLACE nor linkat()+unlinkat() worked. Let's then fallback to the racy TOCTOU
         * vulnerable accessat(F_OK) check followed by classic, replacing renameat(), we have nothing better. */

        if (faccessat(newdirfd, newpath, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
                return -EEXIST;
        if (errno != ENOENT)
                return -errno;

        if (renameat(olddirfd, oldpath, newdirfd, newpath) < 0)
131 132 133 134 135 136
                return -errno;

        return 0;
}

int readlinkat_malloc(int fd, const char *p, char **ret) {
137
        size_t l = FILENAME_MAX+1;
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
        int r;

        assert(p);
        assert(ret);

        for (;;) {
                char *c;
                ssize_t n;

                c = new(char, l);
                if (!c)
                        return -ENOMEM;

                n = readlinkat(fd, p, c, l-1);
                if (n < 0) {
                        r = -errno;
                        free(c);
                        return r;
                }

                if ((size_t) n < l-1) {
                        c[n] = 0;
                        *ret = c;
                        return 0;
                }

                free(c);
                l *= 2;
        }
}

int readlink_malloc(const char *p, char **ret) {
        return readlinkat_malloc(AT_FDCWD, p, ret);
}

int readlink_value(const char *p, char **ret) {
        _cleanup_free_ char *link = NULL;
        char *value;
        int r;

        r = readlink_malloc(p, &link);
        if (r < 0)
                return r;

        value = basename(link);
        if (!value)
                return -ENOENT;

        value = strdup(value);
        if (!value)
                return -ENOMEM;

        *ret = value;

        return 0;
}

int readlink_and_make_absolute(const char *p, char **r) {
        _cleanup_free_ char *target = NULL;
        char *k;
        int j;

        assert(p);
        assert(r);

        j = readlink_malloc(p, &target);
        if (j < 0)
                return j;

        k = file_in_same_dir(p, target);
        if (!k)
                return -ENOMEM;

        *r = k;
        return 0;
}

int chmod_and_chown(const char *path, mode_t mode, uid_t uid, gid_t gid) {
216
        _cleanup_close_ int fd = -1;
217

218 219
        assert(path);

220 221
        fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW); /* Let's acquire an O_PATH fd, as precaution to change
                                                       * mode/owner on the same file */
222 223 224
        if (fd < 0)
                return -errno;

225
        return fchmod_and_chown(fd, mode, uid, gid);
226 227
}

228
int fchmod_and_chown(int fd, mode_t mode, uid_t uid, gid_t gid) {
229
        bool do_chown, do_chmod;
230 231
        struct stat st;

232 233 234 235 236 237 238
        /* Change ownership and access mode of the specified fd. Tries to do so safely, ensuring that at no
         * point in time the access mode is above the old access mode under the old ownership or the new
         * access mode under the new ownership. Note: this call tries hard to leave the access mode
         * unaffected if the uid/gid is changed, i.e. it undoes implicit suid/sgid dropping the kernel does
         * on chown().
         *
         * This call is happy with O_PATH fds. */
239

240 241
        if (fstat(fd, &st) < 0)
                return -errno;
242

243 244 245
        do_chown =
                (uid != UID_INVALID && st.st_uid != uid) ||
                (gid != GID_INVALID && st.st_gid != gid);
246

247 248 249 250 251
        do_chmod =
                !S_ISLNK(st.st_mode) && /* chmod is not defined on symlinks */
                ((mode != MODE_INVALID && ((st.st_mode ^ mode) & 07777) != 0) ||
                 do_chown); /* If we change ownership, make sure we reset the mode afterwards, since chown()
                             * modifies the access mode too */
252

253 254 255 256
        if (mode == MODE_INVALID)
                mode = st.st_mode; /* If we only shall do a chown(), save original mode, since chown() might break it. */
        else if ((mode & S_IFMT) != 0 && ((mode ^ st.st_mode) & S_IFMT) != 0)
                return -EINVAL; /* insist on the right file type if it was specified */
257

258 259
        if (do_chown && do_chmod) {
                mode_t minimal = st.st_mode & mode; /* the subset of the old and the new mask */
260

261 262
                if (((minimal ^ st.st_mode) & 07777) != 0)
                        if (fchmod_opath(fd, minimal & 07777) < 0)
263
                                return -errno;
264
        }
265

266 267 268
        if (do_chown)
                if (fchownat(fd, "", uid, gid, AT_EMPTY_PATH) < 0)
                        return -errno;
269

270 271 272
        if (do_chmod)
                if (fchmod_opath(fd, mode & 07777) < 0)
                        return -errno;
273

274
        return do_chown || do_chmod;
275 276
}

277 278 279 280 281 282 283 284 285 286 287
int fchmod_umask(int fd, mode_t m) {
        mode_t u;
        int r;

        u = umask(0777);
        r = fchmod(fd, m & (~u)) < 0 ? -errno : 0;
        umask(u);

        return r;
}

288
int fchmod_opath(int fd, mode_t m) {
289
        char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
290 291 292 293 294 295 296 297 298 299 300 301

        /* This function operates also on fd that might have been opened with
         * O_PATH. Indeed fchmodat() doesn't have the AT_EMPTY_PATH flag like
         * fchownat() does. */

        xsprintf(procfs_path, "/proc/self/fd/%i", fd);
        if (chmod(procfs_path, m) < 0)
                return -errno;

        return 0;
}

302 303 304 305 306 307
int fd_warn_permissions(const char *path, int fd) {
        struct stat st;

        if (fstat(fd, &st) < 0)
                return -errno;

308 309 310 311
        /* Don't complain if we are reading something that is not a file, for example /dev/null */
        if (!S_ISREG(st.st_mode))
                return 0;

312 313 314 315 316 317
        if (st.st_mode & 0111)
                log_warning("Configuration file %s is marked executable. Please remove executable permission bits. Proceeding anyway.", path);

        if (st.st_mode & 0002)
                log_warning("Configuration file %s is marked world-writable. Please remove world writability permission bits. Proceeding anyway.", path);

318
        if (getpid_cached() == 1 && (st.st_mode & 0044) != 0044)
319 320 321 322 323 324
                log_warning("Configuration file %s is marked world-inaccessible. This has no effect as configuration data is accessible via APIs without restrictions. Proceeding anyway.", path);

        return 0;
}

int touch_file(const char *path, bool parents, usec_t stamp, uid_t uid, gid_t gid, mode_t mode) {
325 326 327
        char fdpath[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
        _cleanup_close_ int fd = -1;
        int r, ret = 0;
328 329 330

        assert(path);

331 332 333 334
        /* Note that touch_file() does not follow symlinks: if invoked on an existing symlink, then it is the symlink
         * itself which is updated, not its target
         *
         * Returns the first error we encounter, but tries to apply as much as possible. */
335

336 337 338 339 340 341 342 343 344
        if (parents)
                (void) mkdir_parents(path, 0755);

        /* Initially, we try to open the node with O_PATH, so that we get a reference to the node. This is useful in
         * case the path refers to an existing device or socket node, as we can open it successfully in all cases, and
         * won't trigger any driver magic or so. */
        fd = open(path, O_PATH|O_CLOEXEC|O_NOFOLLOW);
        if (fd < 0) {
                if (errno != ENOENT)
345 346
                        return -errno;

347 348 349 350
                /* if the node doesn't exist yet, we create it, but with O_EXCL, so that we only create a regular file
                 * here, and nothing else */
                fd = open(path, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, IN_SET(mode, 0, MODE_INVALID) ? 0644 : mode);
                if (fd < 0)
351 352 353
                        return -errno;
        }

354 355 356 357 358
        /* Let's make a path from the fd, and operate on that. With this logic, we can adjust the access mode,
         * ownership and time of the file node in all cases, even if the fd refers to an O_PATH object — which is
         * something fchown(), fchmod(), futimensat() don't allow. */
        xsprintf(fdpath, "/proc/self/fd/%i", fd);

359
        ret = fchmod_and_chown(fd, mode, uid, gid);
360

361 362 363 364 365
        if (stamp != USEC_INFINITY) {
                struct timespec ts[2];

                timespec_store(&ts[0], stamp);
                ts[1] = ts[0];
366
                r = utimensat(AT_FDCWD, fdpath, ts, 0);
367
        } else
368 369
                r = utimensat(AT_FDCWD, fdpath, NULL, 0);
        if (r < 0 && ret >= 0)
370 371
                return -errno;

372
        return ret;
373 374 375 376 377 378
}

int touch(const char *path) {
        return touch_file(path, false, USEC_INFINITY, UID_INVALID, GID_INVALID, MODE_INVALID);
}

379 380
int symlink_idempotent(const char *from, const char *to, bool make_relative) {
        _cleanup_free_ char *relpath = NULL;
381 382 383 384 385
        int r;

        assert(from);
        assert(to);

386 387 388 389 390 391 392 393 394 395 396 397 398 399
        if (make_relative) {
                _cleanup_free_ char *parent = NULL;

                parent = dirname_malloc(to);
                if (!parent)
                        return -ENOMEM;

                r = path_make_relative(parent, from, &relpath);
                if (r < 0)
                        return r;

                from = relpath;
        }

400
        if (symlink(from, to) < 0) {
401 402
                _cleanup_free_ char *p = NULL;

403 404 405 406
                if (errno != EEXIST)
                        return -errno;

                r = readlink_malloc(to, &p);
407 408 409
                if (r == -EINVAL) /* Not a symlink? In that case return the original error we encountered: -EEXIST */
                        return -EEXIST;
                if (r < 0) /* Any other error? In that case propagate it as is */
410 411
                        return r;

412 413
                if (!streq(p, from)) /* Not the symlink we want it to be? In that case, propagate the original -EEXIST */
                        return -EEXIST;
414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482
        }

        return 0;
}

int symlink_atomic(const char *from, const char *to) {
        _cleanup_free_ char *t = NULL;
        int r;

        assert(from);
        assert(to);

        r = tempfn_random(to, NULL, &t);
        if (r < 0)
                return r;

        if (symlink(from, t) < 0)
                return -errno;

        if (rename(t, to) < 0) {
                unlink_noerrno(t);
                return -errno;
        }

        return 0;
}

int mknod_atomic(const char *path, mode_t mode, dev_t dev) {
        _cleanup_free_ char *t = NULL;
        int r;

        assert(path);

        r = tempfn_random(path, NULL, &t);
        if (r < 0)
                return r;

        if (mknod(t, mode, dev) < 0)
                return -errno;

        if (rename(t, path) < 0) {
                unlink_noerrno(t);
                return -errno;
        }

        return 0;
}

int mkfifo_atomic(const char *path, mode_t mode) {
        _cleanup_free_ char *t = NULL;
        int r;

        assert(path);

        r = tempfn_random(path, NULL, &t);
        if (r < 0)
                return r;

        if (mkfifo(t, mode) < 0)
                return -errno;

        if (rename(t, path) < 0) {
                unlink_noerrno(t);
                return -errno;
        }

        return 0;
}

483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507
int mkfifoat_atomic(int dirfd, const char *path, mode_t mode) {
        _cleanup_free_ char *t = NULL;
        int r;

        assert(path);

        if (path_is_absolute(path))
                return mkfifo_atomic(path, mode);

        /* We're only interested in the (random) filename.  */
        r = tempfn_random_child("", NULL, &t);
        if (r < 0)
                return r;

        if (mkfifoat(dirfd, t, mode) < 0)
                return -errno;

        if (renameat(dirfd, t, dirfd, path) < 0) {
                unlink_noerrno(t);
                return -errno;
        }

        return 0;
}

508 509
int get_files_in_directory(const char *path, char ***list) {
        _cleanup_closedir_ DIR *d = NULL;
510
        struct dirent *de;
511 512 513 514 515 516 517 518 519 520 521 522 523
        size_t bufsize = 0, n = 0;
        _cleanup_strv_free_ char **l = NULL;

        assert(path);

        /* Returns all files in a directory in *list, and the number
         * of files as return value. If list is NULL returns only the
         * number. */

        d = opendir(path);
        if (!d)
                return -errno;

524
        FOREACH_DIRENT_ALL(de, d, return -errno) {
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543
                dirent_ensure_type(d, de);

                if (!dirent_is_file(de))
                        continue;

                if (list) {
                        /* one extra slot is needed for the terminating NULL */
                        if (!GREEDY_REALLOC(l, bufsize, n + 2))
                                return -ENOMEM;

                        l[n] = strdup(de->d_name);
                        if (!l[n])
                                return -ENOMEM;

                        l[++n] = NULL;
                } else
                        n++;
        }

544 545
        if (list)
                *list = TAKE_PTR(l);
546 547 548

        return n;
}
549

550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567
static int getenv_tmp_dir(const char **ret_path) {
        const char *n;
        int r, ret = 0;

        assert(ret_path);

        /* We use the same order of environment variables python uses in tempfile.gettempdir():
         * https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir */
        FOREACH_STRING(n, "TMPDIR", "TEMP", "TMP") {
                const char *e;

                e = secure_getenv(n);
                if (!e)
                        continue;
                if (!path_is_absolute(e)) {
                        r = -ENOTDIR;
                        goto next;
                }
568
                if (!path_is_normalized(e)) {
569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637
                        r = -EPERM;
                        goto next;
                }

                r = is_dir(e, true);
                if (r < 0)
                        goto next;
                if (r == 0) {
                        r = -ENOTDIR;
                        goto next;
                }

                *ret_path = e;
                return 1;

        next:
                /* Remember first error, to make this more debuggable */
                if (ret >= 0)
                        ret = r;
        }

        if (ret < 0)
                return ret;

        *ret_path = NULL;
        return ret;
}

static int tmp_dir_internal(const char *def, const char **ret) {
        const char *e;
        int r, k;

        assert(def);
        assert(ret);

        r = getenv_tmp_dir(&e);
        if (r > 0) {
                *ret = e;
                return 0;
        }

        k = is_dir(def, true);
        if (k == 0)
                k = -ENOTDIR;
        if (k < 0)
                return r < 0 ? r : k;

        *ret = def;
        return 0;
}

int var_tmp_dir(const char **ret) {

        /* Returns the location for "larger" temporary files, that is backed by physical storage if available, and thus
         * even might survive a boot: /var/tmp. If $TMPDIR (or related environment variables) are set, its value is
         * returned preferably however. Note that both this function and tmp_dir() below are affected by $TMPDIR,
         * making it a variable that overrides all temporary file storage locations. */

        return tmp_dir_internal("/var/tmp", ret);
}

int tmp_dir(const char **ret) {

        /* Similar to var_tmp_dir() above, but returns the location for "smaller" temporary files, which is usually
         * backed by an in-memory file system: /tmp. */

        return tmp_dir_internal("/tmp", ret);
}

638 639 640 641 642 643 644 645 646 647 648
int unlink_or_warn(const char *filename) {
        if (unlink(filename) < 0 && errno != ENOENT)
                /* If the file doesn't exist and the fs simply was read-only (in which
                 * case unlink() returns EROFS even if the file doesn't exist), don't
                 * complain */
                if (errno != EROFS || access(filename, F_OK) >= 0)
                        return log_error_errno(errno, "Failed to remove \"%s\": %m", filename);

        return 0;
}

649
int inotify_add_watch_fd(int fd, int what, uint32_t mask) {
650
        char path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
651 652 653 654 655 656 657 658 659 660 661
        int r;

        /* This is like inotify_add_watch(), except that the file to watch is not referenced by a path, but by an fd */
        xsprintf(path, "/proc/self/fd/%i", what);

        r = inotify_add_watch(fd, path, mask);
        if (r < 0)
                return -errno;

        return r;
}
662

663
static bool unsafe_transition(const struct stat *a, const struct stat *b) {
664 665 666 667 668
        /* Returns true if the transition from a to b is safe, i.e. that we never transition from unprivileged to
         * privileged files or directories. Why bother? So that unprivileged code can't symlink to privileged files
         * making us believe we read something safe even though it isn't safe in the specific context we open it in. */

        if (a->st_uid == 0) /* Transitioning from privileged to unprivileged is always fine */
669 670 671 672 673 674 675
                return false;

        return a->st_uid != b->st_uid; /* Otherwise we need to stay within the same UID */
}

static int log_unsafe_transition(int a, int b, const char *path, unsigned flags) {
        _cleanup_free_ char *n1 = NULL, *n2 = NULL;
676

677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698
        if (!FLAGS_SET(flags, CHASE_WARN))
                return -ENOLINK;

        (void) fd_get_path(a, &n1);
        (void) fd_get_path(b, &n2);

        return log_warning_errno(SYNTHETIC_ERRNO(ENOLINK),
                                 "Detected unsafe path transition %s %s %s during canonicalization of %s.",
                                 n1, special_glyph(SPECIAL_GLYPH_ARROW), n2, path);
}

static int log_autofs_mount_point(int fd, const char *path, unsigned flags) {
        _cleanup_free_ char *n1 = NULL;

        if (!FLAGS_SET(flags, CHASE_WARN))
                return -EREMOTE;

        (void) fd_get_path(fd, &n1);

        return log_warning_errno(SYNTHETIC_ERRNO(EREMOTE),
                                 "Detected autofs mount point %s during canonicalization of %s.",
                                 n1, path);
699 700
}

701
int chase_symlinks(const char *path, const char *original_root, unsigned flags, char **ret) {
702 703
        _cleanup_free_ char *buffer = NULL, *done = NULL, *root = NULL;
        _cleanup_close_ int fd = -1;
704
        unsigned max_follow = CHASE_SYMLINKS_MAX; /* how many symlinks to follow before giving up and returning ELOOP */
705
        struct stat previous_stat;
706
        bool exists = true;
707 708 709 710 711
        char *todo;
        int r;

        assert(path);

712
        /* Either the file may be missing, or we return an fd to the final object, but both make no sense */
713
        if (FLAGS_SET(flags, CHASE_NONEXISTENT | CHASE_OPEN))
714 715
                return -EINVAL;

716
        if (FLAGS_SET(flags, CHASE_STEP | CHASE_OPEN))
717 718
                return -EINVAL;

719 720 721
        if (isempty(path))
                return -EINVAL;

722 723 724
        /* This is a lot like canonicalize_file_name(), but takes an additional "root" parameter, that allows following
         * symlinks relative to a root directory, instead of the root of the host.
         *
725 726 727 728
         * Note that "root" primarily matters if we encounter an absolute symlink. It is also used when following
         * relative symlinks to ensure they cannot be used to "escape" the root directory. The path parameter passed is
         * assumed to be already prefixed by it, except if the CHASE_PREFIX_ROOT flag is set, in which case it is first
         * prefixed accordingly.
729 730 731 732 733 734
         *
         * Algorithmically this operates on two path buffers: "done" are the components of the path we already
         * processed and resolved symlinks, "." and ".." of. "todo" are the components of the path we still need to
         * process. On each iteration, we move one component from "todo" to "done", processing it's special meaning
         * each time. The "todo" path always starts with at least one slash, the "done" path always ends in no
         * slash. We always keep an O_PATH fd to the component we are currently processing, thus keeping lookup races
735
         * to a minimum.
736 737 738 739
         *
         * Suggested usage: whenever you want to canonicalize a path, use this function. Pass the absolute path you got
         * as-is: fully qualified and relative to your host's root. Optionally, specify the root parameter to tell this
         * function what to do when encountering a symlink with an absolute path as directory: prefix it by the
740 741 742 743 744
         * specified path.
         *
         * There are three ways to invoke this function:
         *
         * 1. Without CHASE_STEP or CHASE_OPEN: in this case the path is resolved and the normalized path is returned
745 746 747
         *    in `ret`. The return value is < 0 on error. If CHASE_NONEXISTENT is also set, 0 is returned if the file
         *    doesn't exist, > 0 otherwise. If CHASE_NONEXISTENT is not set, >= 0 is returned if the destination was
         *    found, -ENOENT if it wasn't.
748 749 750 751 752 753 754 755 756 757 758 759 760
         *
         * 2. With CHASE_OPEN: in this case the destination is opened after chasing it as O_PATH and this file
         *    descriptor is returned as return value. This is useful to open files relative to some root
         *    directory. Note that the returned O_PATH file descriptors must be converted into a regular one (using
         *    fd_reopen() or such) before it can be used for reading/writing. CHASE_OPEN may not be combined with
         *    CHASE_NONEXISTENT.
         *
         * 3. With CHASE_STEP: in this case only a single step of the normalization is executed, i.e. only the first
         *    symlink or ".." component of the path is resolved, and the resulting path is returned. This is useful if
         *    a caller wants to trace the a path through the file system verbosely. Returns < 0 on error, > 0 if the
         *    path is fully normalized, and == 0 for each normalization step. This may be combined with
         *    CHASE_NONEXISTENT, in which case 1 is returned when a component is not found.
         *
761 762
         * 4. With CHASE_SAFE: in this case the path must not contain unsafe transitions, i.e. transitions from
         *    unprivileged to privileged files or directories. In such cases the return value is -ENOLINK. If
763
         *    CHASE_WARN is also set, a warning describing the unsafe transition is emitted.
764
         *
765 766 767
         * 5. With CHASE_NO_AUTOFS: in this case if an autofs mount point is encountered, path normalization
         *    is aborted and -EREMOTE is returned. If CHASE_WARN is also set, a warning showing the path of
         *    the mount point is emitted.
768
         *
769
         */
770

771
        /* A root directory of "/" or "" is identical to none */
772
        if (empty_or_root(original_root))
773 774
                original_root = NULL;

775
        if (!original_root && !ret && (flags & (CHASE_NONEXISTENT|CHASE_NO_AUTOFS|CHASE_SAFE|CHASE_OPEN|CHASE_STEP)) == CHASE_OPEN) {
776 777
                /* Shortcut the CHASE_OPEN case if the caller isn't interested in the actual path and has no root set
                 * and doesn't care about any of the other special features we provide either. */
778
                r = open(path, O_PATH|O_CLOEXEC|((flags & CHASE_NOFOLLOW) ? O_NOFOLLOW : 0));
779 780 781 782 783 784
                if (r < 0)
                        return -errno;

                return r;
        }

785 786
        if (original_root) {
                r = path_make_absolute_cwd(original_root, &root);
787 788
                if (r < 0)
                        return r;
789

790 791 792 793 794 795
                if (flags & CHASE_PREFIX_ROOT) {

                        /* We don't support relative paths in combination with a root directory */
                        if (!path_is_absolute(path))
                                return -EINVAL;

796
                        path = prefix_roota(root, path);
797
                }
798 799
        }

800 801 802 803
        r = path_make_absolute_cwd(path, &buffer);
        if (r < 0)
                return r;

804 805 806 807
        fd = open("/", O_CLOEXEC|O_NOFOLLOW|O_PATH);
        if (fd < 0)
                return -errno;

808 809 810 811 812
        if (flags & CHASE_SAFE) {
                if (fstat(fd, &previous_stat) < 0)
                        return -errno;
        }

813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830
        todo = buffer;
        for (;;) {
                _cleanup_free_ char *first = NULL;
                _cleanup_close_ int child = -1;
                struct stat st;
                size_t n, m;

                /* Determine length of first component in the path */
                n = strspn(todo, "/");                  /* The slashes */
                m = n + strcspn(todo + n, "/");         /* The entire length of the component */

                /* Extract the first component. */
                first = strndup(todo, m);
                if (!first)
                        return -ENOMEM;

                todo += m;

831 832 833 834
                /* Empty? Then we reached the end. */
                if (isempty(first))
                        break;

835
                /* Just a single slash? Then we reached the end. */
836 837
                if (path_equal(first, "/")) {
                        /* Preserve the trailing slash */
838 839 840 841

                        if (flags & CHASE_TRAIL_SLASH)
                                if (!strextend(&done, "/", NULL))
                                        return -ENOMEM;
842

843
                        break;
844
                }
845 846 847 848 849 850 851 852

                /* Just a dot? Then let's eat this up. */
                if (path_equal(first, "/."))
                        continue;

                /* Two dots? Then chop off the last bit of what we already found out. */
                if (path_equal(first, "/..")) {
                        _cleanup_free_ char *parent = NULL;
853
                        _cleanup_close_ int fd_parent = -1;
854

855 856
                        /* If we already are at the top, then going up will not change anything. This is in-line with
                         * how the kernel handles this. */
857
                        if (empty_or_root(done))
858
                                continue;
859 860 861 862 863

                        parent = dirname_malloc(done);
                        if (!parent)
                                return -ENOMEM;

864
                        /* Don't allow this to leave the root dir.  */
865 866 867
                        if (root &&
                            path_startswith(done, root) &&
                            !path_startswith(parent, root))
868
                                continue;
869 870 871

                        free_and_replace(done, parent);

872 873 874
                        if (flags & CHASE_STEP)
                                goto chased_one;

875 876 877 878
                        fd_parent = openat(fd, "..", O_CLOEXEC|O_NOFOLLOW|O_PATH);
                        if (fd_parent < 0)
                                return -errno;

879 880 881 882
                        if (flags & CHASE_SAFE) {
                                if (fstat(fd_parent, &st) < 0)
                                        return -errno;

883 884
                                if (unsafe_transition(&previous_stat, &st))
                                        return log_unsafe_transition(fd, fd_parent, path, flags);
885 886 887 888

                                previous_stat = st;
                        }

889
                        safe_close(fd);
890
                        fd = TAKE_FD(fd_parent);
891 892 893 894 895 896

                        continue;
                }

                /* Otherwise let's see what this is. */
                child = openat(fd, first + n, O_CLOEXEC|O_NOFOLLOW|O_PATH);
897 898 899 900
                if (child < 0) {

                        if (errno == ENOENT &&
                            (flags & CHASE_NONEXISTENT) &&
901
                            (isempty(todo) || path_is_normalized(todo))) {
902 903 904 905 906

                                /* If CHASE_NONEXISTENT is set, and the path does not exist, then that's OK, return
                                 * what we got so far. But don't allow this if the remaining path contains "../ or "./"
                                 * or something else weird. */

907 908 909 910
                                /* If done is "/", as first also contains slash at the head, then remove this redundant slash. */
                                if (streq_ptr(done, "/"))
                                        *done = '\0';

911 912 913 914 915 916 917
                                if (!strextend(&done, first, todo, NULL))
                                        return -ENOMEM;

                                exists = false;
                                break;
                        }

918
                        return -errno;
919
                }
920 921 922

                if (fstat(child, &st) < 0)
                        return -errno;
923
                if ((flags & CHASE_SAFE) &&
924
                    (empty_or_root(root) || (size_t)(todo - buffer) > strlen(root)) &&
925 926
                    unsafe_transition(&previous_stat, &st))
                        return log_unsafe_transition(fd, child, path, flags);
927 928 929

                previous_stat = st;

930
                if ((flags & CHASE_NO_AUTOFS) &&
931
                    fd_is_fs_type(child, AUTOFS_SUPER_MAGIC) > 0)
932
                        return log_autofs_mount_point(child, path, flags);
933

934
                if (S_ISLNK(st.st_mode) && !((flags & CHASE_NOFOLLOW) && isempty(todo))) {
935 936
                        char *joined;

937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959
                        _cleanup_free_ char *destination = NULL;

                        /* This is a symlink, in this case read the destination. But let's make sure we don't follow
                         * symlinks without bounds. */
                        if (--max_follow <= 0)
                                return -ELOOP;

                        r = readlinkat_malloc(fd, first + n, &destination);
                        if (r < 0)
                                return r;
                        if (isempty(destination))
                                return -EINVAL;

                        if (path_is_absolute(destination)) {

                                /* An absolute destination. Start the loop from the beginning, but use the root
                                 * directory as base. */

                                safe_close(fd);
                                fd = open(root ?: "/", O_CLOEXEC|O_NOFOLLOW|O_PATH);
                                if (fd < 0)
                                        return -errno;

960 961 962 963
                                if (flags & CHASE_SAFE) {
                                        if (fstat(fd, &st) < 0)
                                                return -errno;

964 965
                                        if (unsafe_transition(&previous_stat, &st))
                                                return log_unsafe_transition(child, fd, path, flags);
966 967 968 969

                                        previous_stat = st;
                                }

970 971 972 973 974 975 976 977 978 979 980
                                free(done);

                                /* Note that we do not revalidate the root, we take it as is. */
                                if (isempty(root))
                                        done = NULL;
                                else {
                                        done = strdup(root);
                                        if (!done)
                                                return -ENOMEM;
                                }

981 982
                                /* Prefix what's left to do with what we just read, and start the loop again, but
                                 * remain in the current directory. */
983
                                joined = path_join(destination, todo);
984
                        } else
985
                                joined = path_join("/", destination, todo);
986 987
                        if (!joined)
                                return -ENOMEM;
988

989 990
                        free(buffer);
                        todo = buffer = joined;
991

992 993 994
                        if (flags & CHASE_STEP)
                                goto chased_one;

995 996 997 998
                        continue;
                }

                /* If this is not a symlink, then let's just add the name we read to what we already verified. */
999 1000 1001
                if (!done)
                        done = TAKE_PTR(first);
                else {
1002 1003 1004 1005
                        /* If done is "/", as first also contains slash at the head, then remove this redundant slash. */
                        if (streq(done, "/"))
                                *done = '\0';

1006 1007 1008 1009 1010 1011
                        if (!strextend(&done, first, NULL))
                                return -ENOMEM;
                }

                /* And iterate again, but go one directory further down. */
                safe_close(fd);
1012
                fd = TAKE_FD(child);
1013 1014 1015 1016 1017 1018 1019 1020 1021
        }

        if (!done) {
                /* Special case, turn the empty string into "/", to indicate the root directory. */
                done = strdup("/");
                if (!done)
                        return -ENOMEM;
        }

1022 1023
        if (ret)
                *ret = TAKE_PTR(done);
1024

1025 1026 1027 1028 1029
        if (flags & CHASE_OPEN) {
                /* Return the O_PATH fd we currently are looking to the caller. It can translate it to a proper fd by
                 * opening /proc/self/fd/xyz. */

                assert(fd >= 0);
1030
                return TAKE_FD(fd);
1031 1032
        }

1033 1034 1035
        if (flags & CHASE_STEP)
                return 1;

1036
        return exists;
1037 1038 1039 1040 1041

chased_one:
        if (ret) {
                char *c;

1042 1043 1044
                c = strjoin(strempty(done), todo);
                if (!c)
                        return -ENOMEM;
1045 1046 1047 1048 1049

                *ret = c;
        }

        return 0;
1050
}
1051

1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065
int chase_symlinks_and_open(
                const char *path,
                const char *root,
                unsigned chase_flags,
                int open_flags,
                char **ret_path) {

        _cleanup_close_ int path_fd = -1;
        _cleanup_free_ char *p = NULL;
        int r;

        if (chase_flags & CHASE_NONEXISTENT)
                return -EINVAL;

1066
        if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) {
1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105
                /* Shortcut this call if none of the special features of this call are requested */
                r = open(path, open_flags);
                if (r < 0)
                        return -errno;

                return r;
        }

        path_fd = chase_symlinks(path, root, chase_flags|CHASE_OPEN, ret_path ? &p : NULL);
        if (path_fd < 0)
                return path_fd;

        r = fd_reopen(path_fd, open_flags);
        if (r < 0)
                return r;

        if (ret_path)
                *ret_path = TAKE_PTR(p);

        return r;
}

int chase_symlinks_and_opendir(
                const char *path,
                const char *root,
                unsigned chase_flags,
                char **ret_path,
                DIR **ret_dir) {

        char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
        _cleanup_close_ int path_fd = -1;
        _cleanup_free_ char *p = NULL;
        DIR *d;

        if (!ret_dir)
                return -EINVAL;
        if (chase_flags & CHASE_NONEXISTENT)
                return -EINVAL;

1106
        if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) {
1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131
                /* Shortcut this call if none of the special features of this call are requested */
                d = opendir(path);
                if (!d)
                        return -errno;

                *ret_dir = d;
                return 0;
        }

        path_fd = chase_symlinks(path, root, chase_flags|CHASE_OPEN, ret_path ? &p : NULL);
        if (path_fd < 0)
                return path_fd;

        xsprintf(procfs_path, "/proc/self/fd/%i", path_fd);
        d = opendir(procfs_path);
        if (!d)
                return -errno;

        if (ret_path)
                *ret_path = TAKE_PTR(p);

        *ret_dir = d;
        return 0;
}

1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171
int chase_symlinks_and_stat(
                const char *path,
                const char *root,
                unsigned chase_flags,
                char **ret_path,
                struct stat *ret_stat) {

        _cleanup_close_ int path_fd = -1;
        _cleanup_free_ char *p = NULL;

        assert(path);
        assert(ret_stat);

        if (chase_flags & CHASE_NONEXISTENT)
                return -EINVAL;

        if (empty_or_root(root) && !ret_path && (chase_flags & (CHASE_NO_AUTOFS|CHASE_SAFE)) == 0) {
                /* Shortcut this call if none of the special features of this call are requested */
                if (stat(path, ret_stat) < 0)
                        return -errno;

                return 1;
        }

        path_fd = chase_symlinks(path, root, chase_flags|CHASE_OPEN, ret_path ? &p : NULL);
        if (path_fd < 0)
                return path_fd;

        if (fstat(path_fd, ret_stat) < 0)
                return -errno;

        if (ret_path)
                *ret_path = TAKE_PTR(p);

        if (chase_flags & CHASE_OPEN)
                return TAKE_FD(path_fd);

        return 1;
}

1172
int access_fd(int fd, int mode) {
1173
        char p[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(fd) + 1];
1174 1175 1176 1177 1178 1179 1180
        int r;

        /* Like access() but operates on an already open fd */

        xsprintf(p, "/proc/self/fd/%i", fd);
        r = access(p, mode);
        if (r < 0)
1181
                return -errno;
1182 1183 1184

        return r;
}
1185

1186 1187 1188 1189 1190 1191 1192 1193 1194
void unlink_tempfilep(char (*p)[]) {
        /* If the file is created with mkstemp(), it will (almost always)
         * change the suffix. Treat this as a sign that the file was
         * successfully created. We ignore both the rare case where the
         * original suffix is used and unlink failures. */
        if (!endswith(*p, ".XXXXXX"))
                (void) unlink_noerrno(*p);
}

1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238
int unlinkat_deallocate(int fd, const char *name, int flags) {
        _cleanup_close_ int truncate_fd = -1;
        struct stat st;
        off_t l, bs;

        /* Operates like unlinkat() but also deallocates the file contents if it is a regular file and there's no other
         * link to it. This is useful to ensure that other processes that might have the file open for reading won't be
         * able to keep the data pinned on disk forever. This call is particular useful whenever we execute clean-up
         * jobs ("vacuuming"), where we want to make sure the data is really gone and the disk space released and
         * returned to the free pool.
         *
         * Deallocation is preferably done by FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE (👊) if supported, which means
         * the file won't change size. That's a good thing since we shouldn't needlessly trigger SIGBUS in other
         * programs that have mmap()ed the file. (The assumption here is that changing file contents to all zeroes
         * underneath those programs is the better choice than simply triggering SIGBUS in them which truncation does.)
         * However if hole punching is not implemented in the kernel or file system we'll fall back to normal file
         * truncation (🔪), as our goal of deallocating the data space trumps our goal of being nice to readers (💐).
         *
         * Note that we attempt deallocation, but failure to succeed with that is not considered fatal, as long as the
         * primary job – to delete the file – is accomplished. */

        if ((flags & AT_REMOVEDIR) == 0) {
                truncate_fd = openat(fd, name, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
                if (truncate_fd < 0) {

                        /* If this failed because the file doesn't exist propagate the error right-away. Also,
                         * AT_REMOVEDIR wasn't set, and we tried to open the file for writing, which means EISDIR is
                         * returned when this is a directory but we are not supposed to delete those, hence propagate
                         * the error right-away too. */
                        if (IN_SET(errno, ENOENT, EISDIR))
                                return -errno;

                        if (errno != ELOOP) /* don't complain if this is a symlink */
                                log_debug_errno(errno, "Failed to open file '%s' for deallocation, ignoring: %m", name);
                }
        }

        if (unlinkat(fd, name, flags) < 0)
                return -errno;

        if (truncate_fd < 0) /* Don't have a file handle, can't do more ☹️ */
                return 0;

        if (fstat(truncate_fd, &st) < 0) {
1239
                log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring: %m", name);
1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262
                return 0;
        }

        if (!S_ISREG(st.st_mode) || st.st_blocks == 0 || st.st_nlink > 0)
                return 0;

        /* If this is a regular file, it actually took up space on disk and there are no other links it's time to
         * punch-hole/truncate this to release the disk space. */

        bs = MAX(st.st_blksize, 512);
        l = DIV_ROUND_UP(st.st_size, bs) * bs; /* Round up to next block size */

        if (fallocate(truncate_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, 0, l) >= 0)
                return 0; /* Successfully punched a hole! 😊 */

        /* Fall back to truncation */
        if (ftruncate(truncate_fd, 0) < 0) {
                log_debug_errno(errno, "Failed to truncate file to 0, ignoring: %m");
                return 0;
        }

        return 0;
}
1263 1264

int fsync_directory_of_file(int fd) {
1265
        _cleanup_free_ char *path = NULL;
1266 1267 1268 1269 1270 1271 1272 1273
        _cleanup_close_ int dfd = -1;
        int r;

        r = fd_verify_regular(fd);
        if (r < 0)
                return r;

        r = fd_get_path(fd, &path);
1274
        if (r < 0) {
1275 1276 1277
                log_debug_errno(r, "Failed to query /proc/self/fd/%d%s: %m",
                                fd,
                                r == -EOPNOTSUPP ? ", ignoring" : "");
1278 1279 1280 1281 1282 1283 1284

                if (r == -EOPNOTSUPP)
                        /* If /proc is not available, we're most likely running in some
                         * chroot environment, and syncing the directory is not very
                         * important in that case. Let's just silently do nothing. */
                        return 0;

1285
                return r;
1286
        }
1287 1288 1289 1290

        if (!path_is_absolute(path))
                return -EINVAL;

1291
        dfd = open_parent(path, O_CLOEXEC, 0);
1292
        if (dfd < 0)
1293
                return dfd;
1294 1295 1296 1297 1298 1299

        if (fsync(dfd) < 0)
                return -errno;

        return 0;
}
1300

1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311
int fsync_full(int fd) {
        int r, q;

        /* Sync both the file and the directory */

        r = fsync(fd) < 0 ? -errno : 0;
        q = fsync_directory_of_file(fd);

        return r < 0 ? r : q;
}

1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339
int fsync_path_at(int at_fd, const char *path) {
        _cleanup_close_ int opened_fd = -1;
        int fd;

        if (isempty(path)) {
                if (at_fd == AT_FDCWD) {
                        opened_fd = open(".", O_RDONLY|O_DIRECTORY|O_CLOEXEC);
                        if (opened_fd < 0)
                                return -errno;

                        fd = opened_fd;
                } else
                        fd = at_fd;
        } else {

                opened_fd = openat(at_fd, path, O_RDONLY|O_CLOEXEC);
                if (opened_fd < 0)
                        return -errno;

                fd = opened_fd;
        }

        if (fsync(fd) < 0)
                return -errno;

        return 0;
}

1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354
int syncfs_path(int atfd, const char *path) {
        _cleanup_close_ int fd = -1;

        assert(path);

        fd = openat(atfd, path, O_CLOEXEC|O_RDONLY|O_NONBLOCK);
        if (fd < 0)
                return -errno;

        if (syncfs(fd) < 0)
                return -errno;

        return 0;
}

1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370
int open_parent(const char *path, int flags, mode_t mode) {
        _cleanup_free_ char *parent = NULL;
        int fd;

        if (isempty(path))
                return -EINVAL;
        if (path_equal(path, "/")) /* requesting the parent of the root dir is fishy, let's prohibit that */
                return -EINVAL;

        parent = dirname_malloc(path);
        if (!parent)
                return -ENOMEM;

        /* Let's insist on O_DIRECTORY since the parent of a file or directory is a directory. Except if we open an
         * O_TMPFILE file, because in that case we are actually create a regular file below the parent directory. */

1371
        if (FLAGS_SET(flags, O_PATH))
1372
                flags |= O_DIRECTORY;
1373
        else if (!FLAGS_SET(flags, O_TMPFILE))
1374 1375 1376 1377 1378 1379 1380 1381
                flags |= O_DIRECTORY|O_RDONLY;

        fd = open(parent, flags, mode);
        if (fd < 0)
                return -errno;

        return fd;
}