aboutsummaryrefslogtreecommitdiff
path: root/src/basic
diff options
context:
space:
mode:
Diffstat (limited to 'src/basic')
-rw-r--r--src/basic/fd-util.c199
-rw-r--r--src/basic/fd-util.h2
2 files changed, 201 insertions, 0 deletions
diff --git a/src/basic/fd-util.c b/src/basic/fd-util.c
index 9536a50b7..dc63cb0d7 100644
--- a/src/basic/fd-util.c
+++ b/src/basic/fd-util.c
@@ -12,10 +12,13 @@
#include <sys/stat.h>
#include <unistd.h>
+#include "alloc-util.h"
+#include "copy.h"
#include "dirent-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
+#include "io-util.h"
#include "macro.h"
#include "memfd-util.h"
#include "missing.h"
@@ -562,6 +565,202 @@ try_dev_shm_without_o_tmpfile:
return -EOPNOTSUPP;
}
+/* When the data is smaller or equal to 64K, try to place the copy in a memfd/pipe */
+#define DATA_FD_MEMORY_LIMIT (64U*1024U)
+
+/* If memfd/pipe didn't work out, then let's use a file in /tmp up to a size of 1M. If it's large than that use /var/tmp instead. */
+#define DATA_FD_TMP_LIMIT (1024U*1024U)
+
+int fd_duplicate_data_fd(int fd) {
+
+ _cleanup_close_ int copy_fd = -1, tmp_fd = -1;
+ _cleanup_free_ void *remains = NULL;
+ _cleanup_free_ char *t = NULL;
+ size_t remains_size = 0;
+ const char *td;
+ struct stat st;
+ int r;
+
+ /* Creates a 'data' fd from the specified source fd, containing all the same data in a read-only fashion, but
+ * independent of it (i.e. the source fd can be closed and unmounted after this call succeeded). Tries to be
+ * somewhat smart about where to place the data. In the best case uses a memfd(). If memfd() are not supported
+ * uses a pipe instead. For larger data will use an unlinked file in /tmp, and for even larger data one in
+ * /var/tmp. */
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ /* For now, let's only accept regular files, sockets, pipes and char devices */
+ if (S_ISDIR(st.st_mode))
+ return -EISDIR;
+ if (S_ISLNK(st.st_mode))
+ return -ELOOP;
+ if (!S_ISREG(st.st_mode) && !S_ISSOCK(st.st_mode) && !S_ISFIFO(st.st_mode) && !S_ISCHR(st.st_mode))
+ return -EBADFD;
+
+ /* If we have reason to believe the data is bounded in size, then let's use memfds or pipes as backing fd. Note
+ * that we use the reported regular file size only as a hint, given that there are plenty special files in
+ * /proc and /sys which report a zero file size but can be read from. */
+
+ if (!S_ISREG(st.st_mode) || st.st_size < DATA_FD_MEMORY_LIMIT) {
+
+ /* Try a memfd first */
+ copy_fd = memfd_new("data-fd");
+ if (copy_fd >= 0) {
+ off_t f;
+
+ r = copy_bytes(fd, copy_fd, DATA_FD_MEMORY_LIMIT, 0);
+ if (r < 0)
+ return r;
+
+ f = lseek(copy_fd, 0, SEEK_SET);
+ if (f != 0)
+ return -errno;
+
+ if (r == 0) {
+ /* Did it fit into the limit? If so, we are done. */
+ r = memfd_set_sealed(copy_fd);
+ if (r < 0)
+ return r;
+
+ return TAKE_FD(copy_fd);
+ }
+
+ /* Hmm, pity, this didn't fit. Let's fall back to /tmp then, see below */
+
+ } else {
+ _cleanup_(close_pairp) int pipefds[2] = { -1, -1 };
+ int isz;
+
+ /* If memfds aren't available, use a pipe. Set O_NONBLOCK so that we will get EAGAIN rather
+ * then block indefinitely when we hit the pipe size limit */
+
+ if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
+ return -errno;
+
+ isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
+ if (isz < 0)
+ return -errno;
+
+ /* Try to enlarge the pipe size if necessary */
+ if ((size_t) isz < DATA_FD_MEMORY_LIMIT) {
+
+ (void) fcntl(pipefds[1], F_SETPIPE_SZ, DATA_FD_MEMORY_LIMIT);
+
+ isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
+ if (isz < 0)
+ return -errno;
+ }
+
+ if ((size_t) isz >= DATA_FD_MEMORY_LIMIT) {
+
+ r = copy_bytes_full(fd, pipefds[1], DATA_FD_MEMORY_LIMIT, 0, &remains, &remains_size);
+ if (r < 0 && r != -EAGAIN)
+ return r; /* If we get EAGAIN it could be because of the source or because of
+ * the destination fd, we can't know, as sendfile() and friends won't
+ * tell us. Hence, treat this as reason to fall back, just to be
+ * sure. */
+ if (r == 0) {
+ /* Everything fit in, yay! */
+ (void) fd_nonblock(pipefds[0], false);
+
+ return TAKE_FD(pipefds[0]);
+ }
+
+ /* Things didn't fit in. But we read data into the pipe, let's remember that, so that
+ * when writing the new file we incorporate this first. */
+ copy_fd = TAKE_FD(pipefds[0]);
+ }
+ }
+ }
+
+ /* If we have reason to believe this will fit fine in /tmp, then use that as first fallback. */
+ if ((!S_ISREG(st.st_mode) || st.st_size < DATA_FD_TMP_LIMIT) &&
+ (DATA_FD_MEMORY_LIMIT + remains_size) < DATA_FD_TMP_LIMIT) {
+ off_t f;
+
+ tmp_fd = open_tmpfile_unlinkable(NULL /* NULL as directory means /tmp */, O_RDWR|O_CLOEXEC);
+ if (tmp_fd < 0)
+ return tmp_fd;
+
+ if (copy_fd >= 0) {
+ /* If we tried a memfd/pipe first and it ended up being too large, then copy this into the
+ * temporary file first. */
+
+ r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, 0);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+ }
+
+ if (remains_size > 0) {
+ /* If there were remaining bytes (i.e. read into memory, but not written out yet) from the
+ * failed copy operation, let's flush them out next. */
+
+ r = loop_write(tmp_fd, remains, remains_size, false);
+ if (r < 0)
+ return r;
+ }
+
+ r = copy_bytes(fd, tmp_fd, DATA_FD_TMP_LIMIT - DATA_FD_MEMORY_LIMIT - remains_size, COPY_REFLINK);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ goto finish; /* Yay, it fit in */
+
+ /* It didn't fit in. Let's not forget to use what we already used */
+ f = lseek(tmp_fd, 0, SEEK_SET);
+ if (f != 0)
+ return -errno;
+
+ safe_close(copy_fd);
+ copy_fd = TAKE_FD(tmp_fd);
+
+ remains = mfree(remains);
+ remains_size = 0;
+ }
+
+ /* As last fallback use /var/tmp */
+ r = var_tmp_dir(&td);
+ if (r < 0)
+ return r;
+
+ tmp_fd = open_tmpfile_unlinkable(td, O_RDWR|O_CLOEXEC);
+ if (tmp_fd < 0)
+ return tmp_fd;
+
+ if (copy_fd >= 0) {
+ /* If we tried a memfd/pipe first, or a file in /tmp, and it ended up being too large, than copy this
+ * into the temporary file first. */
+ r = copy_bytes(copy_fd, tmp_fd, UINT64_MAX, COPY_REFLINK);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+ }
+
+ if (remains_size > 0) {
+ /* Then, copy in any read but not yet written bytes. */
+ r = loop_write(tmp_fd, remains, remains_size, false);
+ if (r < 0)
+ return r;
+ }
+
+ /* Copy in the rest */
+ r = copy_bytes(fd, tmp_fd, UINT64_MAX, COPY_REFLINK);
+ if (r < 0)
+ return r;
+
+ assert(r == 0);
+
+finish:
+ /* Now convert the O_RDWR file descriptor into an O_RDONLY one (and as side effect seek to the beginning of the
+ * file again */
+
+ return fd_reopen(tmp_fd, O_RDONLY|O_CLOEXEC);
+}
+
int fd_move_above_stdio(int fd) {
int flags, copy;
PROTECT_ERRNO;
diff --git a/src/basic/fd-util.h b/src/basic/fd-util.h
index 89c3f34c7..2ae4e67ed 100644
--- a/src/basic/fd-util.h
+++ b/src/basic/fd-util.h
@@ -81,6 +81,8 @@ enum {
int acquire_data_fd(const void *data, size_t size, unsigned flags);
+int fd_duplicate_data_fd(int fd);
+
/* Hint: ENETUNREACH happens if we try to connect to "non-existing" special IP addresses, such as ::5 */
#define ERRNO_IS_DISCONNECT(r) \
IN_SET(r, ENOTCONN, ECONNRESET, ECONNREFUSED, ECONNABORTED, EPIPE, ENETUNREACH)