Add memfd_create(2) and mlock2(2).
These are old enough now that the latest devices will have kernels that
support them.
Also add basic doc comments to <sys/mman.h>.
Test: treehugger
Change-Id: I1b5ff5db0b6270f5c374287cac1d6a751a0259f5
diff --git a/docs/status.md b/docs/status.md
index 2f356f3..e091b16 100644
--- a/docs/status.md
+++ b/docs/status.md
@@ -39,6 +39,7 @@
New libc functions in R (API level 30):
* Full C11 `<threads.h>` (available as inlines for older API levels).
+ * `memfd_create` and `mlock2` (GNU extensions).
* `renameat2` (GNU extension).
New libc functions in Q (API level 29):
diff --git a/libc/SECCOMP_WHITELIST_COMMON.TXT b/libc/SECCOMP_WHITELIST_COMMON.TXT
index a2114dd..07f84a8 100644
--- a/libc/SECCOMP_WHITELIST_COMMON.TXT
+++ b/libc/SECCOMP_WHITELIST_COMMON.TXT
@@ -68,28 +68,24 @@
# Useful new syscalls which we don't yet use in bionic.
#
-# Since Linux 3.14, not in glibc.
-int sched_getattr(pid_t pid, struct sched_attr* attr, unsigned int flags) all
-int sched_setattr(pid_t pid, struct sched_attr* attr, unsigned int size, unsigned int flags) all
-# Since Linux 3.15, glibc 2.27.
-int memfd_create(const char* name, unsigned int flags) all
-# Since Linux 3.19, not in glibc.
-int execveat(int dirfd, const char* pathname, char* const* argv, char* const* envp, int flags) all
-# Since Linux 4.3, not in glibc. Probed for and conditionally used by ART.
-int membarrier(int cmd, int flags) all
-# Since Linux 4.5, glibc 2.27.
-ssize_t copy_file_range(int fd_in, loff_t* off_in, int fd_out, loff_t* off_out, size_t len, unsigned int flags) all
-# Since Linux 4.4, glibc 2.27.
-int mlock2(const void* addr, size_t len, int flags) all
-# Since Linux 4.6, glibc 2.26.
-ssize_t preadv2(int fd, const struct iovec* iov, int iovcnt, off_t offset, int flags) all
-ssize_t pwritev2(int fd, const struct iovec* iov, int iovcnt, off_t offset, int flags) all
# Since Linux 2.5, not in glibc.
int io_setup(unsigned nr, aio_context_t *ctxp) all
int io_destroy(aio_context_t ctx) all
int io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp) all
int io_getevents(aio_context_t ctx, long min_nr, long max_nr, struct io_event *events, struct timespec *timeout) all
int io_cancel(aio_context_t ctx, struct iocb *, struct io_event *result) all
+# Since Linux 3.14, not in glibc.
+int sched_getattr(pid_t pid, struct sched_attr* attr, unsigned int flags) all
+int sched_setattr(pid_t pid, struct sched_attr* attr, unsigned int size, unsigned int flags) all
+# Since Linux 3.19, not in glibc (and not really needed to implement fexecve).
+int execveat(int dirfd, const char* pathname, char* const* argv, char* const* envp, int flags) all
+# Since Linux 4.3, not in glibc. Probed for and conditionally used by ART.
+int membarrier(int cmd, int flags) all
+# Since Linux 4.5, glibc 2.27.
+ssize_t copy_file_range(int fd_in, loff_t* off_in, int fd_out, loff_t* off_out, size_t len, unsigned int flags) all
+# Since Linux 4.6, glibc 2.26.
+ssize_t preadv2(int fd, const struct iovec* iov, int iovcnt, off_t offset, int flags) all
+ssize_t pwritev2(int fd, const struct iovec* iov, int iovcnt, off_t offset, int flags) all
# Since Linux 4.11, glibc 2.30.
int statx(int, const char*, int, unsigned int, statx*) all
# Since Linux 5.1, not in glibc.
diff --git a/libc/SYSCALLS.TXT b/libc/SYSCALLS.TXT
index 62d698f..21ebdbd 100644
--- a/libc/SYSCALLS.TXT
+++ b/libc/SYSCALLS.TXT
@@ -107,12 +107,14 @@
int __close:close(int) all
pid_t __getpid:getpid() all
+int memfd_create(const char*, unsigned) all
int munmap(void*, size_t) all
void* __mremap:mremap(void*, size_t, size_t, int, void*) all
int msync(const void*, size_t, int) all
int mprotect(const void*, size_t, int) all
int madvise(void*, size_t, int) all
-int mlock(const void* addr, size_t len) all
+int mlock(const void* addr, size_t len) all
+int mlock2(const void* addr, size_t len, int flags) all
int munlock(const void* addr, size_t len) all
int mlockall(int flags) all
int munlockall() all
diff --git a/libc/include/sys/mman.h b/libc/include/sys/mman.h
index 89d6d07..3b83229 100644
--- a/libc/include/sys/mman.h
+++ b/libc/include/sys/mman.h
@@ -26,34 +26,27 @@
* SUCH DAMAGE.
*/
-#ifndef _SYS_MMAN_H_
-#define _SYS_MMAN_H_
+#pragma once
#include <sys/cdefs.h>
#include <sys/types.h>
+#include <linux/memfd.h>
#include <linux/mman.h>
__BEGIN_DECLS
-#ifndef MAP_ANON
-#define MAP_ANON MAP_ANONYMOUS
-#endif
+/** Alternative spelling of the `MAP_ANONYMOUS` flag for mmap(). */
+#define MAP_ANON MAP_ANONYMOUS
+/** Return value for mmap(). */
#define MAP_FAILED __BIONIC_CAST(reinterpret_cast, void*, -1)
-#define MREMAP_MAYMOVE 1
-#define MREMAP_FIXED 2
-
-/*
- * See https://android.googlesource.com/platform/bionic/+/master/docs/32-bit-abi.md
+/**
+ * [mmap(2)](http://man7.org/linux/man-pages/man2/mmap.2.html)
+ * creates a memory mapping for the given range.
*
- * mmap64 wasn't really around until L, but we added an inline for it since it
- * allows a lot more code to compile with _FILE_OFFSET_BITS=64.
- *
- * GCC removes the static inline unless it is explicitly used. We can get around
- * this with __attribute__((used)), but that needlessly adds a definition of
- * mmap64 to every translation unit that includes this header. Instead, just
- * preserve the old behavior for GCC and emit a useful diagnostic.
+ * Returns the address of the mapping on success,
+ * and returns `MAP_FAILED` and sets `errno` on failure.
*/
#if defined(__USE_FILE_OFFSET64)
void* mmap(void* __addr, size_t __size, int __prot, int __flags, int __fd, off_t __offset) __RENAME(mmap64);
@@ -62,25 +55,126 @@
#endif
#if __ANDROID_API__ >= __ANDROID_API_L__
+/**
+ * mmap64() is a variant of mmap() that takes a 64-bit offset even on LP32.
+ *
+ * See https://android.googlesource.com/platform/bionic/+/master/docs/32-bit-abi.md
+ *
+ * mmap64 wasn't really around until L, but we added an inline for it since it
+ * allows a lot more code to compile with _FILE_OFFSET_BITS=64.
+ */
void* mmap64(void* __addr, size_t __size, int __prot, int __flags, int __fd, off64_t __offset) __INTRODUCED_IN(21);
#endif
+/**
+ * [munmap(2)](http://man7.org/linux/man-pages/man2/munmap.2.html)
+ * deletes a memory mapping for the given range.
+ *
+ * Returns 0 on success, and returns -1 and sets `errno` on failure.
+ */
int munmap(void* __addr, size_t __size);
+
+/**
+ * [msync(2)](http://man7.org/linux/man-pages/man2/msync.2.html)
+ * flushes changes to a memory-mapped file to disk.
+ *
+ * Returns 0 on success, and returns -1 and sets `errno` on failure.
+ */
int msync(void* __addr, size_t __size, int __flags);
+
+/**
+ * [mprotect(2)](http://man7.org/linux/man-pages/man2/mprotect.2.html)
+ * sets the protection on a memory region.
+ *
+ * Returns 0 on success, and returns -1 and sets `errno` on failure.
+ */
int mprotect(void* __addr, size_t __size, int __prot);
+
+/** Flag for mremap(). */
+#define MREMAP_MAYMOVE 1
+
+/** Flag for mremap(). */
+#define MREMAP_FIXED 2
+
+/**
+ * [mremap(2)](http://man7.org/linux/man-pages/man2/mremap.2.html)
+ * expands or shrinks an existing memory mapping.
+ *
+ * Returns the address of the mapping on success,
+ * and returns `MAP_FAILED` and sets `errno` on failure.
+ */
void* mremap(void* __old_addr, size_t __old_size, size_t __new_size, int __flags, ...);
+/**
+ * [mlockall(2)](http://man7.org/linux/man-pages/man2/mlockall.2.html)
+ * locks pages (preventing swapping).
+ *
+ * Returns 0 on success, and returns -1 and sets `errno` on failure.
+ */
int mlockall(int __flags) __INTRODUCED_IN(17);
+
+/**
+ * [munlockall(2)](http://man7.org/linux/man-pages/man2/munlockall.2.html)
+ * unlocks pages (allowing swapping).
+ *
+ * Returns 0 on success, and returns -1 and sets `errno` on failure.
+ */
int munlockall(void) __INTRODUCED_IN(17);
+/**
+ * [mlock(2)](http://man7.org/linux/man-pages/man2/mlock.2.html)
+ * locks pages (preventing swapping).
+ *
+ * Returns 0 on success, and returns -1 and sets `errno` on failure.
+ */
int mlock(const void* __addr, size_t __size);
+
+/**
+ * [mlock2(2)](http://man7.org/linux/man-pages/man2/mlock.2.html)
+ * locks pages (preventing swapping), with optional flags. Available since API level 30.
+ *
+ * Returns 0 on success, and returns -1 and sets `errno` on failure.
+ */
+int mlock2(const void* __addr, size_t __size, int __flags) __INTRODUCED_IN(30);
+
+/**
+ * [munlock(2)](http://man7.org/linux/man-pages/man2/munlock.2.html)
+ * unlocks pages (allowing swapping).
+ *
+ * Returns 0 on success, and returns -1 and sets `errno` on failure.
+ */
int munlock(const void* __addr, size_t __size);
+/**
+ * [mincore(2)](http://man7.org/linux/man-pages/man2/mincore.2.html)
+ * tests whether pages are resident in memory.
+ *
+ * Returns 0 on success, and returns -1 and sets `errno` on failure.
+ */
int mincore(void* __addr, size_t __size, unsigned char* __vector);
+/**
+ * [madvise(2)](http://man7.org/linux/man-pages/man2/madvise.2.html)
+ * gives the kernel advice about future usage patterns.
+ *
+ * Returns 0 on success, and returns -1 and sets `errno` on failure.
+ */
int madvise(void* __addr, size_t __size, int __advice);
+#if defined(__USE_GNU)
+
+/**
+ * [memfd_create(2)](http://man7.org/linux/man-pages/man2/memfd_create.2.html)
+ * creates an anonymous file. Available since API level 30.
+ *
+ * Returns an fd on success, and returns -1 and sets `errno` on failure.
+ */
+int memfd_create(const char* __name, unsigned __flags) __INTRODUCED_IN(30);
+
+#endif
+
#if __ANDROID_API__ >= __ANDROID_API_M__
+
/*
* Some third-party code uses the existence of POSIX_MADV_NORMAL to detect the
* availability of posix_madvise. This is not correct, since having up-to-date
@@ -89,16 +183,30 @@
*
* https://github.com/android-ndk/ndk/issues/395
*/
+
+/** Flag for posix_madvise(). */
#define POSIX_MADV_NORMAL MADV_NORMAL
+/** Flag for posix_madvise(). */
#define POSIX_MADV_RANDOM MADV_RANDOM
+/** Flag for posix_madvise(). */
#define POSIX_MADV_SEQUENTIAL MADV_SEQUENTIAL
+/** Flag for posix_madvise(). */
#define POSIX_MADV_WILLNEED MADV_WILLNEED
+/** Flag for posix_madvise(). */
#define POSIX_MADV_DONTNEED MADV_DONTNEED
+
#endif
+
+/**
+ * [posix_madvise(3)](http://man7.org/linux/man-pages/man3/posix_madvise.3.html)
+ * gives the kernel advice about future usage patterns.
+ *
+ * Returns 0 on success, and returns a positive error number on failure.
+ *
+ * See also madvise() which has been available much longer.
+ */
int posix_madvise(void* __addr, size_t __size, int __advice) __INTRODUCED_IN(23);
__END_DECLS
#include <android/legacy_sys_mman_inlines.h>
-
-#endif
diff --git a/libc/libc.map.txt b/libc/libc.map.txt
index c3b9e2c..9b39bb8 100644
--- a/libc/libc.map.txt
+++ b/libc/libc.map.txt
@@ -1493,6 +1493,8 @@
cnd_signal;
cnd_timedwait;
cnd_wait;
+ memfd_create;
+ mlock2;
mtx_destroy;
mtx_init;
mtx_lock;
diff --git a/tests/dlext_test.cpp b/tests/dlext_test.cpp
index 67ebf37..59cf2f7 100644
--- a/tests/dlext_test.cpp
+++ b/tests/dlext_test.cpp
@@ -29,9 +29,7 @@
#include <android-base/file.h>
#include <android-base/strings.h>
-#include <linux/memfd.h>
#include <sys/mman.h>
-#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/wait.h>
@@ -942,7 +940,7 @@
const std::string lib_path = GetTestlibRoot() + "/libtest_simple.so";
// create memfd
- int memfd = syscall(__NR_memfd_create, "foobar", MFD_CLOEXEC);
+ int memfd = memfd_create("foobar", MFD_CLOEXEC);
if (memfd == -1 && errno == ENOSYS) {
return;
}
diff --git a/tests/sys_mman_test.cpp b/tests/sys_mman_test.cpp
index 0b98198..e403ea5 100644
--- a/tests/sys_mman_test.cpp
+++ b/tests/sys_mman_test.cpp
@@ -230,7 +230,10 @@
TEST(sys_mman, mremap_PTRDIFF_MAX) {
void* map = mmap(nullptr, PAGE_SIZE, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(MAP_FAILED, map);
+
ASSERT_EQ(MAP_FAILED, mremap(map, PAGE_SIZE, kHuge, MREMAP_MAYMOVE));
+
+ ASSERT_EQ(0, munmap(map, PAGE_SIZE));
}
TEST(sys_mman, mmap_bug_27265969) {
@@ -239,3 +242,64 @@
   // Some kernels had bugs that would cause segfaults here...
   __builtin___clear_cache(base, base + (PAGE_SIZE * 2));
 }
+
+TEST(sys_mman, mlock) {
+  void* map = mmap(nullptr, PAGE_SIZE, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  ASSERT_NE(MAP_FAILED, map);
+
+  // Not really anything we can assert about this.
+  mlock(map, PAGE_SIZE);
+
+  ASSERT_EQ(0, munmap(map, PAGE_SIZE));
+}
+
+TEST(sys_mman, mlock2) {
+#if defined(__GLIBC__)
+  GTEST_SKIP() << "needs glibc 2.27";
+#else
+  void* map = mmap(nullptr, PAGE_SIZE, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  ASSERT_NE(MAP_FAILED, map);
+
+  // Not really anything we can assert about this.
+  mlock2(map, PAGE_SIZE, MLOCK_ONFAULT);
+
+  ASSERT_EQ(0, munmap(map, PAGE_SIZE));
+#endif
+}
+
+TEST(sys_mman, memfd_create) {
+#if defined(__GLIBC__)
+  GTEST_SKIP() << "needs glibc 2.27";
+#else
+  // Is the MFD_CLOEXEC flag obeyed? Without it, FD_CLOEXEC must be clear...
+  errno = 0;
+  int fd = memfd_create("doesn't matter", 0);
+  if (fd == -1) {
+    ASSERT_EQ(ENOSYS, errno);
+    GTEST_SKIP() << "no memfd_create available";
+  }
+  int f = fcntl(fd, F_GETFD);
+  ASSERT_NE(-1, f);
+  ASSERT_FALSE(f & FD_CLOEXEC);
+  close(fd);
+
+  // ...and with it, FD_CLOEXEC must be set.
+  errno = 0;
+  fd = memfd_create("doesn't matter", MFD_CLOEXEC);
+  // Fail here on a bad fd rather than with a misleading fcntl() failure below.
+  ASSERT_NE(-1, fd);
+  f = fcntl(fd, F_GETFD);
+  ASSERT_NE(-1, f);
+  ASSERT_TRUE(f & FD_CLOEXEC);
+
+  // Can we read and write?
+  std::string expected("hello, world!");
+  ASSERT_TRUE(android::base::WriteStringToFd(expected, fd));
+  ASSERT_EQ(0, lseek(fd, 0, SEEK_SET));
+  std::string actual;
+  ASSERT_TRUE(android::base::ReadFdToString(fd, &actual));
+  ASSERT_EQ(expected, actual);
+
+  close(fd);
+#endif
+}