#include <time.h>
#include <unistd.h>
#include <pthread.h>
+#include <sys/select.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>
static __thread struct ff_epoll_ctl_args *epoll_ctl_args = NULL;
static __thread struct ff_epoll_wait_args *epoll_wait_args = NULL;
static __thread struct ff_kevent_args *kevent_args = NULL;
+static __thread struct ff_select_args *select_args = NULL;
#define IOV_MAX 16
#define IOV_LEN_MAX 2048
static int nb_procs = NB_FSTACK_INSTANCE_DEFAULT;
#define FF_KERNEL_MAX_FD_DEFAULT 1024
+#if defined(FF_PRELOAD_SUPPORT_SELECT)
+static int ff_kernel_max_fd = FF_KERNEL_MAX_FD_SELECT;
+#else
static int ff_kernel_max_fd = FF_KERNEL_MAX_FD_DEFAULT;
+#endif
/* not support thread socket now */
static int need_alarm_sem = 0;
+#define count_trailing_zeros(x) __builtin_ctzll(x)
+
void __chk_fail(void);
static inline int convert_fstack_fd(int sockfd) {
return sockfd >= ff_kernel_max_fd;
}
+static inline uint64_t
+ff_bitmap_first_set(fd_mask *ai, int set_words)
+{
+ fd_mask i = 0;
+
+ for (; i < set_words; i++) {
+ fd_mask x = ai[i];
+
+ if (x != 0) {
+ return i * NFDBITS + count_trailing_zeros(x);
+ }
+ }
+
+ return ~0;
+}
+
+static inline fd_mask
+ff_bitmap_next_set(fd_mask *ai, fd_mask i, int set_words)
+{
+ fd_mask i0 = i / NFDBITS;
+ fd_mask i1 = i % NFDBITS;
+ fd_mask t;
+
+ if (i0 < set_words) {
+ t = (ai[i0] >> i1) << i1;
+
+ if (t) {
+ return count_trailing_zeros(t) + i0 * NFDBITS;
+ }
+
+ for (i0++; i0 < set_words; i0++) {
+ t = ai[i0];
+ if (t) {
+ return count_trailing_zeros(t) + i0 * NFDBITS;
+ }
+ }
+ }
+
+ return ~0;
+}
+
int
fstack_territory(int domain, int type, int protocol)
{
RETURN_NOFREE();
}
+int
+ff_hook_select(int nfds, fd_set *restrict readfds, fd_set *restrict writefds,
+ fd_set *restrict exceptfds, struct timeval *restrict timeout)
+{
+ int set_words;
+ int set_bytes;
+ int fd;
+ int max_kernel_fd = 0;
+ int max_ff_fd = 0;
+ fd_mask *fds_bit;
+ struct timespec t_s, t_n;
+ time_t now_time_ms = 0;
+ time_t end_time_ms = 0;
+ int ff_set_words, ff_set_bytes;
+ int kernel_set_words, kernel_set_bytes;
+ int kernel_ret = 0;
+
+ DEBUG_LOG("ff_hook_select nfds:%d\n", nfds);
+
+ DEFINE_REQ_ARGS_STATIC(select);
+ static __thread fd_set *ff_readfds_share = NULL;
+ static __thread fd_set *ff_writefds_share = NULL;
+ static __thread fd_set *ff_exceptfds_share = NULL;
+ static __thread fd_set kernel_readfds;
+ static __thread fd_set kernel_writefds;
+ static __thread fd_set kernel_exceptfds;
+ static __thread fd_set ff_readfds;
+ static __thread fd_set ff_writefds;
+ static __thread fd_set ff_exceptfds;
+
+ bool have_ff_readfd = false;
+ bool have_ff_writefd = false;
+ bool have_ff_exceptfd = false;
+ bool have_kernel_fd = false;
+ bool have_exec_kernel_select = false;
+
+ if (nfds > FD_SETSIZE) {
+ nfds = FD_SETSIZE;
+ }
+
+ if (ff_kernel_max_fd >= FD_SETSIZE) {
+ return ff_linux_select(nfds, readfds, writefds, exceptfds, timeout);
+ }
+
+ set_words = (nfds + NFDBITS - 1) / NFDBITS;
+ set_bytes = set_words * sizeof(fd_mask);
+
+ if (ff_readfds_share == NULL) {
+ ff_readfds_share = share_mem_alloc(sizeof(fd_set));
+ if (ff_readfds_share == NULL) {
+ RETURN_ERROR_NOFREE(ENOMEM);
+ }
+ }
+
+ if (ff_writefds_share == NULL) {
+ ff_writefds_share = share_mem_alloc(sizeof(fd_set));
+ if (ff_writefds_share == NULL) {
+ RETURN_ERROR_NOFREE(ENOMEM);
+ }
+ }
+
+ if (ff_exceptfds_share == NULL) {
+ ff_exceptfds_share = share_mem_alloc(sizeof(fd_set));
+ if (ff_exceptfds_share == NULL) {
+ RETURN_ERROR_NOFREE(ENOMEM);
+ }
+ }
+
+ /* We first separate kernel fd and userspace fd. */
+ if (readfds != NULL) {
+ memset(&ff_readfds, 0, set_bytes);
+ memset(&kernel_readfds, 0, set_bytes);
+
+ fds_bit = (fd_mask *)readfds;
+
+ for (fd = ff_bitmap_first_set(fds_bit, set_words); (fd != ~0) && (fd < nfds);
+ fd = ff_bitmap_next_set(fds_bit, fd + 1, set_words)) {
+ if (is_fstack_fd(fd)) {
+ have_ff_readfd = true;
+ if (restore_fstack_fd(fd) > max_ff_fd) {
+ max_ff_fd = restore_fstack_fd(fd);
+ }
+ FD_SET(restore_fstack_fd(fd), &ff_readfds);
+ } else {
+ have_kernel_fd = true;
+ if (fd > max_kernel_fd) {
+ max_kernel_fd = fd;
+ }
+ FD_SET(fd, &kernel_readfds);
+ }
+ }
+
+ memset(readfds, 0, set_bytes);
+ }
+
+ if (writefds != NULL) {
+ memset(&ff_writefds, 0, set_bytes);
+ memset(&kernel_writefds, 0, set_bytes);
+
+ fds_bit = (fd_mask *)writefds;
+
+ for (fd = ff_bitmap_first_set(fds_bit, set_words); (fd != ~0) && (fd < nfds);
+ fd = ff_bitmap_next_set(fds_bit, fd + 1, set_words)) {
+ if (is_fstack_fd(fd)) {
+ have_ff_exceptfd = true;
+ if (restore_fstack_fd(fd) > max_ff_fd) {
+ max_ff_fd = restore_fstack_fd(fd);
+ }
+ FD_SET(restore_fstack_fd(fd), &ff_writefds);
+ } else {
+ have_kernel_fd = true;
+ if (fd > max_kernel_fd) {
+ max_kernel_fd = fd;
+ }
+ FD_SET(fd, &kernel_writefds);
+ }
+ }
+
+ memset(writefds, 0, set_bytes);
+ }
+
+ if (exceptfds != NULL) {
+ memset(&ff_exceptfds, 0, set_bytes);
+ memset(&kernel_exceptfds, 0, set_bytes);
+
+ fds_bit = (fd_mask *)exceptfds;
+
+ for (fd = ff_bitmap_first_set(fds_bit, set_words); (fd != ~0) && (fd < nfds);
+ fd = ff_bitmap_next_set(fds_bit, fd + 1, set_words)) {
+ if (is_fstack_fd(fd)) {
+ have_ff_writefd = true;
+ if (restore_fstack_fd(fd) > max_ff_fd) {
+ max_ff_fd = restore_fstack_fd(fd);
+ }
+ FD_SET(restore_fstack_fd(fd), &ff_exceptfds);
+ } else {
+ have_kernel_fd = true;
+ if (fd > max_kernel_fd) {
+ max_kernel_fd = fd;
+ }
+ FD_SET(fd, &kernel_exceptfds);
+ }
+ }
+
+ memset(exceptfds, 0, set_bytes);
+ }
+
+ if (timeout != NULL) {
+ if (clock_gettime(CLOCK_MONOTONIC_COARSE, &t_s) == -1) {
+ ret = -1;
+ goto select_exit;
+ }
+ end_time_ms = (t_s.tv_sec + timeout->tv_sec) * 1000 + t_s.tv_nsec / 1000000 + timeout->tv_usec / 1000;
+ }
+
+ if (have_ff_readfd || have_ff_writefd || have_ff_exceptfd) {
+ args->nfds = max_ff_fd + 1;
+ ff_set_words = (max_ff_fd + 1 + NFDBITS - 1) / NFDBITS;
+ ff_set_bytes = ff_set_words * sizeof(fd_mask);
+ }
+
+ if (have_kernel_fd) {
+ kernel_set_words = (max_kernel_fd + 1 + NFDBITS - 1) / NFDBITS;
+ kernel_set_bytes = kernel_set_words * sizeof(fd_mask);
+ }
+
+ struct timeval no_block_time = {0, 0};
+
+retry:
+ if (have_ff_readfd) {
+ memcpy(ff_readfds_share, &ff_readfds, ff_set_bytes);
+ args->readfds = ff_readfds_share;
+ } else {
+ args->readfds = NULL;
+ }
+
+ if (have_ff_writefd) {
+ memcpy(ff_writefds_share, &ff_writefds, ff_set_bytes);
+ args->writefds = ff_writefds_share;
+ } else {
+ args->writefds = NULL;
+ }
+
+ if (have_ff_exceptfd) {
+ memcpy(ff_exceptfds_share, &ff_exceptfds, ff_set_bytes);
+ args->exceptfds = ff_exceptfds_share;
+ } else {
+ args->exceptfds = NULL;
+ }
+
+ ACQUIRE_ZONE_LOCK(FF_SC_IDLE);
+ sc->ops = FF_SO_SELECT;
+ sc->args = args;
+
+ /*
+ * sc->result, sc->error must reset in select.
+ * Otherwise can access last sc call's result.
+ */
+ sc->result = 0;
+ sc->error = 0;
+ errno = 0;
+ RELEASE_ZONE_LOCK(FF_SC_REQ);
+
+ do {
+ /*
+ * we call freebsd select.
+ */
+ ACQUIRE_ZONE_TRY_LOCK(FF_SC_REP);
+ ret = sc->result;
+ if (ret < 0) {
+ //occur error, return imme.
+ errno = sc->error;
+ RELEASE_ZONE_LOCK(FF_SC_IDLE);
+ return ret;
+ }
+ RELEASE_ZONE_LOCK(FF_SC_IDLE);
+ if (ret > 0) {
+ goto select_exit;
+ }
+
+ if (have_kernel_fd) {
+ if (readfds)
+ memcpy(readfds, &kernel_readfds, kernel_set_bytes);
+ if (writefds)
+ memcpy(writefds, &kernel_writefds, kernel_set_bytes);
+ if (exceptfds)
+ memcpy(exceptfds, &kernel_exceptfds, kernel_set_bytes);
+
+ kernel_ret = ff_linux_select(max_kernel_fd + 1, readfds, writefds, exceptfds, &no_block_time);
+ have_exec_kernel_select = true;
+ if (kernel_ret < 0) {
+ //occur error, return imme.
+ ret = kernel_ret;
+ goto select_exit;
+ }
+ }
+
+ if (timeout) {
+ if (timeout->tv_sec == 0 && timeout->tv_usec == 0) {
+ goto select_exit;
+ } else {
+ clock_gettime(CLOCK_MONOTONIC_COARSE, &t_n);
+ now_time_ms = t_n.tv_sec * 1000 + t_n.tv_nsec / 1000000;
+
+ if (now_time_ms >= end_time_ms) {
+ goto select_exit;
+ }
+
+ goto retry;
+ }
+ } else {
+ goto retry;
+ }
+ } while(true);
+
+select_exit:
+ if (have_kernel_fd && have_exec_kernel_select == false) {
+ /*
+ * Not been called even once.
+ */
+ if (readfds)
+ memcpy(readfds, &kernel_readfds, kernel_set_bytes);
+ if (writefds)
+ memcpy(writefds, &kernel_writefds, kernel_set_bytes);
+ if (exceptfds)
+ memcpy(exceptfds, &kernel_exceptfds, kernel_set_bytes);
+
+ kernel_ret = ff_linux_select(max_kernel_fd + 1, readfds, writefds, exceptfds, &no_block_time);
+ if (kernel_ret < 0) {
+ //occur error, return imme.
+ return kernel_ret;
+ }
+ }
+
+ /* if have ff_event, set to intput read/write/except fds */
+ if (likely(ret > 0)) {
+ if (have_ff_readfd) {
+ fds_bit = (fd_mask *)ff_readfds_share;
+
+ for (fd = ff_bitmap_first_set(fds_bit, set_words); fd != ~0;
+ fd = ff_bitmap_next_set(fds_bit, fd + 1, set_words)) {
+ FD_SET(convert_fstack_fd(fd), readfds);
+ }
+ }
+
+ if (have_ff_writefd) {
+ fds_bit = (fd_mask *)ff_writefds_share;
+
+ for (fd = ff_bitmap_first_set(fds_bit, set_words); fd != ~0;
+ fd = ff_bitmap_next_set(fds_bit, fd + 1, set_words)) {
+ FD_SET(convert_fstack_fd(fd), writefds);
+ }
+ }
+
+ if (have_ff_exceptfd) {
+ fds_bit = (fd_mask *)ff_exceptfds_share;
+
+ for (fd = ff_bitmap_first_set(fds_bit, set_words); fd != ~0;
+ fd = ff_bitmap_next_set(fds_bit, fd + 1, set_words)) {
+ FD_SET(convert_fstack_fd(fd), exceptfds);
+ }
+ }
+ }
+
+ ret += kernel_ret;
+
+ RETURN_NOFREE();
+}
+
static void
thread_destructor(void *sc)
{
ERR_LOG("getrlimit(RLIMIT_NOFILE) failed, use default ff_kernel_max_fd:%d\n", ff_kernel_max_fd);
return -1;
} else {
+#if !defined(FF_PRELOAD_SUPPORT_SELECT)
ff_kernel_max_fd = (int)rlmt.rlim_cur;
+#endif
}
ERR_LOG("getrlimit(RLIMIT_NOFILE) successed, sed ff_kernel_max_fd:%d, and rlim_max is %ld\n",
ff_kernel_max_fd, rlmt.rlim_max);