]> git.feebdaed.xyz Git - 0xmirror/f-stack.git/commitdiff
Update ff_hook_syscall.c
authorliujinhui-job <liujinhui@kylinos.cn>
Sun, 8 Jun 2025 15:57:27 +0000 (23:57 +0800)
committerGitHub <noreply@github.com>
Sun, 8 Jun 2025 15:57:27 +0000 (23:57 +0800)
adapter/syscall/ff_hook_syscall.c

index 165471a1d59175ed945a1708ac91915d159fc69c..6d63f806a88b332b3497439d14452ae1f5394b03 100644 (file)
@@ -7,6 +7,7 @@
 #include <time.h>
 #include <unistd.h>
 #include <pthread.h>
+#include <sys/select.h>
 
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
@@ -86,6 +87,7 @@ static __thread struct ff_fcntl_args *fcntl_args = NULL;
 static __thread struct ff_epoll_ctl_args *epoll_ctl_args = NULL;
 static __thread struct ff_epoll_wait_args *epoll_wait_args = NULL;
 static __thread struct ff_kevent_args *kevent_args = NULL;
+static __thread struct ff_select_args *select_args = NULL;
 
 #define IOV_MAX   16
 #define IOV_LEN_MAX     2048
@@ -235,11 +237,17 @@ rte_spinlock_t worker_id_lock;
 static int nb_procs = NB_FSTACK_INSTANCE_DEFAULT;
 
 #define FF_KERNEL_MAX_FD_DEFAULT    1024
+#if defined(FF_PRELOAD_SUPPORT_SELECT)
+static int ff_kernel_max_fd = FF_KERNEL_MAX_FD_SELECT;
+#else
 static int ff_kernel_max_fd = FF_KERNEL_MAX_FD_DEFAULT;
+#endif
 
 /* not support thread socket now */
 static int need_alarm_sem = 0;
 
+#define count_trailing_zeros(x) __builtin_ctzll(x)
+
 void __chk_fail(void);
 
 static inline int convert_fstack_fd(int sockfd) {
@@ -264,6 +272,47 @@ int is_fstack_fd(int sockfd) {
     return sockfd >= ff_kernel_max_fd;
 }
 
+static inline uint64_t 
+ff_bitmap_first_set(fd_mask *ai, int set_words) 
+{
+    fd_mask i = 0;
+
+    for (; i < set_words; i++) {
+        fd_mask x = ai[i];
+
+        if (x != 0) {
+            return i * NFDBITS + count_trailing_zeros(x);
+        }
+    }
+
+    return ~0;
+}
+
+static inline fd_mask 
+ff_bitmap_next_set(fd_mask *ai, fd_mask i, int set_words) 
+{
+    fd_mask i0 = i / NFDBITS;
+    fd_mask i1 = i % NFDBITS;
+    fd_mask t;
+
+    if (i0 < set_words) {
+        t = (ai[i0] >> i1) << i1;
+        
+        if (t) {
+            return count_trailing_zeros(t) + i0 * NFDBITS;
+        }
+        
+        for (i0++; i0 < set_words; i0++) {
+            t = ai[i0];
+            if (t) {
+                return count_trailing_zeros(t) + i0 * NFDBITS;
+            }
+        }
+    }
+
+    return ~0;
+}
+
 int
 fstack_territory(int domain, int type, int protocol)
 {
@@ -2558,6 +2607,315 @@ kevent(int kq, const struct kevent *changelist, int nchanges,
     RETURN_NOFREE();
 }
 
+int
+ff_hook_select(int nfds, fd_set *restrict readfds, fd_set *restrict writefds,
+    fd_set *restrict exceptfds, struct timeval *restrict timeout)
+{
+    int set_words;
+    int set_bytes;
+    int fd;
+    int max_kernel_fd = 0;
+    int max_ff_fd = 0;
+    fd_mask *fds_bit;
+    struct timespec t_s, t_n;
+    time_t now_time_ms = 0;
+    time_t end_time_ms = 0;
+    int ff_set_words, ff_set_bytes;
+    int kernel_set_words, kernel_set_bytes;
+    int kernel_ret = 0;
+    
+    DEBUG_LOG("ff_hook_select nfds:%d\n", nfds);
+
+    DEFINE_REQ_ARGS_STATIC(select);
+    static __thread fd_set *ff_readfds_share = NULL;
+    static __thread fd_set *ff_writefds_share = NULL;
+    static __thread fd_set *ff_exceptfds_share = NULL;
+    static __thread fd_set kernel_readfds;
+    static __thread fd_set kernel_writefds;
+    static __thread fd_set kernel_exceptfds;
+    static __thread fd_set ff_readfds;
+    static __thread fd_set ff_writefds;
+    static __thread fd_set ff_exceptfds;
+
+    bool have_ff_readfd = false;
+    bool have_ff_writefd = false;
+    bool have_ff_exceptfd = false;
+    bool have_kernel_fd = false;
+    bool have_exec_kernel_select = false;
+
+    if (nfds > FD_SETSIZE) {
+        nfds = FD_SETSIZE;
+    }
+
+    if (ff_kernel_max_fd >= FD_SETSIZE) {
+        return ff_linux_select(nfds, readfds, writefds, exceptfds, timeout);
+    }
+
+    set_words = (nfds + NFDBITS - 1) / NFDBITS;
+    set_bytes = set_words * sizeof(fd_mask);
+
+    if (ff_readfds_share == NULL) {
+        ff_readfds_share = share_mem_alloc(sizeof(fd_set));
+        if (ff_readfds_share == NULL) {
+            RETURN_ERROR_NOFREE(ENOMEM);
+        }
+    }
+
+    if (ff_writefds_share == NULL) {
+        ff_writefds_share = share_mem_alloc(sizeof(fd_set));
+        if (ff_writefds_share == NULL) {
+            RETURN_ERROR_NOFREE(ENOMEM);
+        }
+    }
+
+    if (ff_exceptfds_share == NULL) {
+        ff_exceptfds_share = share_mem_alloc(sizeof(fd_set));
+        if (ff_exceptfds_share == NULL) {
+            RETURN_ERROR_NOFREE(ENOMEM);
+        }
+    }
+
+    /* We first separate kernel fd and userspace fd. */
+    if (readfds != NULL) {
+        memset(&ff_readfds, 0, set_bytes);
+        memset(&kernel_readfds, 0, set_bytes);
+
+        fds_bit = (fd_mask *)readfds;
+
+        for (fd = ff_bitmap_first_set(fds_bit, set_words); (fd != ~0) && (fd < nfds); 
+             fd = ff_bitmap_next_set(fds_bit, fd + 1, set_words)) {
+            if (is_fstack_fd(fd)) {
+                have_ff_readfd = true;
+                if (restore_fstack_fd(fd) > max_ff_fd) {
+                    max_ff_fd = restore_fstack_fd(fd);
+                }
+                FD_SET(restore_fstack_fd(fd), &ff_readfds);
+            } else {
+                have_kernel_fd = true;
+                if (fd > max_kernel_fd) {
+                    max_kernel_fd = fd;
+                }
+                FD_SET(fd, &kernel_readfds);
+            }
+        }
+
+        memset(readfds, 0, set_bytes);
+    }
+
+    if (writefds != NULL) {
+        memset(&ff_writefds, 0, set_bytes);
+        memset(&kernel_writefds, 0, set_bytes);
+
+        fds_bit = (fd_mask *)writefds;
+
+        for (fd = ff_bitmap_first_set(fds_bit, set_words); (fd != ~0) && (fd < nfds); 
+             fd = ff_bitmap_next_set(fds_bit, fd + 1, set_words)) {
+            if (is_fstack_fd(fd)) {
+                have_ff_exceptfd = true;
+                if (restore_fstack_fd(fd) > max_ff_fd) {
+                    max_ff_fd = restore_fstack_fd(fd);
+                }
+                FD_SET(restore_fstack_fd(fd), &ff_writefds);
+            } else {
+                have_kernel_fd = true;
+                if (fd > max_kernel_fd) {
+                    max_kernel_fd = fd;
+                }
+                FD_SET(fd, &kernel_writefds);
+            }
+        }
+
+        memset(writefds, 0, set_bytes);
+    }
+
+    if (exceptfds != NULL) {
+        memset(&ff_exceptfds, 0, set_bytes);
+        memset(&kernel_exceptfds, 0, set_bytes);
+
+        fds_bit = (fd_mask *)exceptfds;
+
+        for (fd = ff_bitmap_first_set(fds_bit, set_words); (fd != ~0) && (fd < nfds); 
+             fd = ff_bitmap_next_set(fds_bit, fd + 1, set_words)) {
+            if (is_fstack_fd(fd)) {
+                have_ff_writefd = true;
+                if (restore_fstack_fd(fd) > max_ff_fd) {
+                    max_ff_fd = restore_fstack_fd(fd);
+                }
+                FD_SET(restore_fstack_fd(fd), &ff_exceptfds);
+            } else {
+                have_kernel_fd = true;
+                if (fd > max_kernel_fd) {
+                    max_kernel_fd = fd;
+                }
+                FD_SET(fd, &kernel_exceptfds);
+            }
+        }
+
+        memset(exceptfds, 0, set_bytes);
+    }
+
+    if (timeout != NULL) {
+        if (clock_gettime(CLOCK_MONOTONIC_COARSE, &t_s) == -1) {
+            ret = -1;
+            goto select_exit;
+        }
+        end_time_ms = (t_s.tv_sec + timeout->tv_sec) * 1000 + t_s.tv_nsec / 1000000 + timeout->tv_usec / 1000;
+    }    
+
+    if (have_ff_readfd || have_ff_writefd || have_ff_exceptfd) {
+        args->nfds = max_ff_fd + 1;
+        ff_set_words = (max_ff_fd + 1 + NFDBITS - 1) / NFDBITS;
+        ff_set_bytes = ff_set_words * sizeof(fd_mask);
+    }
+
+    if (have_kernel_fd) {
+        kernel_set_words = (max_kernel_fd + 1 + NFDBITS - 1) / NFDBITS;
+        kernel_set_bytes = kernel_set_words * sizeof(fd_mask);
+    }
+
+    struct timeval no_block_time = {0, 0};
+
+retry:
+    if (have_ff_readfd) {
+        memcpy(ff_readfds_share, &ff_readfds, ff_set_bytes);
+        args->readfds = ff_readfds_share;
+    } else {
+        args->readfds = NULL;
+    }
+
+    if (have_ff_writefd) {
+        memcpy(ff_writefds_share, &ff_writefds, ff_set_bytes);
+        args->writefds = ff_writefds_share;
+    } else {
+        args->writefds = NULL;
+    }
+
+    if (have_ff_exceptfd) {
+        memcpy(ff_exceptfds_share, &ff_exceptfds, ff_set_bytes);
+        args->exceptfds = ff_exceptfds_share;
+    } else {
+        args->exceptfds = NULL;
+    }
+    
+    ACQUIRE_ZONE_LOCK(FF_SC_IDLE);
+    sc->ops = FF_SO_SELECT;
+    sc->args = args;
+
+    /*
+     * sc->result, sc->error must reset in select.
+     * Otherwise can access last sc call's result.
+     */
+    sc->result = 0;
+    sc->error = 0;
+    errno = 0;
+    RELEASE_ZONE_LOCK(FF_SC_REQ);
+
+    do {
+        /*
+         * we call freebsd select.
+         */
+        ACQUIRE_ZONE_TRY_LOCK(FF_SC_REP);
+        ret = sc->result;
+        if (ret < 0) {
+            //occur error, return imme.
+            errno = sc->error;
+            RELEASE_ZONE_LOCK(FF_SC_IDLE);
+            return ret;
+        }
+        RELEASE_ZONE_LOCK(FF_SC_IDLE);
+        if (ret > 0) {
+            goto select_exit;
+        }
+
+        if (have_kernel_fd) {
+            if (readfds)
+                memcpy(readfds, &kernel_readfds, kernel_set_bytes);
+            if (writefds)
+                memcpy(writefds, &kernel_writefds, kernel_set_bytes);
+            if (exceptfds)
+                memcpy(exceptfds, &kernel_exceptfds, kernel_set_bytes);
+            
+            kernel_ret = ff_linux_select(max_kernel_fd + 1, readfds, writefds, exceptfds, &no_block_time);
+            have_exec_kernel_select = true;
+            if (kernel_ret < 0) {
+                //occur error, return imme.
+                ret = kernel_ret;
+                goto select_exit;
+            }
+        }
+
+        if (timeout) {
+            if (timeout->tv_sec == 0 && timeout->tv_usec == 0) {
+                goto select_exit;
+            } else {
+                clock_gettime(CLOCK_MONOTONIC_COARSE, &t_n);
+                now_time_ms = t_n.tv_sec * 1000 + t_n.tv_nsec / 1000000;
+
+                if (now_time_ms >= end_time_ms) {
+                    goto select_exit;
+                }
+
+                goto retry;
+            }
+        } else {
+            goto retry;
+        }
+    } while(true);
+
+select_exit:
+    if (have_kernel_fd && have_exec_kernel_select == false) {
+        /* 
+         * Not been called even once.
+         */
+        if (readfds)
+            memcpy(readfds, &kernel_readfds, kernel_set_bytes);
+        if (writefds)
+            memcpy(writefds, &kernel_writefds, kernel_set_bytes);
+        if (exceptfds)
+            memcpy(exceptfds, &kernel_exceptfds, kernel_set_bytes);
+        
+        kernel_ret = ff_linux_select(max_kernel_fd + 1, readfds, writefds, exceptfds, &no_block_time);
+        if (kernel_ret < 0) {
+            //occur error, return imme.
+            return kernel_ret;
+        }
+    }
+
+    /* if have ff_event, set to intput read/write/except fds */
+    if (likely(ret > 0)) {
+        if (have_ff_readfd) {
+            fds_bit = (fd_mask *)ff_readfds_share;
+
+            for (fd = ff_bitmap_first_set(fds_bit, set_words); fd != ~0; 
+                 fd = ff_bitmap_next_set(fds_bit, fd + 1, set_words)) {
+                FD_SET(convert_fstack_fd(fd), readfds);
+            }
+        }
+
+        if (have_ff_writefd) {
+            fds_bit = (fd_mask *)ff_writefds_share;
+
+            for (fd = ff_bitmap_first_set(fds_bit, set_words); fd != ~0; 
+                 fd = ff_bitmap_next_set(fds_bit, fd + 1, set_words)) {
+                FD_SET(convert_fstack_fd(fd), writefds);
+            }
+        }
+
+        if (have_ff_exceptfd) {
+            fds_bit = (fd_mask *)ff_exceptfds_share;
+
+            for (fd = ff_bitmap_first_set(fds_bit, set_words); fd != ~0; 
+                 fd = ff_bitmap_next_set(fds_bit, fd + 1, set_words)) {
+                FD_SET(convert_fstack_fd(fd), exceptfds);
+            }
+        }
+    }
+
+    ret += kernel_ret;
+    
+    RETURN_NOFREE();
+}
+
 static void
 thread_destructor(void *sc)
 {
@@ -2710,7 +3068,9 @@ ff_adapter_init()
             ERR_LOG("getrlimit(RLIMIT_NOFILE) failed, use default ff_kernel_max_fd:%d\n", ff_kernel_max_fd);
             return -1;
         } else {
+#if !defined(FF_PRELOAD_SUPPORT_SELECT)
             ff_kernel_max_fd = (int)rlmt.rlim_cur;
+#endif
         }
         ERR_LOG("getrlimit(RLIMIT_NOFILE) successed, sed ff_kernel_max_fd:%d, and rlim_max is %ld\n",
             ff_kernel_max_fd, rlmt.rlim_max);