+++ /dev/null
-/*
- * Copyright (C) 2017-2021 THL A29 Limited, a Tencent company.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-#include <assert.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <errno.h>
-
-#include <rte_common.h>
-#include <rte_byteorder.h>
-#include <rte_log.h>
-#include <rte_memory.h>
-#include <rte_memcpy.h>
-#include <rte_memzone.h>
-#include <rte_config.h>
-#include <rte_eal.h>
-#include <rte_pci.h>
-#include <rte_mbuf.h>
-#include <rte_memory.h>
-#include <rte_lcore.h>
-#include <rte_launch.h>
-#include <rte_ethdev.h>
-#include <rte_debug.h>
-#include <rte_common.h>
-#include <rte_ether.h>
-#include <rte_malloc.h>
-#include <rte_cycles.h>
-#include <rte_timer.h>
-#include <rte_thash.h>
-#include <rte_ip.h>
-#include <rte_tcp.h>
-#include <rte_udp.h>
-#include <rte_eth_bond.h>
-#include <rte_eth_bond_8023ad.h>
-#include <rte_hash.h>
-
-#include "ff_dpdk_if.h"
-#include "ff_dpdk_pcap.h"
-#include "ff_dpdk_kni.h"
-#include "ff_config.h"
-#include "ff_veth.h"
-#include "ff_host_interface.h"
-#include "ff_msg.h"
-#include "ff_api.h"
-#include "ff_memory.h"
-
-#ifdef FF_KNI
-#define KNI_MBUF_MAX 2048
-#define KNI_QUEUE_SIZE KNI_MBUF_MAX
-
-int enable_kni = 0;
-static int kni_accept;
-static int knictl_action = FF_KNICTL_ACTION_DEFAULT;
-#endif
-int nb_dev_ports = 0; /* primary is correct, secondary is not correct, but no impact now*/
-
-static int numa_on;
-
-static unsigned idle_sleep;
-static unsigned pkt_tx_delay;
-static uint64_t usr_cb_tsc;
-static int stop_loop;
-
-static struct rte_timer freebsd_clock;
-
-// Mellanox Linux's driver key
-static uint8_t default_rsskey_40bytes[40] = {
- 0xd1, 0x81, 0xc6, 0x2c, 0xf7, 0xf4, 0xdb, 0x5b,
- 0x19, 0x83, 0xa2, 0xfc, 0x94, 0x3e, 0x1a, 0xdb,
- 0xd9, 0x38, 0x9e, 0x6b, 0xd1, 0x03, 0x9c, 0x2c,
- 0xa7, 0x44, 0x99, 0xad, 0x59, 0x3d, 0x56, 0xd9,
- 0xf3, 0x25, 0x3c, 0x06, 0x2a, 0xdc, 0x1f, 0xfc
-};
-
-static uint8_t default_rsskey_52bytes[52] = {
- 0x44, 0x39, 0x79, 0x6b, 0xb5, 0x4c, 0x50, 0x23,
- 0xb6, 0x75, 0xea, 0x5b, 0x12, 0x4f, 0x9f, 0x30,
- 0xb8, 0xa2, 0xc0, 0x3d, 0xdf, 0xdc, 0x4d, 0x02,
- 0xa0, 0x8c, 0x9b, 0x33, 0x4a, 0xf6, 0x4a, 0x4c,
- 0x05, 0xc6, 0xfa, 0x34, 0x39, 0x58, 0xd8, 0x55,
- 0x7d, 0x99, 0x58, 0x3a, 0xe1, 0x38, 0xc9, 0x2e,
- 0x81, 0x15, 0x03, 0x66
-};
-
-static uint8_t symmetric_rsskey[52] = {
- 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
- 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
- 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
- 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
- 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
- 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
- 0x6d, 0x5a, 0x6d, 0x5a
-};
-
-static int rsskey_len = sizeof(default_rsskey_40bytes);
-static uint8_t *rsskey = default_rsskey_40bytes;
-
-struct lcore_conf lcore_conf;
-
-struct rte_mempool *pktmbuf_pool[NB_SOCKETS];
-
-static pcblddr_func_t pcblddr_fun;
-
-static struct rte_ring **dispatch_ring[RTE_MAX_ETHPORTS];
-static dispatch_func_t packet_dispatcher;
-
-static uint16_t rss_reta_size[RTE_MAX_ETHPORTS];
-
-#define BOND_DRIVER_NAME "net_bonding"
-
-static inline int send_single_packet(struct rte_mbuf *m, uint8_t port);
-
-struct ff_msg_ring {
- char ring_name[FF_MSG_NUM][RTE_RING_NAMESIZE];
- /* ring[0] for lcore recv msg, other send */
- /* ring[1] for lcore send msg, other read */
- struct rte_ring *ring[FF_MSG_NUM];
-} __rte_cache_aligned;
-
-static struct ff_msg_ring msg_ring[RTE_MAX_LCORE];
-static struct rte_mempool *message_pool;
-static struct ff_dpdk_if_context *veth_ctx[RTE_MAX_ETHPORTS];
-
-static struct ff_top_args ff_top_status;
-static struct ff_traffic_args ff_traffic;
-extern void ff_hardclock(void);
-
-static void
-ff_hardclock_job(__rte_unused struct rte_timer *timer,
- __rte_unused void *arg) {
- ff_hardclock();
- ff_update_current_ts();
-}
-
-struct ff_dpdk_if_context *
-ff_dpdk_register_if(void *sc, void *ifp, struct ff_port_cfg *cfg)
-{
- struct ff_dpdk_if_context *ctx;
-
- ctx = calloc(1, sizeof(struct ff_dpdk_if_context));
- if (ctx == NULL)
- return NULL;
-
- ctx->sc = sc;
- ctx->ifp = ifp;
- ctx->port_id = cfg->port_id;
- ctx->hw_features = cfg->hw_features;
-
- return ctx;
-}
-
-void
-ff_dpdk_deregister_if(struct ff_dpdk_if_context *ctx)
-{
- free(ctx);
-}
-
-static void
-check_all_ports_link_status(void)
-{
- #define CHECK_INTERVAL 100 /* 100ms */
- #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
-
- uint16_t portid;
- uint8_t count, all_ports_up, print_flag = 0;
- struct rte_eth_link link;
-
- printf("\nChecking link status");
- fflush(stdout);
-
- int i, nb_ports;
- nb_ports = ff_global_cfg.dpdk.nb_ports;
- for (count = 0; count <= MAX_CHECK_TIME; count++) {
- all_ports_up = 1;
- for (i = 0; i < nb_ports; i++) {
- uint16_t portid = ff_global_cfg.dpdk.portid_list[i];
- memset(&link, 0, sizeof(link));
- rte_eth_link_get_nowait(portid, &link);
-
- /* print link status if flag set */
- if (print_flag == 1) {
- if (link.link_status) {
- printf("Port %d Link Up - speed %u "
- "Mbps - %s\n", (int)portid,
- (unsigned)link.link_speed,
- (link.link_duplex == RTE_ETH_LINK_FULL_DUPLEX) ?
- ("full-duplex") : ("half-duplex\n"));
- } else {
- printf("Port %d Link Down\n", (int)portid);
- }
- continue;
- }
- /* clear all_ports_up flag if any link down */
- if (link.link_status == 0) {
- all_ports_up = 0;
- break;
- }
- }
-
- /* after finally printing all link status, get out */
- if (print_flag == 1)
- break;
-
- if (all_ports_up == 0) {
- printf(".");
- fflush(stdout);
- rte_delay_ms(CHECK_INTERVAL);
- }
-
- /* set the print_flag if all ports up or timeout */
- if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
- print_flag = 1;
- printf("done\n");
- }
- }
-}
-
-static int
-init_lcore_conf(void)
-{
- if (nb_dev_ports == 0) {
- nb_dev_ports = rte_eth_dev_count_avail();
- }
- if (nb_dev_ports == 0) {
- rte_exit(EXIT_FAILURE, "No probed ethernet devices\n");
- }
-
- if (ff_global_cfg.dpdk.max_portid >= nb_dev_ports) {
- rte_exit(EXIT_FAILURE, "this machine doesn't have port %d.\n",
- ff_global_cfg.dpdk.max_portid);
- }
-
- lcore_conf.port_cfgs = ff_global_cfg.dpdk.port_cfgs;
- lcore_conf.proc_id = ff_global_cfg.dpdk.proc_id;
-
- uint16_t socket_id = 0;
- if (numa_on) {
- socket_id = rte_lcore_to_socket_id(rte_lcore_id());
- }
-
- lcore_conf.socket_id = socket_id;
-
- uint16_t lcore_id = ff_global_cfg.dpdk.proc_lcore[lcore_conf.proc_id];
- if (!rte_lcore_is_enabled(lcore_id)) {
- rte_exit(EXIT_FAILURE, "lcore %u unavailable\n", lcore_id);
- }
-
- int j;
- for (j = 0; j < ff_global_cfg.dpdk.nb_ports; ++j) {
- uint16_t port_id = ff_global_cfg.dpdk.portid_list[j];
- struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[port_id];
-
- int queueid = -1;
- int i;
- for (i = 0; i < pconf->nb_lcores; i++) {
- if (pconf->lcore_list[i] == lcore_id) {
- queueid = i;
- }
- }
- if (queueid < 0) {
- continue;
- }
- printf("lcore: %u, port: %u, queue: %u\n", lcore_id, port_id, queueid);
- uint16_t nb_rx_queue = lcore_conf.nb_rx_queue;
- lcore_conf.rx_queue_list[nb_rx_queue].port_id = port_id;
- lcore_conf.rx_queue_list[nb_rx_queue].queue_id = queueid;
- lcore_conf.nb_rx_queue++;
-
- lcore_conf.tx_queue_id[port_id] = queueid;
- lcore_conf.tx_port_id[lcore_conf.nb_tx_port] = port_id;
- lcore_conf.nb_tx_port++;
-
- /* Enable pcap dump */
- if (ff_global_cfg.pcap.enable) {
- ff_enable_pcap(ff_global_cfg.pcap.save_path, ff_global_cfg.pcap.snap_len);
- }
-
- lcore_conf.nb_queue_list[port_id] = pconf->nb_lcores;
- }
-
- if (lcore_conf.nb_rx_queue == 0) {
- rte_exit(EXIT_FAILURE, "lcore %u has nothing to do\n", lcore_id);
- }
-
- return 0;
-}
-
-static int
-init_mem_pool(void)
-{
- uint8_t nb_ports = ff_global_cfg.dpdk.nb_ports;
- uint32_t nb_lcores = ff_global_cfg.dpdk.nb_procs;
- uint32_t nb_tx_queue = nb_lcores;
- uint32_t nb_rx_queue = lcore_conf.nb_rx_queue * nb_lcores;
- uint16_t max_portid = ff_global_cfg.dpdk.max_portid;
-
- unsigned nb_mbuf = RTE_ALIGN_CEIL (
- (nb_rx_queue * (max_portid + 1) * 2 * RX_QUEUE_SIZE +
- nb_ports * (max_portid + 1) * 2 * nb_lcores * MAX_PKT_BURST +
- nb_ports * (max_portid + 1) * 2 * nb_tx_queue * TX_QUEUE_SIZE +
- nb_lcores * MEMPOOL_CACHE_SIZE +
-#ifdef FF_KNI
- nb_ports * KNI_MBUF_MAX +
- nb_ports * KNI_QUEUE_SIZE +
-#endif
- nb_lcores * nb_ports * DISPATCH_RING_SIZE),
- (unsigned)8192);
-
- unsigned socketid = 0;
- uint16_t i, lcore_id;
- char s[64];
-
- for (i = 0; i < ff_global_cfg.dpdk.nb_procs; i++) {
- lcore_id = ff_global_cfg.dpdk.proc_lcore[i];
- if (numa_on) {
- socketid = rte_lcore_to_socket_id(lcore_id);
- }
-
- if (socketid >= NB_SOCKETS) {
- rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n",
- socketid, i, NB_SOCKETS);
- }
-
- if (pktmbuf_pool[socketid] != NULL) {
- continue;
- }
-
- if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
- snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
- pktmbuf_pool[socketid] =
- rte_pktmbuf_pool_create(s, nb_mbuf,
- MEMPOOL_CACHE_SIZE, 0,
- RTE_MBUF_DEFAULT_BUF_SIZE, socketid);
- } else {
- snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
- pktmbuf_pool[socketid] = rte_mempool_lookup(s);
- }
-
- if (pktmbuf_pool[socketid] == NULL) {
- rte_exit(EXIT_FAILURE, "Cannot create mbuf pool on socket %d\n", socketid);
- } else {
- printf("create mbuf pool on socket %d\n", socketid);
- }
-
-#ifdef FF_USE_PAGE_ARRAY
- nb_mbuf = RTE_ALIGN_CEIL (
- nb_ports*nb_lcores*MAX_PKT_BURST +
- nb_ports*nb_tx_queue*TX_QUEUE_SIZE +
- nb_lcores*MEMPOOL_CACHE_SIZE,
- (unsigned)4096);
- ff_init_ref_pool(nb_mbuf, socketid);
-#endif
- }
-
- return 0;
-}
-
-static struct rte_ring *
-create_ring(const char *name, unsigned count, int socket_id, unsigned flags)
-{
- struct rte_ring *ring;
-
- if (name == NULL) {
- rte_exit(EXIT_FAILURE, "create ring failed, no name!\n");
- }
-
- if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
- ring = rte_ring_create(name, count, socket_id, flags);
- } else {
- ring = rte_ring_lookup(name);
- }
-
- if (ring == NULL) {
- rte_exit(EXIT_FAILURE, "create ring:%s failed!\n", name);
- }
-
- return ring;
-}
-
-static int
-init_dispatch_ring(void)
-{
- int j;
- char name_buf[RTE_RING_NAMESIZE];
- int queueid;
-
- unsigned socketid = lcore_conf.socket_id;
-
- /* Create ring according to ports actually being used. */
- int nb_ports = ff_global_cfg.dpdk.nb_ports;
- for (j = 0; j < nb_ports; j++) {
- uint16_t portid = ff_global_cfg.dpdk.portid_list[j];
- struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[portid];
- int nb_queues = pconf->nb_lcores;
- if (dispatch_ring[portid] == NULL) {
- snprintf(name_buf, RTE_RING_NAMESIZE, "ring_ptr_p%d", portid);
-
- dispatch_ring[portid] = rte_zmalloc(name_buf,
- sizeof(struct rte_ring *) * nb_queues,
- RTE_CACHE_LINE_SIZE);
- if (dispatch_ring[portid] == NULL) {
- rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (struct rte_ring*)) "
- "failed\n", name_buf);
- }
- }
-
- for(queueid = 0; queueid < nb_queues; ++queueid) {
- snprintf(name_buf, RTE_RING_NAMESIZE, "dispatch_ring_p%d_q%d",
- portid, queueid);
- dispatch_ring[portid][queueid] = create_ring(name_buf,
- DISPATCH_RING_SIZE, socketid, RING_F_SC_DEQ);
-
- if (dispatch_ring[portid][queueid] == NULL)
- rte_panic("create ring:%s failed!\n", name_buf);
-
- printf("create ring:%s success, %u ring entries are now free!\n",
- name_buf, rte_ring_free_count(dispatch_ring[portid][queueid]));
- }
- }
-
- return 0;
-}
-
-static void
-ff_msg_init(struct rte_mempool *mp,
- __attribute__((unused)) void *opaque_arg,
- void *obj, __attribute__((unused)) unsigned i)
-{
- struct ff_msg *msg = (struct ff_msg *)obj;
- msg->msg_type = FF_UNKNOWN;
- msg->buf_addr = (char *)msg + sizeof(struct ff_msg);
- msg->buf_len = mp->elt_size - sizeof(struct ff_msg);
- msg->original_buf = NULL;
- msg->original_buf_len = 0;
-}
-
-static int
-init_msg_ring(void)
-{
- uint16_t i, j;
- uint16_t nb_procs = ff_global_cfg.dpdk.nb_procs;
- unsigned socketid = lcore_conf.socket_id;
-
- /* Create message buffer pool */
- if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
- message_pool = rte_mempool_create(FF_MSG_POOL,
- MSG_RING_SIZE * 2 * nb_procs,
- MAX_MSG_BUF_SIZE, MSG_RING_SIZE / 2, 0,
- NULL, NULL, ff_msg_init, NULL,
- socketid, 0);
- } else {
- message_pool = rte_mempool_lookup(FF_MSG_POOL);
- }
-
- if (message_pool == NULL) {
- rte_panic("Create msg mempool failed\n");
- }
-
- for(i = 0; i < nb_procs; ++i) {
- snprintf(msg_ring[i].ring_name[0], RTE_RING_NAMESIZE,
- "%s%u", FF_MSG_RING_IN, i);
- msg_ring[i].ring[0] = create_ring(msg_ring[i].ring_name[0],
- MSG_RING_SIZE, socketid, RING_F_SP_ENQ | RING_F_SC_DEQ);
- if (msg_ring[i].ring[0] == NULL)
- rte_panic("create ring::%s failed!\n", msg_ring[i].ring_name[0]);
-
- for (j = FF_SYSCTL; j < FF_MSG_NUM; j++) {
- snprintf(msg_ring[i].ring_name[j], RTE_RING_NAMESIZE,
- "%s%u_%u", FF_MSG_RING_OUT, i, j);
- msg_ring[i].ring[j] = create_ring(msg_ring[i].ring_name[j],
- MSG_RING_SIZE, socketid, RING_F_SP_ENQ | RING_F_SC_DEQ);
- if (msg_ring[i].ring[j] == NULL)
- rte_panic("create ring::%s failed!\n", msg_ring[i].ring_name[j]);
- }
- }
-
- return 0;
-}
-
-#ifdef FF_KNI
-
-static enum FF_KNICTL_CMD get_kni_action(const char *c){
- if (!c)
- return FF_KNICTL_ACTION_DEFAULT;
- if (0 == strcasecmp(c, "alltokni")){
- return FF_KNICTL_ACTION_ALL_TO_KNI;
- } else if (0 == strcasecmp(c, "alltoff")){
- return FF_KNICTL_ACTION_ALL_TO_FF;
- } else if (0 == strcasecmp(c, "default")){
- return FF_KNICTL_ACTION_DEFAULT;
- } else {
- return FF_KNICTL_ACTION_DEFAULT;
- }
-}
-
-static int
-init_kni(void)
-{
- int nb_ports = nb_dev_ports;
-
- kni_accept = 0;
-
- if(strcasecmp(ff_global_cfg.kni.method, "accept") == 0)
- kni_accept = 1;
-
- knictl_action = get_kni_action(ff_global_cfg.kni.kni_action);
-
- ff_kni_init(nb_ports, ff_global_cfg.kni.type, ff_global_cfg.kni.tcp_port,
- ff_global_cfg.kni.udp_port);
-
- unsigned socket_id = lcore_conf.socket_id;
- struct rte_mempool *mbuf_pool = pktmbuf_pool[socket_id];
-
- nb_ports = ff_global_cfg.dpdk.nb_ports;
- int i, ret;
- for (i = 0; i < nb_ports; i++) {
- uint16_t port_id = ff_global_cfg.dpdk.portid_list[i];
- ff_kni_alloc(port_id, socket_id, ff_global_cfg.kni.type, i, mbuf_pool, KNI_QUEUE_SIZE);
- }
-
- return 0;
-}
-#endif
-
-//RSS reta update will failed when enable flow isolate
-#if !defined(FF_FLOW_ISOLATE) && !defined(FF_FLOW_IPIP)
-static void
-set_rss_table(uint16_t port_id, uint16_t reta_size, uint16_t nb_queues)
-{
- if (reta_size == 0) {
- return;
- }
-
- int reta_conf_size = RTE_MAX(1, reta_size / RTE_ETH_RETA_GROUP_SIZE);
- struct rte_eth_rss_reta_entry64 reta_conf[reta_conf_size];
-
- /* config HW indirection table */
- unsigned i, j, hash=0;
- for (i = 0; i < reta_conf_size; i++) {
- reta_conf[i].mask = ~0ULL;
- for (j = 0; j < RTE_ETH_RETA_GROUP_SIZE; j++) {
- reta_conf[i].reta[j] = hash++ % nb_queues;
- }
- }
-
- if (rte_eth_dev_rss_reta_update(port_id, reta_conf, reta_size)) {
- rte_exit(EXIT_FAILURE, "port[%d], failed to update rss table\n",
- port_id);
- }
-}
-#endif
-
-static int
-init_port_start(void)
-{
- int nb_ports = ff_global_cfg.dpdk.nb_ports, total_nb_ports;
- unsigned socketid = 0;
- struct rte_mempool *mbuf_pool;
- uint16_t i, j;
-
- total_nb_ports = nb_ports;
-#ifdef FF_KNI
- if (enable_kni && rte_eal_process_type() == RTE_PROC_PRIMARY) {
-#ifdef FF_KNI_KNI
- if (ff_global_cfg.kni.type == KNI_TYPE_VIRTIO)
-#endif
- {
- total_nb_ports *= 2; /* one more virtio_user port for kernel per port */
- }
- }
-#endif
-
- for (i = 0; i < total_nb_ports; i++) {
- uint16_t port_id, u_port_id;
- struct ff_port_cfg *pconf = NULL;
- uint16_t nb_queues;
- int nb_slaves;
-
- if (i < nb_ports) {
- u_port_id = ff_global_cfg.dpdk.portid_list[i];
- pconf = &ff_global_cfg.dpdk.port_cfgs[u_port_id];
- nb_queues = pconf->nb_lcores;
- nb_slaves = pconf->nb_slaves;
-
- if (nb_slaves > 0) {
- rte_eth_bond_8023ad_dedicated_queues_enable(u_port_id);
- }
- } else {
- /* kernel virtio user, port id start from `nb_dev_ports` */
- u_port_id = i - nb_ports + nb_dev_ports;
- nb_queues = 1; /* see ff_kni_alloc in ff_dpdk_kni.c */
- nb_slaves = 0;
- }
-
- for (j = 0; j <= nb_slaves; j++) {
- if (j < nb_slaves) {
- port_id = pconf->slave_portid_list[j];
- printf("To init %s's %d'st slave port[%d]\n",
- ff_global_cfg.dpdk.bond_cfgs->name,
- j, port_id);
- } else {
- port_id = u_port_id;
- }
-
- struct rte_eth_dev_info dev_info;
- struct rte_eth_conf port_conf = {0};
- struct rte_eth_rxconf rxq_conf;
- struct rte_eth_txconf txq_conf;
-
- int ret = rte_eth_dev_info_get(port_id, &dev_info);
- if (ret != 0)
- rte_exit(EXIT_FAILURE,
- "Error during getting device (port %u) info: %s\n",
- port_id, strerror(-ret));
-
- if (nb_queues > dev_info.max_rx_queues) {
- rte_exit(EXIT_FAILURE, "num_procs[%d] bigger than max_rx_queues[%d]\n",
- nb_queues,
- dev_info.max_rx_queues);
- }
-
- if (nb_queues > dev_info.max_tx_queues) {
- rte_exit(EXIT_FAILURE, "num_procs[%d] bigger than max_tx_queues[%d]\n",
- nb_queues,
- dev_info.max_tx_queues);
- }
-
- struct rte_ether_addr addr;
- rte_eth_macaddr_get(port_id, &addr);
- printf("Port %u MAC:"RTE_ETHER_ADDR_PRT_FMT"\n",
- (unsigned)port_id, RTE_ETHER_ADDR_BYTES(&addr));
-
- /* Only config dev port, but not kernel virtio user port */
- if (pconf) {
- rte_memcpy(pconf->mac,
- addr.addr_bytes, RTE_ETHER_ADDR_LEN);
-
- /* Set RSS mode */
- if (dev_info.flow_type_rss_offloads) {
- uint64_t default_rss_hf = RTE_ETH_RSS_PROTO_MASK;
- port_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_RSS;
- port_conf.rx_adv_conf.rss_conf.rss_hf = default_rss_hf;
- if (dev_info.hash_key_size == 52) {
- rsskey = default_rsskey_52bytes;
- rsskey_len = 52;
- }
- if (ff_global_cfg.dpdk.symmetric_rss) {
- printf("Use symmetric Receive-side Scaling(RSS) key\n");
- rsskey = symmetric_rsskey;
- }
- port_conf.rx_adv_conf.rss_conf.rss_key = rsskey;
- port_conf.rx_adv_conf.rss_conf.rss_key_len = rsskey_len;
- port_conf.rx_adv_conf.rss_conf.rss_hf &= dev_info.flow_type_rss_offloads;
- if (port_conf.rx_adv_conf.rss_conf.rss_hf !=
- RTE_ETH_RSS_PROTO_MASK) {
- printf("Port %u modified RSS hash function based on hardware support,"
- "requested:%#"PRIx64" configured:%#"PRIx64"\n",
- port_id, default_rss_hf,
- port_conf.rx_adv_conf.rss_conf.rss_hf);
- }
- }
-
- if (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
- port_conf.txmode.offloads |=
- RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
- }
-
- /* Set Rx VLAN stripping */
- if (ff_global_cfg.dpdk.vlan_strip) {
- if (dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_VLAN_STRIP) {
- port_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
- }
- }
-
- /* Enable HW CRC stripping */
- port_conf.rxmode.offloads &= ~RTE_ETH_RX_OFFLOAD_KEEP_CRC;
-
- /* FIXME: Enable TCP LRO ?*/
- #if 0
- if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO) {
- printf("LRO is supported\n");
- port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_TCP_LRO;
- pconf->hw_features.rx_lro = 1;
- }
- #endif
-
- /* Set Rx checksum checking */
- if ((dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM) &&
- (dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_UDP_CKSUM) &&
- (dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_TCP_CKSUM)) {
- printf("RX checksum offload supported\n");
- port_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_CHECKSUM;
- pconf->hw_features.rx_csum = 1;
- }
-
- if (ff_global_cfg.dpdk.tx_csum_offoad_skip == 0) {
- if ((dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) {
- printf("TX ip checksum offload supported\n");
- port_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM;
- pconf->hw_features.tx_csum_ip = 1;
- }
-
- if ((dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) &&
- (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) {
- printf("TX TCP&UDP checksum offload supported\n");
- port_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
- pconf->hw_features.tx_csum_l4 = 1;
- }
- } else {
- printf("TX checksum offoad is disabled\n");
- }
-
- if (ff_global_cfg.dpdk.tso) {
- if (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) {
- printf("TSO is supported\n");
- port_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
- pconf->hw_features.tx_tso = 1;
- }
- else {
- printf("TSO is not supported\n");
- }
- } else {
- printf("TSO is disabled\n");
- }
-
- if (dev_info.reta_size) {
- /* reta size must be power of 2 */
- assert((dev_info.reta_size & (dev_info.reta_size - 1)) == 0);
-
- rss_reta_size[port_id] = dev_info.reta_size;
- printf("port[%d]: rss table size: %d\n", port_id,
- dev_info.reta_size);
- }
- }
-
- if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
- continue;
- }
-
- ret = rte_eth_dev_configure(port_id, nb_queues, nb_queues, &port_conf);
- if (ret != 0) {
- return ret;
- }
-
- static uint16_t nb_rxd = RX_QUEUE_SIZE;
- static uint16_t nb_txd = TX_QUEUE_SIZE;
- ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd);
- if (ret < 0)
- printf("Could not adjust number of descriptors "
- "for port%u (%d)\n", (unsigned)port_id, ret);
-
- uint16_t q;
- for (q = 0; q < nb_queues; q++) {
- if (numa_on) {
- uint16_t lcore_id = lcore_conf.port_cfgs[u_port_id].lcore_list[q];
- socketid = rte_lcore_to_socket_id(lcore_id);
- }
- mbuf_pool = pktmbuf_pool[socketid];
-
- txq_conf = dev_info.default_txconf;
- txq_conf.offloads = port_conf.txmode.offloads;
- ret = rte_eth_tx_queue_setup(port_id, q, nb_txd,
- socketid, &txq_conf);
- if (ret < 0) {
- return ret;
- }
-
- rxq_conf = dev_info.default_rxconf;
- rxq_conf.offloads = port_conf.rxmode.offloads;
- ret = rte_eth_rx_queue_setup(port_id, q, nb_rxd,
- socketid, &rxq_conf, mbuf_pool);
- if (ret < 0) {
- return ret;
- }
- }
-
- if (strncmp(dev_info.driver_name, BOND_DRIVER_NAME,
- strlen(dev_info.driver_name)) == 0) {
-
- rte_eth_macaddr_get(port_id, &addr);
- printf("Port %u MAC:"RTE_ETHER_ADDR_PRT_FMT"\n",
- (unsigned)port_id, RTE_ETHER_ADDR_BYTES(&addr));
-
- rte_memcpy(pconf->mac,
- addr.addr_bytes, RTE_ETHER_ADDR_LEN);
-
- int mode, count, x;
- uint16_t slaves[RTE_MAX_ETHPORTS], len = RTE_MAX_ETHPORTS;
-
- mode = rte_eth_bond_mode_get(port_id);
- printf("Port %u, bond mode:%d\n", port_id, mode);
-
- count = rte_eth_bond_members_get(port_id, slaves, len);
- printf("Port %u, %s's slave ports count:%d\n", port_id,
- ff_global_cfg.dpdk.bond_cfgs->name, count);
- for (x=0; x<count; x++) {
- printf("Port %u, %s's slave port[%u]\n", port_id,
- ff_global_cfg.dpdk.bond_cfgs->name, slaves[x]);
- }
- }
-
- ret = rte_eth_dev_start(port_id);
- if (ret < 0) {
- return ret;
- }
-
-//RSS reta update will failed when enable flow isolate
-#if !defined(FF_FLOW_ISOLATE) && !defined(FF_FLOW_IPIP)
- if (nb_queues > 1) {
- /*
- * FIXME: modify RSS set to FDIR
- */
- set_rss_table(port_id, dev_info.reta_size, nb_queues);
- }
-#endif
-
- /* Enable RX in promiscuous mode for the Ethernet device. */
- if (ff_global_cfg.dpdk.promiscuous) {
- ret = rte_eth_promiscuous_enable(port_id);
- if (ret == 0) {
- printf("set port %u to promiscuous mode ok\n", port_id);
- } else {
- printf("set port %u to promiscuous mode error\n", port_id);
- }
- }
- }
- }
-
- if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
- check_all_ports_link_status();
- }
-
- return 0;
-}
-
-static int
-init_clock(void)
-{
- rte_timer_subsystem_init();
- uint64_t hz = rte_get_timer_hz();
- uint64_t intrs = US_PER_S / ff_global_cfg.freebsd.hz;
- uint64_t tsc = (hz + US_PER_S - 1) / US_PER_S * intrs;
-
- rte_timer_init(&freebsd_clock);
- rte_timer_reset(&freebsd_clock, tsc, PERIODICAL,
- rte_lcore_id(), &ff_hardclock_job, NULL);
-
- ff_update_current_ts();
-
- return 0;
-}
-
-#if defined(FF_FLOW_ISOLATE) || defined(FF_FDIR)
-/** Print a message out of a flow error. */
-static int
-port_flow_complain(struct rte_flow_error *error)
-{
- static const char *const errstrlist[] = {
- [RTE_FLOW_ERROR_TYPE_NONE] = "no error",
- [RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified",
- [RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)",
- [RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field",
- [RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field",
- [RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field",
- [RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
- [RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER] = "transfer field",
- [RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
- [RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
- [RTE_FLOW_ERROR_TYPE_ITEM_SPEC] = "item specification",
- [RTE_FLOW_ERROR_TYPE_ITEM_LAST] = "item specification range",
- [RTE_FLOW_ERROR_TYPE_ITEM_MASK] = "item specification mask",
- [RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item",
- [RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions",
- [RTE_FLOW_ERROR_TYPE_ACTION_CONF] = "action configuration",
- [RTE_FLOW_ERROR_TYPE_ACTION] = "specific action",
- };
- const char *errstr;
- char buf[32];
- int err = rte_errno;
-
- if ((unsigned int)error->type >= RTE_DIM(errstrlist) ||
- !errstrlist[error->type])
- errstr = "unknown type";
- else
- errstr = errstrlist[error->type];
- printf("Caught error type %d (%s): %s%s: %s\n",
- error->type, errstr,
- error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ",
- error->cause), buf) : "",
- error->message ? error->message : "(no stated reason)",
- rte_strerror(err));
- return -err;
-}
-#endif
-
-
-#ifdef FF_FLOW_ISOLATE
-static int
-port_flow_isolate(uint16_t port_id, int set)
-{
- struct rte_flow_error error;
-
- /* Poisoning to make sure PMDs update it in case of error. */
- memset(&error, 0x66, sizeof(error));
- if (rte_flow_isolate(port_id, set, &error))
- return port_flow_complain(&error);
- printf("Ingress traffic on port %u is %s to the defined flow rules\n",
- port_id,
- set ? "now restricted" : "not restricted anymore");
- return 0;
-}
-
-static int
-create_tcp_flow(uint16_t port_id, uint16_t tcp_port) {
- struct rte_flow_attr attr = {.ingress = 1};
- struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[port_id];
- int nb_queues = pconf->nb_lcores;
- uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
- int i = 0, j = 0;
- for (i = 0, j = 0; i < nb_queues; ++i)
- queue[j++] = i;
- struct rte_flow_action_rss rss = {
- .types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
- .key_len = rsskey_len,
- .key = rsskey,
- .queue_num = j,
- .queue = queue,
- };
-
- struct rte_eth_dev_info dev_info;
- int ret = rte_eth_dev_info_get(port_id, &dev_info);
- if (ret != 0)
- rte_exit(EXIT_FAILURE, "Error during getting device (port %u) info: %s\n", port_id, strerror(-ret));
-
- struct rte_flow_item pattern[3];
- struct rte_flow_action action[2];
- struct rte_flow_item_tcp tcp_spec;
- struct rte_flow_item_tcp tcp_mask = {
- .hdr = {
- .src_port = RTE_BE16(0x0000),
- .dst_port = RTE_BE16(0xffff),
- },
- };
- struct rte_flow_error error;
-
- memset(pattern, 0, sizeof(pattern));
- memset(action, 0, sizeof(action));
-
- /* set the dst ipv4 packet to the required value */
- pattern[0].type = RTE_FLOW_ITEM_TYPE_IPV4;
-
- memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp));
- tcp_spec.hdr.dst_port = rte_cpu_to_be_16(tcp_port);
- pattern[1].type = RTE_FLOW_ITEM_TYPE_TCP;
- pattern[1].spec = &tcp_spec;
- pattern[1].mask = &tcp_mask;
-
- /* end the pattern array */
- pattern[2].type = RTE_FLOW_ITEM_TYPE_END;
-
- /* create the action */
- action[0].type = RTE_FLOW_ACTION_TYPE_RSS;
- action[0].conf = &rss;
- action[1].type = RTE_FLOW_ACTION_TYPE_END;
-
- struct rte_flow *flow;
- /* validate and create the flow rule */
- if (!rte_flow_validate(port_id, &attr, pattern, action, &error)) {
- flow = rte_flow_create(port_id, &attr, pattern, action, &error);
- if (!flow) {
- return port_flow_complain(&error);
- }
- }
-
- memset(pattern, 0, sizeof(pattern));
-
- /* set the dst ipv4 packet to the required value */
- pattern[0].type = RTE_FLOW_ITEM_TYPE_IPV4;
-
- struct rte_flow_item_tcp tcp_src_mask = {
- .hdr = {
- .src_port = RTE_BE16(0xffff),
- .dst_port = RTE_BE16(0x0000),
- },
- };
-
- memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp));
- tcp_spec.hdr.src_port = rte_cpu_to_be_16(tcp_port);
- pattern[1].type = RTE_FLOW_ITEM_TYPE_TCP;
- pattern[1].spec = &tcp_spec;
- pattern[1].mask = &tcp_src_mask;
-
- /* end the pattern array */
- pattern[2].type = RTE_FLOW_ITEM_TYPE_END;
-
- /* validate and create the flow rule */
- if (!rte_flow_validate(port_id, &attr, pattern, action, &error)) {
- flow = rte_flow_create(port_id, &attr, pattern, action, &error);
- if (!flow) {
- return port_flow_complain(&error);
- }
- }
-
- return 1;
-}
-
-static int
-init_flow(uint16_t port_id, uint16_t tcp_port) {
- // struct ff_flow_cfg fcfg = ff_global_cfg.dpdk.flow_cfgs[0];
-
- // int i;
- // for (i = 0; i < fcfg.nb_port; i++) {
- // if(!create_tcp_flow(fcfg.port_id, fcfg.tcp_ports[i])) {
- // return 0;
- // }
- // }
-
- if(!create_tcp_flow(port_id, tcp_port)) {
- rte_exit(EXIT_FAILURE, "create tcp flow failed\n");
- return -1;
- }
-
- /* ARP rule */
- struct rte_flow_attr attr = {.ingress = 1};
- struct rte_flow_action_queue queue = {.index = 0};
-
- struct rte_flow_item pattern_[2];
- struct rte_flow_action action[2];
- struct rte_flow_item_eth eth_type = {.type = RTE_BE16(0x0806)};
- struct rte_flow_item_eth eth_mask = {
- .type = RTE_BE16(0xffff)
- };
-
- memset(pattern_, 0, sizeof(pattern_));
- memset(action, 0, sizeof(action));
-
- pattern_[0].type = RTE_FLOW_ITEM_TYPE_ETH;
- pattern_[0].spec = ð_type;
- pattern_[0].mask = ð_mask;
-
- pattern_[1].type = RTE_FLOW_ITEM_TYPE_END;
-
- /* create the action */
- action[0].type = RTE_FLOW_ACTION_TYPE_QUEUE;
- action[0].conf = &queue;
- action[1].type = RTE_FLOW_ACTION_TYPE_END;
-
- struct rte_flow *flow;
- struct rte_flow_error error;
- /* validate and create the flow rule */
- if (!rte_flow_validate(port_id, &attr, pattern_, action, &error)) {
- flow = rte_flow_create(port_id, &attr, pattern_, action, &error);
- if (!flow) {
- return port_flow_complain(&error);
- }
- }
-
- return 1;
-}
-
-#endif
-
-#ifdef FF_FLOW_IPIP
-static int
-create_ipip_flow(uint16_t port_id) {
- struct rte_flow_attr attr = {.ingress = 1};
- struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[port_id];
- int nb_queues = pconf->nb_lcores;
- uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
- // 1. Queue configuration check
- if (nb_queues > RTE_MAX_QUEUES_PER_PORT) {
- rte_exit(EXIT_FAILURE, "Queue count exceeds limit (%d > %d)\n",
- nb_queues, RTE_MAX_QUEUES_PER_PORT);
- }
- for (int i = 0; i < nb_queues; i++)
- queue[i] = i;
-
- // 2. Get device info and check return value
- struct rte_eth_dev_info dev_info;
- int ret = rte_eth_dev_info_get(port_id, &dev_info);
- if (ret != 0) {
- rte_exit(EXIT_FAILURE, "Error during getting device (port %u) info: %s\n",
- port_id, strerror(-ret));
- }
- // 3. RSS config - key: set inner hash
- struct rte_flow_action_rss rss = {
- .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
- .level = 2, // inner encapsulation layer RSS - hash based on inner protocol
- .types = RTE_ETH_RSS_NONFRAG_IPV4_TCP, // inner IPv4+TCP hash
- .key_len = rsskey_len,
- .key = rsskey,
- .queue_num = nb_queues,
- .queue = queue,
- };
- // 4. Hardware capability check and fallback handling
- if (!(dev_info.flow_type_rss_offloads & RTE_ETH_RSS_NONFRAG_IPV4_TCP)) {
- // printf("warning: inner TCP RSS not supported, fallback to outer RSS\n");
- fprintf(stderr, "Fallback handling!!!\n");
- printf("I'm three,Warning: inner TCP RSS is not supported, falling back to outer RSS.\n");
- rss.level = 0; // fallback to outer RSS
- rss.types = RTE_ETH_FLOW_IPV4; // update to outer protocol type
- }
-
- // 5. Outer IPv4 matches IPIP protocol
- struct rte_flow_item_ipv4 outer_ipv4_spec = {
- .hdr = {
- .next_proto_id = IPPROTO_IPIP
- }
- };
- struct rte_flow_item_ipv4 outer_ipv4_mask = {
- .hdr = {
- .next_proto_id = 0xFF
- }
- };
-
- // 6. Pattern chain definition - match inner TCP to enable inner RSS
- struct rte_flow_item pattern[] = {
- // Outer Ethernet header (wildcard)
- {
- .type = RTE_FLOW_ITEM_TYPE_ETH,
- .spec = NULL,
- .mask = NULL
- },
- // Outer IPv4 header (match only IPIP protocol)
- {
- .type = RTE_FLOW_ITEM_TYPE_IPV4,
- .spec = &outer_ipv4_spec,
- .mask = &outer_ipv4_mask
- },
- // Inner IPv4 header (wildcard, RSS hashes based on this layer)
- {
- .type = RTE_FLOW_ITEM_TYPE_IPV4,
- .spec = NULL,
- .mask = NULL
- },
- // Inner TCP header (wildcard, RSS hashes based on this layer)
- {
- .type = RTE_FLOW_ITEM_TYPE_TCP,
- .spec = NULL,
- .mask = NULL
- },
- {
- .type = RTE_FLOW_ITEM_TYPE_END
- }
- };
-
- // 7. Action configuration
- struct rte_flow_action action[] = {
- {
- .type = RTE_FLOW_ACTION_TYPE_RSS,
- .conf = &rss
- },
- {
- .type = RTE_FLOW_ACTION_TYPE_END
- }
- };
-
- // 8. Validate and create flow rule
- struct rte_flow_error error;
- struct rte_flow *flow = NULL;
-
- if (!rte_flow_validate(port_id, &attr, pattern, action, &error)) {
- flow = rte_flow_create(port_id, &attr, pattern, action, &error);
- if (!flow) {
- fprintf(stderr, "Flow rule creation failed: %s\n", error.message);
- return -error.type;
- }
- } else {
- fprintf(stderr, "Flow rule validation failed: %s\n", error.message);
- return -error.type;
- }
- fprintf(stderr, "IPIP flow rule created successfully (port %d, RSS level=%d)\n", port_id, rss.level);
- printf("IPIP flow rule created successfully (port %d, RSS level=%d)\n", port_id, rss.level);
- return 0;
-}
-#endif
-
-#ifdef FF_FDIR
-/*
- * Flow director allows the traffic to specific port to be processed on the
- * specific queue. Unlike FF_FLOW_ISOLATE, the FF_FDIR implementation uses
- * general flow rule so that most FDIR supported NIC will support. The best
- * using case of FDIR is (but not limited to), using multiple processes to
- * listen on different ports.
- *
- * This function can be called either in FSTACK or in end-application.
- *
- * Example:
- * Given 2 fstack instances A and B. Instance A listens on port 80, and
- * instance B listens on port 81. We want to process the traffic to port 80
- * on rx queue 0, and the traffic to port 81 on rx queue 1.
- * // port 80 rx queue 0
- * ret = fdir_add_tcp_flow(port_id, 0, FF_FLOW_INGRESS, 0, 80);
- * // port 81 rx queue 1
- * ret = fdir_add_tcp_flow(port_id, 1, FF_FLOW_INGRESS, 0, 81);
- */
-#define FF_FLOW_EGRESS 1
-#define FF_FLOW_INGRESS 2
-/**
- * Create a flow rule that moves packets with matching src and dest tcp port
- * to the target queue.
- *
- * This function uses general flow rules and doesn't rely on the flow_isolation
- * that not all the FDIR capable NIC support.
- *
- * @param port_id
- * The selected port.
- * @param queue
- * The target queue.
- * @param dir
- * The direction of the traffic.
- * 1 for egress, 2 for ingress and sum(1+2) for both.
- * @param tcp_sport
- * The src tcp port to match.
- * @param tcp_dport
- * The dest tcp port to match.
- *
- */
static int
fdir_add_tcp_flow(uint16_t port_id, uint16_t queue, uint16_t dir,
    uint16_t tcp_sport, uint16_t tcp_dport)
{
    /*
     * Build and install a QUEUE flow rule: ETH / IPV4 / TCP(sport,dport)
     * -> rx queue @queue. A port value of 0 means "wildcard" - its mask
     * is left zero so any value matches. Returns 0 on success, 1 if the
     * rule fails validation, or port_flow_complain()'s result if
     * creation fails after a successful validation.
     */
    struct rte_flow_attr attr;
    struct rte_flow_item flow_pattern[4];
    struct rte_flow_action flow_action[2];
    struct rte_flow *flow = NULL;
    struct rte_flow_action_queue flow_action_queue = { .index = queue };
    struct rte_flow_item_tcp tcp_spec;
    struct rte_flow_item_tcp tcp_mask;
    struct rte_flow_error rfe;
    int res;

    memset(flow_pattern, 0, sizeof(flow_pattern));
    memset(flow_action, 0, sizeof(flow_action));

    /*
     * set the rule attribute.
     * dir is a bitmask: FF_FLOW_INGRESS and/or FF_FLOW_EGRESS.
     */
    memset(&attr, 0, sizeof(struct rte_flow_attr));
    attr.ingress = ((dir & FF_FLOW_INGRESS) > 0);
    attr.egress = ((dir & FF_FLOW_EGRESS) > 0);

    /*
     * create the action sequence.
     * one action only, move packet to queue
     */
    flow_action[0].type = RTE_FLOW_ACTION_TYPE_QUEUE;
    flow_action[0].conf = &flow_action_queue;
    flow_action[1].type = RTE_FLOW_ACTION_TYPE_END;

    /* first two pattern levels match any Ethernet/IPv4 header */
    flow_pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH;
    flow_pattern[1].type = RTE_FLOW_ITEM_TYPE_IPV4;

    /*
     * set the third level of the pattern (TCP).
     * Ports are stored big-endian in the spec; a 0xffff mask makes the
     * port significant, a zero mask wildcards it.
     */
    memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp));
    memset(&tcp_mask, 0, sizeof(struct rte_flow_item_tcp));
    tcp_spec.hdr.src_port = htons(tcp_sport);
    tcp_mask.hdr.src_port = (tcp_sport == 0 ? 0: 0xffff);
    tcp_spec.hdr.dst_port = htons(tcp_dport);
    tcp_mask.hdr.dst_port = (tcp_dport == 0 ? 0: 0xffff);
    flow_pattern[2].type = RTE_FLOW_ITEM_TYPE_TCP;
    flow_pattern[2].spec = &tcp_spec;
    flow_pattern[2].mask = &tcp_mask;

    flow_pattern[3].type = RTE_FLOW_ITEM_TYPE_END;

    /* validate before create so unsupported rules fail cleanly */
    res = rte_flow_validate(port_id, &attr, flow_pattern, flow_action, &rfe);
    if (res)
        return (1);

    flow = rte_flow_create(port_id, &attr, flow_pattern, flow_action, &rfe);
    if (!flow)
        return port_flow_complain(&rfe);

    return (0);
}
-
-#endif
-
int
ff_dpdk_init(int argc, char **argv)
{
    /*
     * One-shot DPDK/F-Stack bring-up for this process. Validates the
     * process-count configuration, initializes the EAL, then builds
     * every shared object in dependency order: lcore conf, mbuf pools,
     * dispatch/msg rings, optional KNI, ports, clock, and the optional
     * flow rules (isolate / IPIP / FDIR). Exits the process on any
     * fatal error; returns 0 on success.
     */
    if (ff_global_cfg.dpdk.nb_procs < 1 ||
        ff_global_cfg.dpdk.nb_procs > RTE_MAX_LCORE ||
        ff_global_cfg.dpdk.proc_id >= ff_global_cfg.dpdk.nb_procs ||
        ff_global_cfg.dpdk.proc_id < 0) {
        printf("param num_procs[%d] or proc_id[%d] error!\n",
            ff_global_cfg.dpdk.nb_procs,
            ff_global_cfg.dpdk.proc_id);
        exit(1);
    }

    int ret = rte_eal_init(argc, argv);
    if (ret < 0) {
        rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
    }

    numa_on = ff_global_cfg.dpdk.numa_on;

    /* cap the tx drain delay at BURST_TX_DRAIN_US */
    idle_sleep = ff_global_cfg.dpdk.idle_sleep;
    pkt_tx_delay = ff_global_cfg.dpdk.pkt_tx_delay > BURST_TX_DRAIN_US ? \
        BURST_TX_DRAIN_US : ff_global_cfg.dpdk.pkt_tx_delay;

    init_lcore_conf();

    init_mem_pool();

    init_dispatch_ring();

    init_msg_ring();

#ifdef FF_KNI
    enable_kni = ff_global_cfg.kni.enable;
    if (enable_kni) {
        init_kni();
    }
#endif

#ifdef FF_USE_PAGE_ARRAY
    ff_mmap_init();
#endif

#ifdef FF_FLOW_ISOLATE
    // run once in primary process; must happen before init_port_start
    if (rte_eal_process_type() == RTE_PROC_PRIMARY){
        ret = port_flow_isolate(0, 1);
        if (ret < 0)
            rte_exit(EXIT_FAILURE, "init_port_isolate failed\n");
    }
#endif

    ret = init_port_start();
    if (ret < 0) {
        rte_exit(EXIT_FAILURE, "init_port_start failed\n");
    }

    init_clock();
#ifdef FF_FLOW_ISOLATE
    //Only give a example usage: port_id=0, tcp_port= 80.
    //Recommend:
    //1. init_flow should replace `set_rss_table` in `init_port_start` loop, This can set all NIC's port_id_list instead only 0 device(port_id).
    //2. using config options `tcp_port` replace magic number of 80
    ret = init_flow(0, 80);
    if (ret < 0) {
        rte_exit(EXIT_FAILURE, "init_port_flow failed\n");
    }
#endif

#ifdef FF_FLOW_IPIP
    // create ipip flow for port 0 (primary process only)
    if (rte_eal_process_type() == RTE_PROC_PRIMARY){
        ret = create_ipip_flow(0);
        if (ret != 0) {
            rte_exit(EXIT_FAILURE, "create_ipip_flow failed\n");
        }
    }
#endif

#ifdef FF_FDIR
    /*
     * Refer function header section for usage.
     */
    ret = fdir_add_tcp_flow(0, 0, FF_FLOW_INGRESS, 0, 80);
    if (ret)
        rte_exit(EXIT_FAILURE, "fdir_add_tcp_flow failed\n");
#endif

    return 0;
}
-
static void
ff_veth_input(const struct ff_dpdk_if_context *ctx, struct rte_mbuf *pkt)
{
    /*
     * Hand a received rte_mbuf (possibly a multi-segment chain) to the
     * FreeBSD stack via the veth interface. Drops the packet early if
     * hardware checksum validation is enabled and failed. On any
     * conversion failure both the stack-side mbuf chain and the DPDK
     * mbuf are freed; ownership of @pkt always leaves this function.
     */
    uint8_t rx_csum = ctx->hw_features.rx_csum;
    if (rx_csum) {
        if (pkt->ol_flags & (RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD)) {
            rte_pktmbuf_free(pkt);
            return;
        }
    }

    void *data = rte_pktmbuf_mtod(pkt, void*);
    uint16_t len = rte_pktmbuf_data_len(pkt);

    /* wrap the first segment; hdr is the stack-side packet header */
    void *hdr = ff_mbuf_gethdr(pkt, pkt->pkt_len, data, len, rx_csum);
    if (hdr == NULL) {
        rte_pktmbuf_free(pkt);
        return;
    }

    /* propagate a hardware-stripped VLAN tag to the stack mbuf */
    if (pkt->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED) {
        ff_mbuf_set_vlan_info(hdr, pkt->vlan_tci);
    }

    /* append the remaining segments, chaining each to the previous */
    struct rte_mbuf *pn = pkt->next;
    void *prev = hdr;
    while(pn != NULL) {
        data = rte_pktmbuf_mtod(pn, void*);
        len = rte_pktmbuf_data_len(pn);

        void *mb = ff_mbuf_get(prev, pn, data, len);
        if (mb == NULL) {
            ff_mbuf_free(hdr);
            rte_pktmbuf_free(pkt);
            return;
        }
        pn = pn->next;
        prev = mb;
    }

    ff_veth_process_packet(ctx->ifp, hdr);
}
-
-static enum FilterReturn
-protocol_filter(const void *data, uint16_t len)
-{
- if(len < RTE_ETHER_ADDR_LEN)
- return FILTER_UNKNOWN;
-
- const struct rte_ether_hdr *hdr;
- const struct rte_vlan_hdr *vlanhdr;
- hdr = (const struct rte_ether_hdr *)data;
- uint16_t ether_type = rte_be_to_cpu_16(hdr->ether_type);
- data += RTE_ETHER_HDR_LEN;
- len -= RTE_ETHER_HDR_LEN;
-
- if (ether_type == RTE_ETHER_TYPE_VLAN) {
- vlanhdr = (struct rte_vlan_hdr *)data;
- ether_type = rte_be_to_cpu_16(vlanhdr->eth_proto);
- data += sizeof(struct rte_vlan_hdr);
- len -= sizeof(struct rte_vlan_hdr);
- }
-
- if(ether_type == RTE_ETHER_TYPE_ARP) {
- return FILTER_ARP;
- }
-
- /* Multicast protocol, such as stp(used by zebra), is forwarded to kni and has a separate speed limit */
- if (rte_is_multicast_ether_addr(&hdr->dst_addr)) {
- return FILTER_MULTI;
- }
-
-#if (!defined(__FreeBSD__) && defined(INET6) ) || \
- ( defined(__FreeBSD__) && defined(INET6) && defined(FF_KNI))
- if (ether_type == RTE_ETHER_TYPE_IPV6) {
- return ff_kni_proto_filter(data,
- len, ether_type);
- }
-#endif
-
-#ifndef FF_KNI
- return FILTER_UNKNOWN;
-#else
- if (!enable_kni) {
- return FILTER_UNKNOWN;
- }
-
- if(ether_type != RTE_ETHER_TYPE_IPV4)
- return FILTER_UNKNOWN;
-
- return ff_kni_proto_filter(data,
- len, ether_type);
-#endif
-}
-
static inline void
pktmbuf_deep_attach(struct rte_mbuf *mi, const struct rte_mbuf *m)
{
    /*
     * Copy one segment's payload and metadata from @m into the freshly
     * allocated @mi. Unlike rte_pktmbuf_attach() this is a DEEP copy:
     * the data bytes are duplicated, so @mi does not share memory with
     * @m. Assumes mi's data room is large enough for m->data_len
     * (both come from the same pool family) - TODO confirm.
     * NOTE(review): `md` is unused here; kept for byte-compatibility.
     */
    struct rte_mbuf *md;
    void *src, *dst;

    dst = rte_pktmbuf_mtod(mi, void *);
    src = rte_pktmbuf_mtod(m, void *);

    mi->data_len = m->data_len;
    rte_memcpy(dst, src, m->data_len);

    /* replicate per-segment metadata so the clone behaves like m */
    mi->port = m->port;
    mi->vlan_tci = m->vlan_tci;
    mi->vlan_tci_outer = m->vlan_tci_outer;
    mi->tx_offload = m->tx_offload;
    mi->hash = m->hash;
    mi->ol_flags = m->ol_flags;
    mi->packet_type = m->packet_type;
}
-
/* copied from rte_pktmbuf_clone */
static inline struct rte_mbuf *
pktmbuf_deep_clone(const struct rte_mbuf *md,
    struct rte_mempool *mp)
{
    /*
     * Deep-copy the whole segment chain of @md into mbufs allocated
     * from @mp. Returns the new head, or NULL if any allocation fails
     * (in which case every segment allocated so far is freed).
     */
    struct rte_mbuf *mc, *mi, **prev;
    uint32_t pktlen;
    uint8_t nseg;

    if (unlikely ((mc = rte_pktmbuf_alloc(mp)) == NULL))
        return NULL;

    mi = mc;
    prev = &mi->next;
    pktlen = md->pkt_len;
    nseg = 0;

    /* copy segment by segment; the loop exits either because the
     * source chain ended (md == NULL) or an alloc failed (mi == NULL) */
    do {
        nseg++;
        pktmbuf_deep_attach(mi, md);
        *prev = mi;
        prev = &mi->next;
    } while ((md = md->next) != NULL &&
        (mi = rte_pktmbuf_alloc(mp)) != NULL);

    *prev = NULL;
    mc->nb_segs = nseg;
    mc->pkt_len = pktlen;

    /* Allocation of new indirect segment failed */
    if (unlikely (mi == NULL)) {
        rte_pktmbuf_free(mc);
        return NULL;
    }

    __rte_mbuf_sanity_check(mc, 1);
    return mc;
}
-
static inline void
ff_add_vlan_tag(struct rte_mbuf * rtem)
{
    /*
     * Re-insert a VLAN tag that the NIC stripped on rx before the
     * packet is transmitted or handed to KNI (vlan tx offload/insert
     * is not used here). Grows the mbuf headroom by 4 bytes, slides
     * the 14-byte Ethernet header down (memmove: regions overlap),
     * and writes the vlan header between it and the payload.
     * Silently leaves the packet untouched if there is no headroom.
     */
    void *data = NULL;

    if (rtem->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED) {
        data = rte_pktmbuf_prepend(rtem, sizeof(struct rte_vlan_hdr));
        if (data != NULL) {
            memmove(data, data + sizeof(struct rte_vlan_hdr), RTE_ETHER_HDR_LEN);
            struct rte_ether_hdr *etherhdr = (struct rte_ether_hdr *)data;
            struct rte_vlan_hdr *vlanhdr = (struct rte_vlan_hdr *)(data + RTE_ETHER_HDR_LEN);
            vlanhdr->vlan_tci = rte_cpu_to_be_16(rtem->vlan_tci);
            vlanhdr->eth_proto = etherhdr->ether_type;
            etherhdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN);
        }
    }
}
-
static inline void
process_packets(uint16_t port_id, uint16_t queue_id, struct rte_mbuf **bufs,
    uint16_t count, const struct ff_dpdk_if_context *ctx, int pkts_from_ring)
{
    /*
     * Core rx path: for each mbuf decide whether it is answered
     * directly (user dispatcher), re-dispatched to another queue's
     * ring, broadcast (ARP/NDP), diverted to KNI, or delivered to the
     * stack via ff_veth_input(). @pkts_from_ring is nonzero when the
     * packets come from the dispatch ring, in which case pcap dump,
     * traffic accounting, dispatching and ARP re-broadcast are all
     * skipped (they already happened on the receiving lcore).
     * Ownership of every mbuf in @bufs is consumed here.
     */
    struct lcore_conf *qconf = &lcore_conf;
    uint16_t nb_queues = qconf->nb_queue_list[port_id];

    uint16_t i;
    for (i = 0; i < count; i++) {
        struct rte_mbuf *rtem = bufs[i];

        if (unlikely( ff_global_cfg.pcap.enable)) {
            if (!pkts_from_ring) {
                ff_dump_packets( ff_global_cfg.pcap.save_path, rtem, ff_global_cfg.pcap.snap_len, ff_global_cfg.pcap.save_len);
            }
        }

        void *data = rte_pktmbuf_mtod(rtem, void*);
        uint16_t len = rte_pktmbuf_data_len(rtem);

        if (!pkts_from_ring) {
            ff_traffic.rx_packets += rtem->nb_segs;
            ff_traffic.rx_bytes += rte_pktmbuf_pkt_len(rtem);
        }

        /* user-registered dispatcher decides the owning queue; its
         * runtime is accounted separately in usr_cb_tsc */
        if (!pkts_from_ring && packet_dispatcher) {
            uint64_t cur_tsc = rte_rdtsc();
            int ret = (*packet_dispatcher)(data, &len, queue_id, nb_queues);
            usr_cb_tsc += rte_rdtsc() - cur_tsc;
            if (ret == FF_DISPATCH_RESPONSE) {
                rte_pktmbuf_pkt_len(rtem) = rte_pktmbuf_data_len(rtem) = len;
                /*
                 * We have not support vlan out strip
                 */
                ff_add_vlan_tag(rtem);
                send_single_packet(rtem, port_id);
                continue;
            }

            if (ret == FF_DISPATCH_ERROR || ret >= nb_queues) {
                //ff_traffic.rx_dropped += rtem->nb_segs; /* Not counted as packet drop */
                rte_pktmbuf_free(rtem);
                continue;
            }

            /* belongs to another queue: hand it over via its ring */
            if (ret != queue_id) {
                ret = rte_ring_enqueue(dispatch_ring[port_id][ret], rtem);
                if (ret < 0) {
                    ff_traffic.rx_dropped += rtem->nb_segs;
                    rte_pktmbuf_free(rtem);
                }

                continue;
            }
        }

        enum FilterReturn filter = protocol_filter(data, len);
#ifdef INET6
        if (filter == FILTER_ARP || filter == FILTER_NDP) {
#else
        if (filter == FILTER_ARP) {
#endif
            struct rte_mempool *mbuf_pool;
            struct rte_mbuf *mbuf_clone;
            /* broadcast ARP/NDP to every other queue so all lcores
             * learn the neighbor entry */
            if (!pkts_from_ring) {
                uint16_t j;
                for(j = 0; j < nb_queues; ++j) {
                    if(j == queue_id)
                        continue;

                    unsigned socket_id = 0;
                    if (numa_on) {
                        uint16_t lcore_id = qconf->port_cfgs[port_id].lcore_list[j];
                        socket_id = rte_lcore_to_socket_id(lcore_id);
                    }
                    mbuf_pool = pktmbuf_pool[socket_id];
                    mbuf_clone = pktmbuf_deep_clone(rtem, mbuf_pool);
                    if(mbuf_clone) {
                        int ret = rte_ring_enqueue(dispatch_ring[port_id][j],
                            mbuf_clone);
                        if (ret < 0) {
                            ff_traffic.rx_dropped += mbuf_clone->nb_segs;
                            rte_pktmbuf_free(mbuf_clone);
                        }
                    }
                }
            }

#ifdef FF_KNI
            /* also mirror the ARP/NDP to the kernel via KNI */
            if (enable_kni && rte_eal_process_type() == RTE_PROC_PRIMARY) {
                mbuf_pool = pktmbuf_pool[qconf->socket_id];
                mbuf_clone = pktmbuf_deep_clone(rtem, mbuf_pool);
                if(mbuf_clone) {
                    ff_add_vlan_tag(mbuf_clone);
                    ff_kni_enqueue(filter, port_id, mbuf_clone);
                }
            }
#endif
            ff_veth_input(ctx, rtem);
#ifdef FF_KNI
        } else if (enable_kni) {
            /* knictl_action selects the steering policy at runtime */
            if (knictl_action == FF_KNICTL_ACTION_ALL_TO_KNI){
                ff_add_vlan_tag(rtem);
                ff_kni_enqueue(filter, port_id, rtem);
            } else if (knictl_action == FF_KNICTL_ACTION_ALL_TO_FF){
                ff_veth_input(ctx, rtem);
            } else if (knictl_action == FF_KNICTL_ACTION_DEFAULT){
                if (enable_kni &&
                    ((filter == FILTER_KNI && kni_accept) ||
                    ((filter == FILTER_UNKNOWN || filter >= FILTER_OSPF) && !kni_accept)) ) {
                    ff_add_vlan_tag(rtem);
                    ff_kni_enqueue(filter, port_id, rtem);
                } else {
                    ff_veth_input(ctx, rtem);
                }
            } else {
                ff_veth_input(ctx, rtem);
            }
#endif
        } else {
            ff_veth_input(ctx, rtem);
        }
    }
}
-
-static inline int
-process_dispatch_ring(uint16_t port_id, uint16_t queue_id,
- struct rte_mbuf **pkts_burst, const struct ff_dpdk_if_context *ctx)
-{
- /* read packet from ring buf and to process */
- uint16_t nb_rb;
- nb_rb = rte_ring_dequeue_burst(dispatch_ring[port_id][queue_id],
- (void **)pkts_burst, MAX_PKT_BURST, NULL);
-
- if(nb_rb > 0) {
- process_packets(port_id, queue_id, pkts_burst, nb_rb, ctx, 1);
- }
-
- return nb_rb;
-}
-
-static inline void
-handle_sysctl_msg(struct ff_msg *msg)
-{
- int ret = ff_sysctl(msg->sysctl.name, msg->sysctl.namelen,
- msg->sysctl.old, msg->sysctl.oldlenp, msg->sysctl.new,
- msg->sysctl.newlen);
-
- if (ret < 0) {
- msg->result = errno;
- } else {
- msg->result = 0;
- }
-}
-
static inline void
handle_ioctl_msg(struct ff_msg *msg)
{
    /*
     * Execute an ioctl on a temporary stack-side datagram socket
     * (AF_INET6 for FF_IOCTL6 messages, AF_INET otherwise).
     * Stores 0 in msg->result on success, errno on failure.
     */
    int fd, ret;
#ifdef INET6
    if (msg->msg_type == FF_IOCTL6) {
        fd = ff_socket(AF_INET6, SOCK_DGRAM, 0);
    } else
#endif
    fd = ff_socket(AF_INET, SOCK_DGRAM, 0);

    if (fd < 0) {
        ret = -1;
        goto done;
    }

    ret = ff_ioctl_freebsd(fd, msg->ioctl.cmd, msg->ioctl.data);

    ff_close(fd);

done:
    if (ret < 0) {
        msg->result = errno;
    } else {
        msg->result = 0;
    }
}
-
-static inline void
-handle_route_msg(struct ff_msg *msg)
-{
- int ret = ff_rtioctl(msg->route.fib, msg->route.data,
- &msg->route.len, msg->route.maxlen);
- if (ret < 0) {
- msg->result = errno;
- } else {
- msg->result = 0;
- }
-}
-
-static inline void
-handle_top_msg(struct ff_msg *msg)
-{
- msg->top = ff_top_status;
- msg->result = 0;
-}
-
-#ifdef FF_NETGRAPH
-static inline void
-handle_ngctl_msg(struct ff_msg *msg)
-{
- int ret = ff_ngctl(msg->ngctl.cmd, msg->ngctl.data);
- if (ret < 0) {
- msg->result = errno;
- } else {
- msg->result = 0;
- msg->ngctl.ret = ret;
- }
-}
-#endif
-
-#ifdef FF_IPFW
static inline void
handle_ipfw_msg(struct ff_msg *msg)
{
    /*
     * Service an ipfw get/set request through a temporary raw socket:
     * FF_IPFW_GET maps to getsockopt, FF_IPFW_SET to setsockopt.
     * Stores 0 in msg->result on success, errno otherwise (ENOTSUP
     * for unknown sub-commands).
     */
    int fd, ret;
    fd = ff_socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
    if (fd < 0) {
        ret = -1;
        goto done;
    }

    switch (msg->ipfw.cmd) {
        case FF_IPFW_GET:
            ret = ff_getsockopt_freebsd(fd, msg->ipfw.level,
                msg->ipfw.optname, msg->ipfw.optval,
                msg->ipfw.optlen);
            break;
        case FF_IPFW_SET:
            ret = ff_setsockopt_freebsd(fd, msg->ipfw.level,
                msg->ipfw.optname, msg->ipfw.optval,
                *(msg->ipfw.optlen));
            break;
        default:
            ret = -1;
            errno = ENOTSUP;
            break;
    }

    ff_close(fd);

done:
    if (ret < 0) {
        msg->result = errno;
    } else {
        msg->result = 0;
    }
}
-#endif
-
-static inline void
-handle_traffic_msg(struct ff_msg *msg)
-{
- msg->traffic = ff_traffic;
- msg->result = 0;
-}
-
-void ff_get_traffic(void *buffer)
-{
- *(struct ff_traffic_args *)buffer = ff_traffic;
-}
-
-#ifdef FF_KNI
-static inline void
-handle_knictl_msg(struct ff_msg *msg)
-{
- if (msg->knictl.kni_cmd == FF_KNICTL_CMD_SET){
- switch (msg->knictl.kni_action){
- case FF_KNICTL_ACTION_ALL_TO_FF: knictl_action = FF_KNICTL_ACTION_ALL_TO_FF; msg->result = 0; printf("new kni action: alltoff\n"); break;
- case FF_KNICTL_ACTION_ALL_TO_KNI: knictl_action = FF_KNICTL_ACTION_ALL_TO_KNI; msg->result = 0; printf("new kni action: alltokni\n"); break;
- case FF_KNICTL_ACTION_DEFAULT: knictl_action = FF_KNICTL_ACTION_DEFAULT; msg->result = 0; printf("new kni action: default\n"); break;
- default: msg->result = -1;
- }
- }
- else if (msg->knictl.kni_cmd == FF_KNICTL_CMD_GET){
- msg->knictl.kni_action = knictl_action;
- } else {
- msg->result = -2;
- }
-}
-#endif
-
static inline void
handle_default_msg(struct ff_msg *msg)
{
    /* Unknown message type: report "operation not supported". */
    msg->result = ENOTSUP;
}
-
static inline void
handle_msg(struct ff_msg *msg, uint16_t proc_id)
{
    /*
     * Dispatch one control message to its handler, then enqueue the
     * (in-place modified) message on the per-type reply ring. If the
     * reply ring is full, the message is reclaimed: any oversized
     * buffer swapped in by the requester is released and the message
     * object returns to the pool so it is not leaked.
     */
    switch (msg->msg_type) {
        case FF_SYSCTL:
            handle_sysctl_msg(msg);
            break;
        case FF_IOCTL:
#ifdef INET6
        case FF_IOCTL6:
#endif
            handle_ioctl_msg(msg);
            break;
        case FF_ROUTE:
            handle_route_msg(msg);
            break;
        case FF_TOP:
            handle_top_msg(msg);
            break;
#ifdef FF_NETGRAPH
        case FF_NGCTL:
            handle_ngctl_msg(msg);
            break;
#endif
#ifdef FF_IPFW
        case FF_IPFW_CTL:
            handle_ipfw_msg(msg);
            break;
#endif
        case FF_TRAFFIC:
            handle_traffic_msg(msg);
            break;
#ifdef FF_KNI
        case FF_KNICTL:
            handle_knictl_msg(msg);
            break;
#endif
        default:
            handle_default_msg(msg);
            break;
    }
    if (rte_ring_enqueue(msg_ring[proc_id].ring[msg->msg_type], msg) < 0) {
        /* reply ring full: restore the original buffer and recycle */
        if (msg->original_buf) {
            rte_free(msg->buf_addr);
            msg->buf_addr = msg->original_buf;
            msg->buf_len = msg->original_buf_len;
            msg->original_buf = NULL;
        }

        rte_mempool_put(message_pool, msg);
    }
}
-
-static inline int
-process_msg_ring(uint16_t proc_id, struct rte_mbuf **pkts_burst)
-{
- /* read msg from ring buf and to process */
- uint16_t nb_rb;
- int i;
-
- nb_rb = rte_ring_dequeue_burst(msg_ring[proc_id].ring[0],
- (void **)pkts_burst, MAX_PKT_BURST, NULL);
-
- if (likely(nb_rb == 0))
- return 0;
-
- for (i = 0; i < nb_rb; ++i) {
- handle_msg((struct ff_msg *)pkts_burst[i], proc_id);
- }
-
- return 0;
-}
-
/* Send burst of packets on an output interface */
static inline int
send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port)
{
    /*
     * Transmit the first @n buffered mbufs of @port on this lcore's tx
     * queue. Sent packets are accounted in ff_traffic; anything the
     * NIC did not accept is counted as dropped and freed. With
     * FF_USE_PAGE_ARRAY the paired BSD mbufs are recycled/freed in
     * step with their DPDK counterparts. Always returns 0.
     */
    struct rte_mbuf **m_table;
    int ret;
    uint16_t queueid;

    queueid = qconf->tx_queue_id[port];
    m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;

    if (unlikely(ff_global_cfg.pcap.enable)) {
        uint16_t i;
        for (i = 0; i < n; i++) {
            ff_dump_packets( ff_global_cfg.pcap.save_path, m_table[i],
                ff_global_cfg.pcap.snap_len, ff_global_cfg.pcap.save_len);
        }
    }

    ret = rte_eth_tx_burst(port, queueid, m_table, n);
    uint16_t i;
    /* account only the packets the driver accepted ([0, ret)) */
    for (i = 0; i < ret; i++) {
        ff_traffic.tx_packets += m_table[i]->nb_segs; // use ret or rets' nb_segs?
        ff_traffic.tx_bytes += rte_pktmbuf_pkt_len(m_table[i]);
#ifdef FF_USE_PAGE_ARRAY
        if (qconf->tx_mbufs[port].bsd_m_table[i])
            ff_enq_tx_bsdmbuf(port, qconf->tx_mbufs[port].bsd_m_table[i], m_table[i]->nb_segs);
#endif
    }
    /* free the unsent tail ([ret, n)) and count it as dropped */
    if (unlikely(ret < n)) {
        do {
            ff_traffic.tx_dropped += m_table[ret]->nb_segs;
            rte_pktmbuf_free(m_table[ret]);
#ifdef FF_USE_PAGE_ARRAY
            if ( qconf->tx_mbufs[port].bsd_m_table[ret] )
                ff_mbuf_free(qconf->tx_mbufs[port].bsd_m_table[ret]);
#endif
        } while (++ret < n);
    }
    return 0;
}
-
-/* Enqueue a single packet, and send burst if queue is filled */
-static inline int
-send_single_packet(struct rte_mbuf *m, uint8_t port)
-{
- uint16_t len;
- struct lcore_conf *qconf;
-
- qconf = &lcore_conf;
- len = qconf->tx_mbufs[port].len;
- qconf->tx_mbufs[port].m_table[len] = m;
- len++;
-
- /* enough pkts to be sent */
- if (unlikely(len == MAX_PKT_BURST)) {
- send_burst(qconf, MAX_PKT_BURST, port);
- len = 0;
- }
-
- qconf->tx_mbufs[port].len = len;
- return 0;
-}
-
int
ff_dpdk_if_send(struct ff_dpdk_if_context *ctx, void *m,
    int total)
{
    /*
     * Convert a stack (BSD) mbuf chain @m of @total bytes into a DPDK
     * mbuf chain, apply tx offload flags (IP/L4 checksum, TSO), free
     * the BSD mbuf, and queue the result for transmission. Returns
     * send_single_packet()'s result (0) or -1 on allocation/copy
     * failure. Ownership of @m is always consumed.
     *
     * With FF_USE_PAGE_ARRAY the zero-copy path below returns early;
     * the rest of the function is compiled but unreachable then.
     */
#ifdef FF_USE_PAGE_ARRAY
    struct lcore_conf *qconf = &lcore_conf;
    int len = 0;

    len = ff_if_send_onepkt(ctx, m,total);
    if (unlikely(len == MAX_PKT_BURST)) {
        send_burst(qconf, MAX_PKT_BURST, ctx->port_id);
        len = 0;
    }
    qconf->tx_mbufs[ctx->port_id].len = len;
    return 0;
#endif
    struct rte_mempool *mbuf_pool = pktmbuf_pool[lcore_conf.socket_id];
    struct rte_mbuf *head = rte_pktmbuf_alloc(mbuf_pool);
    if (head == NULL) {
        ff_traffic.tx_dropped++;
        ff_mbuf_free(m);
        return -1;
    }

    head->pkt_len = total;
    head->nb_segs = 0;

    /* copy payload into as many segments as needed, chaining them */
    int off = 0;
    struct rte_mbuf *cur = head, *prev = NULL;
    while(total > 0) {
        if (cur == NULL) {
            cur = rte_pktmbuf_alloc(mbuf_pool);
            if (cur == NULL) {
                ff_traffic.tx_dropped += head->nb_segs + 1;
                rte_pktmbuf_free(head);
                ff_mbuf_free(m);
                return -1;
            }
        }

        if (prev != NULL) {
            prev->next = cur;
        }
        head->nb_segs++;

        prev = cur;
        void *data = rte_pktmbuf_mtod(cur, void*);
        int len = total > RTE_MBUF_DEFAULT_DATAROOM ? RTE_MBUF_DEFAULT_DATAROOM : total;
        int ret = ff_mbuf_copydata(m, data, off, len);
        if (ret < 0) {
            ff_traffic.tx_dropped += head->nb_segs;
            rte_pktmbuf_free(head);
            ff_mbuf_free(m);
            return -1;
        }


        cur->data_len = len;
        off += len;
        total -= len;
        cur = NULL;
    }

    /* translate the stack's offload requests into mbuf ol_flags */
    struct ff_tx_offload offload = {0};
    ff_mbuf_tx_offload(m, &offload);

    void *data = rte_pktmbuf_mtod(head, void*);

    if (offload.ip_csum) {
        /* ipv6 not supported yet */
        struct rte_ipv4_hdr *iph;
        int iph_len;
        iph = (struct rte_ipv4_hdr *)(data + RTE_ETHER_HDR_LEN);
        iph_len = (iph->version_ihl & 0x0f) << 2;

        head->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_IPV4;
        head->l2_len = RTE_ETHER_HDR_LEN;
        head->l3_len = iph_len;
    }

    if (ctx->hw_features.tx_csum_l4) {
        struct rte_ipv4_hdr *iph;
        int iph_len;
        iph = (struct rte_ipv4_hdr *)(data + RTE_ETHER_HDR_LEN);
        iph_len = (iph->version_ihl & 0x0f) << 2;

        if (iph->version == 4) {
            head->ol_flags |= RTE_MBUF_F_TX_IPV4;
        } else {
            head->ol_flags |= RTE_MBUF_F_TX_IPV6;
        }

        if (offload.tcp_csum) {
            head->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM;
            head->l2_len = RTE_ETHER_HDR_LEN;
            head->l3_len = iph_len;
        }

        /*
         * TCP segmentation offload.
         *
         * - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag
         *   implies PKT_TX_TCP_CKSUM)
         * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
         * - if it's IPv4, set the PKT_TX_IP_CKSUM flag and
         *   write the IP checksum to 0 in the packet
         * - fill the mbuf offload information: l2_len,
         *   l3_len, l4_len, tso_segsz
         * - calculate the pseudo header checksum without taking ip_len
         *   in account, and set it in the TCP header. Refer to
         *   rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum() that can be
         *   used as helpers.
         */
        if (offload.tso_seg_size) {
            struct rte_tcp_hdr *tcph;
            int tcph_len;
            tcph = (struct rte_tcp_hdr *)((char *)iph + iph_len);
            tcph_len = (tcph->data_off & 0xf0) >> 2;
            tcph->cksum = rte_ipv4_phdr_cksum(iph, RTE_MBUF_F_TX_TCP_SEG);

            head->ol_flags |= RTE_MBUF_F_TX_TCP_SEG;
            head->l4_len = tcph_len;
            head->tso_segsz = offload.tso_seg_size;
        }

        if (offload.udp_csum) {
            head->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM;
            head->l2_len = RTE_ETHER_HDR_LEN;
            head->l3_len = iph_len;
        }
    }

    /* DPDK copy is complete; release the stack-side mbuf */
    ff_mbuf_free(m);

    return send_single_packet(head, ctx->port_id);
}
-
int
ff_dpdk_raw_packet_send(void *data, int total, uint16_t port_id)
{
    /*
     * Copy @total bytes of a raw, pre-built frame at @data into a DPDK
     * mbuf chain and queue it for transmission on @port_id. No offload
     * flags are set - the frame is sent as-is. Returns
     * send_single_packet()'s result (0) or -1 on allocation failure.
     * The caller keeps ownership of @data.
     */
    struct rte_mempool *mbuf_pool = pktmbuf_pool[lcore_conf.socket_id];
    struct rte_mbuf *head = rte_pktmbuf_alloc(mbuf_pool);
    if (head == NULL) {
        ff_traffic.tx_dropped++;
        return -1;
    }

    head->pkt_len = total;
    head->nb_segs = 0;

    /* fill segments one data-room at a time, chaining as we go */
    int off = 0;
    struct rte_mbuf *cur = head, *prev = NULL;
    while(total > 0) {
        if (cur == NULL) {
            cur = rte_pktmbuf_alloc(mbuf_pool);
            if (cur == NULL) {
                ff_traffic.tx_dropped += head->nb_segs + 1;
                rte_pktmbuf_free(head);
                return -1;
            }
        }

        if (prev != NULL) {
            prev->next = cur;
        }
        head->nb_segs++;

        prev = cur;
        void *cur_data = rte_pktmbuf_mtod(cur, void*);
        int len = total > RTE_MBUF_DEFAULT_DATAROOM ? RTE_MBUF_DEFAULT_DATAROOM : total;
        memcpy(cur_data, data + off, len);

        cur->data_len = len;
        off += len;
        total -= len;
        cur = NULL;
    }

    return send_single_packet(head, port_id);
}
-
static int
main_loop(void *arg)
{
    /*
     * Per-lcore event loop launched by ff_dpdk_run(). Each iteration:
     * timer maintenance (+KNI rate-limit reset), tx drain, rx from all
     * assigned queues (with dispatch-ring drain and KNI processing),
     * control-message handling, the user loop callback, and optional
     * idle sleep. TSC deltas are folded into ff_top_status so `ff_top`
     * can report usr/sys/idle time. Runs until stop_loop is set.
     */
    struct loop_routine *lr = (struct loop_routine *)arg;

    struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
    uint64_t prev_tsc, diff_tsc, cur_tsc, usch_tsc, div_tsc, usr_tsc, sys_tsc, end_tsc, idle_sleep_tsc;
    int i, j, nb_rx, idle;
    uint16_t port_id, queue_id;
    struct lcore_conf *qconf;
    uint64_t drain_tsc = 0;
    struct ff_dpdk_if_context *ctx;

    /* convert the configured tx drain delay (us) into TSC ticks */
    if (pkt_tx_delay) {
        drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * pkt_tx_delay;
    }

    prev_tsc = 0;
    usch_tsc = 0;

    qconf = &lcore_conf;

    while (1) {

        if (unlikely(stop_loop)) {
            break;
        }

        cur_tsc = rte_rdtsc();
        /* run DPDK timers only when the freebsd clock is due */
        if (unlikely(freebsd_clock.expire < cur_tsc)) {
            rte_timer_manage();

#ifdef FF_KNI
            /* reset kni ratelimt */
            if (enable_kni &&
                (ff_global_cfg.kni.console_packets_ratelimit ||
                ff_global_cfg.kni.general_packets_ratelimit ||
                ff_global_cfg.kni.kernel_packets_ratelimit)) {
                static time_t last_sec = 0;
                time_t sec;
                long nsec;

                /* once per wall-clock second: log overruns, zero counters */
                ff_get_current_time(&sec, &nsec);
                if (sec > last_sec) {
                    if (kni_rate_limt.gerneal_packets > ff_global_cfg.kni.general_packets_ratelimit ||
                        kni_rate_limt.console_packets > ff_global_cfg.kni.console_packets_ratelimit ||
                        kni_rate_limt.kernel_packets > ff_global_cfg.kni.kernel_packets_ratelimit) {
                        printf("kni ratelimit, general:%lu/%d, console:%lu/%d, kernel:%lu/%d, last sec:%ld, sec:%ld\n",
                            kni_rate_limt.gerneal_packets, ff_global_cfg.kni.general_packets_ratelimit,
                            kni_rate_limt.console_packets, ff_global_cfg.kni.console_packets_ratelimit,
                            kni_rate_limt.kernel_packets, ff_global_cfg.kni.kernel_packets_ratelimit, last_sec, sec);
                    }
                    last_sec = sec;
                    kni_rate_limt.gerneal_packets = 0;
                    kni_rate_limt.console_packets = 0;
                    kni_rate_limt.kernel_packets = 0;
                }
            }
#endif
        }

        idle = 1;
        sys_tsc = 0;
        usr_tsc = 0;
        usr_cb_tsc = 0;

        /*
         * TX burst queue drain
         */
        diff_tsc = cur_tsc - prev_tsc;
        if (unlikely(diff_tsc >= drain_tsc)) {
            for (i = 0; i < qconf->nb_tx_port; i++) {
                port_id = qconf->tx_port_id[i];
                if (qconf->tx_mbufs[port_id].len == 0)
                    continue;

                idle = 0;

                send_burst(qconf,
                    qconf->tx_mbufs[port_id].len,
                    port_id);
                qconf->tx_mbufs[port_id].len = 0;
            }

            prev_tsc = cur_tsc;
        }

        /*
         * Read packet from RX queues
         */
        for (i = 0; i < qconf->nb_rx_queue; ++i) {
            port_id = qconf->rx_queue_list[i].port_id;
            queue_id = qconf->rx_queue_list[i].queue_id;
            ctx = veth_ctx[port_id];

#ifdef FF_KNI
            if (enable_kni && rte_eal_process_type() == RTE_PROC_PRIMARY) {
                ff_kni_process(port_id, queue_id, pkts_burst, MAX_PKT_BURST);
            }
#endif

            /* first drain what other lcores dispatched to us */
            idle &= !process_dispatch_ring(port_id, queue_id, pkts_burst, ctx);

            nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts_burst,
                MAX_PKT_BURST);
            if (nb_rx == 0)
                continue;

            idle = 0;

            /* Prefetch first packets */
            for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
                rte_prefetch0(rte_pktmbuf_mtod(
                        pkts_burst[j], void *));
            }

            /* Prefetch and handle already prefetched packets */
            for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
                rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
                        j + PREFETCH_OFFSET], void *));
                process_packets(port_id, queue_id, &pkts_burst[j], 1, ctx, 0);
            }

            /* Handle remaining prefetched packets */
            for (; j < nb_rx; j++) {
                process_packets(port_id, queue_id, &pkts_burst[j], 1, ctx, 0);
            }
        }

        process_msg_ring(qconf->proc_id, pkts_burst);
#ifdef FF_LOOPBACK_SUPPORT
        ff_swi_net_excute();
#endif
        div_tsc = rte_rdtsc();

        /* invoke the user loop when busy, or periodically when idle */
        if (likely(lr->loop != NULL && (!idle || cur_tsc - usch_tsc >= drain_tsc))) {
            usch_tsc = cur_tsc;
            lr->loop(lr->arg);
        }

        idle_sleep_tsc = rte_rdtsc();
        if (likely(idle && idle_sleep)) {
            rte_delay_us_sleep(idle_sleep);
            end_tsc = rte_rdtsc();
        } else {
            end_tsc = idle_sleep_tsc;
        }

        /* split this iteration's cycles into usr/sys/idle buckets */
        usr_tsc = usr_cb_tsc;
        if (usch_tsc == cur_tsc) {
            usr_tsc += idle_sleep_tsc - div_tsc;
        }

        if (!idle) {
            sys_tsc = div_tsc - cur_tsc - usr_cb_tsc;
            ff_top_status.sys_tsc += sys_tsc;
        }

        ff_top_status.usr_tsc += usr_tsc;
        ff_top_status.work_tsc += end_tsc - cur_tsc;
        ff_top_status.idle_tsc += end_tsc - cur_tsc - usr_tsc - sys_tsc;

        ff_top_status.loops++;
    }

    return 0;
}
-
-int
-ff_dpdk_if_up(void) {
- int i;
- struct lcore_conf *qconf = &lcore_conf;
- for (i = 0; i < qconf->nb_tx_port; i++) {
- uint16_t port_id = qconf->tx_port_id[i];
-
- struct ff_port_cfg *pconf = &qconf->port_cfgs[port_id];
- veth_ctx[port_id] = ff_veth_attach(pconf);
- if (veth_ctx[port_id] == NULL) {
- rte_exit(EXIT_FAILURE, "ff_veth_attach failed");
- }
- }
-
- return 0;
-}
-
-void
-ff_dpdk_run(loop_func_t loop, void *arg) {
- struct loop_routine *lr = rte_malloc(NULL,
- sizeof(struct loop_routine), 0);
- stop_loop = 0;
- lr->loop = loop;
- lr->arg = arg;
- rte_eal_mp_remote_launch(main_loop, lr, CALL_MAIN);
- rte_eal_mp_wait_lcore();
- rte_free(lr);
-}
-
void
ff_dpdk_stop(void) {
    /* Request termination: every lcore's main_loop checks stop_loop
     * at the top of each iteration and exits when it is set. */
    stop_loop = 1;
}
-
void
ff_dpdk_pktmbuf_free(void *m)
{
    /* Free a single DPDK mbuf segment (not the whole chain) - used as
     * the release callback for buffers handed to the stack. */
    rte_pktmbuf_free_seg((struct rte_mbuf *)m);
}
-
-static uint32_t
-toeplitz_hash(unsigned keylen, const uint8_t *key,
- unsigned datalen, const uint8_t *data)
-{
- uint32_t hash = 0, v;
- u_int i, b;
-
- /* XXXRW: Perhaps an assertion about key length vs. data length? */
-
- v = (key[0]<<24) + (key[1]<<16) + (key[2] <<8) + key[3];
- for (i = 0; i < datalen; i++) {
- for (b = 0; b < 8; b++) {
- if (data[i] & (1<<(7-b)))
- hash ^= v;
- v <<= 1;
- if ((i + 4) < keylen &&
- (key[i+4] & (1<<(7-b))))
- v |= 1;
- }
- }
- return (hash);
-}
-
-int
-ff_in_pcbladdr(uint16_t family, void *faddr, uint16_t fport, void *laddr)
-{
- int ret = 0;
- uint16_t fa;
-
- if (!pcblddr_fun)
- return ret;
-
- if (family == AF_INET)
- fa = AF_INET;
- else if (family == AF_INET6_FREEBSD)
- fa = AF_INET6_LINUX;
- else
- return EADDRNOTAVAIL;
-
- ret = (*pcblddr_fun)(fa, faddr, fport, laddr);
-
- return ret;
-}
-
/*
 * Register the callback used by ff_in_pcbladdr() for local-address
 * selection.  Passing NULL effectively disables the hook.
 */
void
ff_regist_pcblddr_fun(pcblddr_func_t func)
{
    pcblddr_fun = func;
}
-
-
//#define FF_RSS_NUMBER_TBL8S (1 << 20) /* Need less than 1 << 21 */
/* Length of the hashed RSS tuple in bytes: sip(4) + dip(4) + sport(2) + dport(2). */
#define FF_RSS_KEY_LEN (12) /* sip, dip, sport, dport */

/* Not include all paras, just for get entries.
 * NOTE(review): partial mirror of DPDK's private struct rte_hash layout —
 * must stay in sync with the DPDK version in use; confirm on upgrades. */
struct rte_hash {
    char name[RTE_HASH_NAMESIZE]; /**< Name of the hash. */
    uint32_t entries; /**< Total table entries. */
    uint32_t num_buckets; /**< Number of buckets in table. */
};
-
/*
 * Trivial hash callback: XOR of the first three 32-bit words of `data`
 * (presumably sip ^ dip ^ the packed port word — layout per
 * FF_RSS_KEY_LEN).  `data_len` and `init_val` are accepted for interface
 * compatibility but ignored; `data` must hold at least 12 bytes.
 */
static inline uint32_t
ff_rss_hash(const void *data, uint32_t data_len,
    uint32_t init_val)
{
    const uint32_t *words = (const uint32_t *)data;

    (void)data_len;
    (void)init_val;

    return words[0] ^ words[1] ^ words[2];
}
-
/* Remote IP:PORT — table widths must be powers of two (masks below). */
#define FF_RSS_TBL_MAX_SIP (4)
#define FF_RSS_TBL_MAX_SPORT (4)
#define FF_RSS_TBL_MAX_SIP_MASK (FF_RSS_TBL_MAX_SIP - 1)
#define FF_RSS_TBL_MAX_SPORT_MASK (FF_RSS_TBL_MAX_SPORT - 1)
/* Server/local IP:PORT — one stat byte per possible destination port. */
#define FF_RSS_TBL_MAX_DIP (4)
#define FF_RSS_TBL_MAX_DPORT (65536)
#define FF_RSS_TBL_MAX_DIP_MASK (FF_RSS_TBL_MAX_DIP - 1)
#define FF_RSS_TBL_MAX_DPORT_MASK (FF_RSS_TBL_MAX_DPORT - 1)

/* Number of (sip, sport) open-addressing slots in the outer table. */
#define FF_RSS_TBL_SIP_ENTRIES (FF_RSS_TBL_MAX_SIP * FF_RSS_TBL_MAX_SPORT)
#define FF_RSS_TBL_SIP_ENTRIES_MASK (FF_RSS_TBL_SIP_ENTRIES - 1)
//saddr 2429495146, daddr 4273001345, sport 13568, dport 24873

/* Per-dport match result cached in the table. */
enum ff_rss_tbl_stat_type {
    FF_RSS_TBL_STAT_UNKNOWN = -1,
    FF_RSS_TBL_STAT_NOT_MATCH = 0,
    FF_RSS_TBL_STAT_MATCH = 1
};

/* Lifecycle of the precomputed RSS table (see ff_rss_tbl_init/ff_rss_check). */
enum ff_rss_tbl_init_type {
    FF_RSS_TBL_NOT_INIT = 0,
    FF_RSS_TBL_INITING = 1,
    FF_RSS_TBL_INITED = 2
};
enum ff_rss_tbl_init_type ff_rss_tbl_init_flag = FF_RSS_TBL_NOT_INIT;

/* One destination IP with a match/no-match byte for every dport. */
struct ff_rss_tbl_dip_type {
    uint32_t dip;
    int8_t dport_stat[FF_RSS_TBL_MAX_DPORT];
} __rte_cache_aligned;

/* Outer slot keyed by (sip, sport), holding its dip sub-table. */
struct ff_rss_tbl_type {
    uint32_t sip;
    uint16_t sport;
    struct ff_rss_tbl_dip_type dip_tbl[FF_RSS_TBL_MAX_DIP];
} __rte_cache_aligned;
static struct ff_rss_tbl_type ff_rss_tbl[FF_RSS_TBL_SIP_ENTRIES];
-
/*
 * Precompute the RSS lookup table: for every (sip, sport) slot and every
 * destination IP slot, cache the ff_rss_check() result for all 65536
 * destination ports.  Uses open addressing (linear probing) on both the
 * outer (sip, sport) table and the inner dip table.
 *
 * Returns 0 on success, -1 when a table is full and no slot can be found.
 *
 * NOTE(review): the tuple-perturbation branches below are explicitly
 * test scaffolding (see translated comments); production use should feed
 * real tuples from configuration.
 */
int
ff_rss_tbl_init(void *softc, uint32_t sip, uint32_t dip, uint16_t sport)
{
    uint32_t ori_idx, idx, ori_dip_idx, dip_idx;
    int i, j, k;
    uint32_t ori_sip = sip, ori_dip = dip;
    uint16_t ori_sport = sport;

    ff_rss_tbl_init_flag = FF_RSS_TBL_INITING;
    memset(ff_rss_tbl, 0, sizeof(ff_rss_tbl));

    for (i = 0; i < FF_RSS_TBL_SIP_ENTRIES; i++) {
        //ori_idx = idx = (sip ^ sport) & FF_RSS_TBL_SIP_ENTRIES_MASK;
        ori_idx = idx = (ori_sip ^ ori_sport) & FF_RSS_TBL_SIP_ENTRIES_MASK;

        /* For testing only; in practice the tuples should come from
         * configuration.  Worst case loops 16+4 times. */
        if (i != FF_RSS_TBL_SIP_ENTRIES - 1) { // succeeds within 16 attempts
            sip = ori_sip + i + 1;
            sport = ori_sport + i + 1;
        } else {
            sip = ori_sip;
            sport = ori_sport;
        }

        /* Linear-probe for an empty or matching (sip, sport) slot. */
        do {
            if (ff_rss_tbl[idx].sip == INADDR_ANY ||
                (ff_rss_tbl[idx].sip == sip &&
                ff_rss_tbl[idx].sport == sport)) {
                break;
            }

            if (ff_rss_tbl[idx].sip != sip ||
                ff_rss_tbl[idx].sport != sport) {
                idx++;
                idx &= FF_RSS_TBL_SIP_ENTRIES_MASK;
            }
        } while (idx != ori_idx);

        /* Wrapped all the way around without finding a usable slot. */
        if (idx == ori_idx &&
            ((ff_rss_tbl[idx].sip != INADDR_ANY) &&
            (ff_rss_tbl[idx].sip != sip ||
            ff_rss_tbl[idx].sport != sport))) {
            return -1;
        }

        for (k = 0; k < FF_RSS_TBL_MAX_DIP; k++) {
            //ori_dip_idx = dip_idx = dip & FF_RSS_TBL_MAX_DIP_MASK;
            ori_dip_idx = dip_idx = ori_dip & FF_RSS_TBL_MAX_DIP_MASK;

            /* For testing only; in practice these should come from
             * configuration.  Worst case loops 16+4 times.
             * NOTE(review): `k != FF_RSS_TBL_MAX_DIP` is always true here
             * (k < FF_RSS_TBL_MAX_DIP), so the else branch is dead — this
             * looks like the deliberately failing test variant, per the
             * comments below; confirm before shipping. */
            //if (k != FF_RSS_TBL_MAX_DIP - 1) { // succeeds within 20 attempts
            if (k != FF_RSS_TBL_MAX_DIP) { // fails after 20 attempts
                dip = ori_dip + k + 1;
            } else {
                dip = ori_dip;
            }

            /* Linear-probe the inner dip table for an empty slot. */
            do {
                if (ff_rss_tbl[idx].dip_tbl[dip_idx].dip == INADDR_ANY) {
                    break;
                }

                if (ff_rss_tbl[idx].dip_tbl[dip_idx].dip != dip) {
                    dip_idx++;
                    dip_idx &= FF_RSS_TBL_MAX_DIP_MASK;
                } else {
                    /* Dup 3-tuple */
                    printf("Duplicate ff rss table 3-tuple, please check your config.ini file\n");
                    goto IGNORE_DUP;
                }
            } while (dip_idx != ori_dip_idx);

            if (dip_idx == ori_dip_idx && ff_rss_tbl[idx].dip_tbl[dip_idx].dip != INADDR_ANY) {
                return -1;
            }

            /* Cache the per-dport RSS match result for this 3-tuple. */
            for (j = 0; j < FF_RSS_TBL_MAX_DPORT; j++) {
                ff_rss_tbl[idx].dip_tbl[dip_idx].dport_stat[j] = ff_rss_check(softc, sip, dip, sport, j);
            }

            ff_rss_tbl[idx].dip_tbl[dip_idx].dip = dip;

IGNORE_DUP:
            // do nothing
            ;
        }
        ff_rss_tbl[idx].sip = sip;
        ff_rss_tbl[idx].sport = sport;

        /* Break out here only when testing; in practice this should be
         * driven by configuration. */
        //break;
    }

    ff_rss_tbl_init_flag = FF_RSS_TBL_INITED;

    return 0;

}
-
-int
-ff_rss_tbl_get(uint32_t sip, uint32_t dip, uint16_t sport, uint16_t dport)
-{
- uint32_t ori_idx, idx, ori_dip_idx, dip_idx;
- int i;
-
- ori_idx = idx = (sip ^ sport) & FF_RSS_TBL_SIP_ENTRIES_MASK;
- do {
- /* If not inited, no need to continue check */
- if (ff_rss_tbl[idx].sip == INADDR_ANY) {
- return -1;
- }
-
- if (ff_rss_tbl[idx].sip == sip && ff_rss_tbl[idx].sport == sport) {
- ori_dip_idx = dip_idx = dip & FF_RSS_TBL_MAX_DIP_MASK;
- do {
- if (ff_rss_tbl[idx].dip_tbl[dip_idx].dip == INADDR_ANY) {
- return -1;
- }
-
- if (ff_rss_tbl[idx].dip_tbl[dip_idx].dip == dip) {
- return ff_rss_tbl[idx].dip_tbl[dip_idx].dport_stat[dport];
- }
-
- dip_idx++;
- dip_idx &= FF_RSS_TBL_MAX_DIP_MASK;
- } while (dip_idx != ori_dip_idx);
-
- if (dip_idx == ori_dip_idx) {
- return -1;
- }
- }
-
- idx++;
- idx &= FF_RSS_TBL_SIP_ENTRIES_MASK;
- } while (idx != ori_idx);
-
- if (idx == ori_idx) {
- return -1;
- }
-
- return -1;
-}
-
/*
 * Decide whether a 4-tuple would be steered by NIC RSS to THIS lcore's
 * rx queue on the tuple's port.
 *
 * Fast paths, in order:
 *   1. single-queue port: always "ours" (returns 1);
 *   2. lazily build the precomputed table on first call (mutually
 *      recursive with ff_rss_tbl_init, which calls back into this
 *      function while the flag is FF_RSS_TBL_INITING);
 *   3. table hit via ff_rss_tbl_get();
 *   4. fall back to computing the Toeplitz hash and mapping it through
 *      the RETA size and queue count.
 *
 * @return 1 if the tuple maps to this queue, 0 otherwise.
 *
 * NOTE(review): saddr/daddr/sport/dport are copied as-is into the hash
 * input — presumably already in network byte order at this layer;
 * confirm against the callers in the FreeBSD glue.
 */
int
ff_rss_check(void *softc, uint32_t saddr, uint32_t daddr,
    uint16_t sport, uint16_t dport)
{
    struct lcore_conf *qconf = &lcore_conf;
    struct ff_dpdk_if_context *ctx = ff_veth_softc_to_hostc(softc);
    uint16_t nb_queues = qconf->nb_queue_list[ctx->port_id];
    uint16_t queueid;

    int stat;
    int ret;
    uint64_t prev_tsc, cur_tsc;


    /* Single queue: every flow lands on it, nothing to check. */
    if (nb_queues <= 1) {
        return 1;
    }

    queueid = qconf->tx_queue_id[ctx->port_id];

    /* Lazy one-time build of the precomputed table (timed for logging). */
    if (ff_rss_tbl_init_flag == FF_RSS_TBL_NOT_INIT) {
        prev_tsc = rte_rdtsc();
        ff_rss_tbl_init(softc, saddr, daddr, sport);
        cur_tsc = rte_rdtsc();
        printf("Init rss tbl success, diff_tsc %lu, port %u, queue %u,"
            " saddr %u, daddr %u, sport %u, dport %u\n",
            cur_tsc - prev_tsc, ctx->port_id, queueid,
            saddr, daddr, sport, dport);
    }

    /* Serialize the tuple into the Toeplitz hash input buffer. */
    uint8_t data[sizeof(saddr) + sizeof(daddr) + sizeof(sport) +
        sizeof(dport)];
    unsigned datalen = 0;

    bcopy(&saddr, &data[datalen], sizeof(saddr));
    datalen += sizeof(saddr);

    bcopy(&daddr, &data[datalen], sizeof(daddr));
    datalen += sizeof(daddr);

    bcopy(&sport, &data[datalen], sizeof(sport));
    datalen += sizeof(sport);

    bcopy(&dport, &data[datalen], sizeof(dport));
    datalen += sizeof(dport);

    /* Once the table is built, prefer the cached answer. */
    if (ff_rss_tbl_init_flag == FF_RSS_TBL_INITED) {
        /* NOTE(review): idx and hash_val are unused leftovers. */
        uint32_t idx = 0;
        uint64_t hash_val;

        prev_tsc = rte_rdtsc();
        ret = ff_rss_tbl_get(saddr, daddr, sport, dport);
        cur_tsc = rte_rdtsc();
        if (ret >= 0) {
            stat = ret;
            printf("Get rss tbl success, diff_tsc %lu, stat %d, port %u, queue %u,"
                " saddr %u, daddr %u, sport %u, dport %u\n",
                cur_tsc - prev_tsc, stat, ctx->port_id, queueid,
                saddr, daddr, sport, dport);
            return stat;
        } else {
            // do nothing
            printf("Get rss tbl failed %d, diff_tsc %lu, fall back to toeplitz_hash,"
                " port %u, queue %u,"
                " saddr %u, daddr %u, sport %u, dport %u\n",
                ret, cur_tsc - prev_tsc, ctx->port_id, queueid,
                saddr, daddr, sport, dport);
        }
    }

    /* Fallback: compute the hash and map it the way the NIC would. */
    uint16_t reta_size = rss_reta_size[ctx->port_id];
    uint32_t hash = 0;
    prev_tsc = rte_rdtsc();
    hash = toeplitz_hash(rsskey_len, rsskey, datalen, data);
    stat = ((hash & (reta_size - 1)) % nb_queues) == queueid;
    cur_tsc = rte_rdtsc();
    /*printf("toeplitz_hash diff tsc %lu, stat %d, port %u, queue %u,"
        " saddr %u, daddr %u, sport %u, dport %u\n",
        cur_tsc - prev_tsc, stat, ctx->port_id, queueid,
        saddr, daddr, sport, dport);*/

    return stat;
}
-
/*
 * Register the user callback that steers received packets between
 * lcores.  Passing NULL disables custom dispatch.
 */
void
ff_regist_packet_dispatcher(dispatch_func_t func)
{
    packet_dispatcher = func;
}
-
-uint64_t
-ff_get_tsc_ns()
-{
- uint64_t cur_tsc = rte_rdtsc();
- uint64_t hz = rte_get_tsc_hz();
- return ((double)cur_tsc/(double)hz) * NS_PER_S;
-}
-
+++ /dev/null
-/*
- * Copyright (C) 2017-2021 THL A29 Limited, a Tencent company.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-#include <assert.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <errno.h>
-
-#include <rte_common.h>
-#include <rte_byteorder.h>
-#include <rte_log.h>
-#include <rte_memory.h>
-#include <rte_memcpy.h>
-#include <rte_memzone.h>
-#include <rte_config.h>
-#include <rte_eal.h>
-#include <rte_pci.h>
-#include <rte_mbuf.h>
-#include <rte_memory.h>
-#include <rte_lcore.h>
-#include <rte_launch.h>
-#include <rte_ethdev.h>
-#include <rte_debug.h>
-#include <rte_common.h>
-#include <rte_ether.h>
-#include <rte_malloc.h>
-#include <rte_cycles.h>
-#include <rte_timer.h>
-#include <rte_thash.h>
-#include <rte_ip.h>
-#include <rte_tcp.h>
-#include <rte_udp.h>
-#include <rte_eth_bond.h>
-#include <rte_eth_bond_8023ad.h>
-#include <rte_hash.h>
-
-#include "ff_dpdk_if.h"
-#include "ff_dpdk_pcap.h"
-#include "ff_dpdk_kni.h"
-#include "ff_config.h"
-#include "ff_veth.h"
-#include "ff_host_interface.h"
-#include "ff_msg.h"
-#include "ff_api.h"
-#include "ff_memory.h"
-
#ifdef FF_KNI
/* KNI mbuf/queue sizing and runtime policy flags. */
#define KNI_MBUF_MAX 2048
#define KNI_QUEUE_SIZE KNI_MBUF_MAX

int enable_kni = 0;
static int kni_accept;
static int knictl_action = FF_KNICTL_ACTION_DEFAULT;
#endif
int nb_dev_ports = 0; /* primary is correct, secondary is not correct, but no impact now*/

/* Non-zero when NUMA-aware socket placement is enabled via config. */
static int numa_on;

static unsigned idle_sleep;
static unsigned pkt_tx_delay;
static uint64_t usr_cb_tsc;
/* Shared shutdown flag set by ff_dpdk_stop(), polled by main_loop. */
static int stop_loop;

/* Periodic timer driving the FreeBSD stack's hardclock tick. */
static struct rte_timer freebsd_clock;

// Mellanox Linux's driver key
static uint8_t default_rsskey_40bytes[40] = {
    0xd1, 0x81, 0xc6, 0x2c, 0xf7, 0xf4, 0xdb, 0x5b,
    0x19, 0x83, 0xa2, 0xfc, 0x94, 0x3e, 0x1a, 0xdb,
    0xd9, 0x38, 0x9e, 0x6b, 0xd1, 0x03, 0x9c, 0x2c,
    0xa7, 0x44, 0x99, 0xad, 0x59, 0x3d, 0x56, 0xd9,
    0xf3, 0x25, 0x3c, 0x06, 0x2a, 0xdc, 0x1f, 0xfc
};

/* Default key for NICs whose hash key is 52 bytes (e.g. some i40e). */
static uint8_t default_rsskey_52bytes[52] = {
    0x44, 0x39, 0x79, 0x6b, 0xb5, 0x4c, 0x50, 0x23,
    0xb6, 0x75, 0xea, 0x5b, 0x12, 0x4f, 0x9f, 0x30,
    0xb8, 0xa2, 0xc0, 0x3d, 0xdf, 0xdc, 0x4d, 0x02,
    0xa0, 0x8c, 0x9b, 0x33, 0x4a, 0xf6, 0x4a, 0x4c,
    0x05, 0xc6, 0xfa, 0x34, 0x39, 0x58, 0xd8, 0x55,
    0x7d, 0x99, 0x58, 0x3a, 0xe1, 0x38, 0xc9, 0x2e,
    0x81, 0x15, 0x03, 0x66
};

/* Repeated 0x6d5a pattern gives symmetric (src/dst-swappable) hashing. */
static uint8_t symmetric_rsskey[52] = {
    0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
    0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
    0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
    0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
    0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
    0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a,
    0x6d, 0x5a, 0x6d, 0x5a
};

/* Active RSS key; init_port_start() may switch these per device caps. */
static int rsskey_len = sizeof(default_rsskey_40bytes);
static uint8_t *rsskey = default_rsskey_40bytes;

struct lcore_conf lcore_conf;

/* One packet mbuf pool per NUMA socket. */
struct rte_mempool *pktmbuf_pool[NB_SOCKETS];

static pcblddr_func_t pcblddr_fun;

/* Per-port array of per-queue rings for cross-lcore packet dispatch. */
static struct rte_ring **dispatch_ring[RTE_MAX_ETHPORTS];
static dispatch_func_t packet_dispatcher;

/* RSS redirection-table size per port, recorded at port init. */
static uint16_t rss_reta_size[RTE_MAX_ETHPORTS];

#define BOND_DRIVER_NAME "net_bonding"

static inline int send_single_packet(struct rte_mbuf *m, uint8_t port);

struct ff_msg_ring {
    char ring_name[FF_MSG_NUM][RTE_RING_NAMESIZE];
    /* ring[0] for lcore recv msg, other send */
    /* ring[1] for lcore send msg, other read */
    struct rte_ring *ring[FF_MSG_NUM];
} __rte_cache_aligned;

static struct ff_msg_ring msg_ring[RTE_MAX_LCORE];
static struct rte_mempool *message_pool;
static struct ff_dpdk_if_context *veth_ctx[RTE_MAX_ETHPORTS];

static struct ff_top_args ff_top_status;
static struct ff_traffic_args ff_traffic;
extern void ff_hardclock(void);
-
/*
 * rte_timer callback for the periodic FreeBSD clock tick: advances the
 * stack's hardclock and refreshes the cached wall-clock timestamp.
 */
static void
ff_hardclock_job(__rte_unused struct rte_timer *timer,
    __rte_unused void *arg) {
    ff_hardclock();
    ff_update_current_ts();
}
-
-struct ff_dpdk_if_context *
-ff_dpdk_register_if(void *sc, void *ifp, struct ff_port_cfg *cfg)
-{
- struct ff_dpdk_if_context *ctx;
-
- ctx = calloc(1, sizeof(struct ff_dpdk_if_context));
- if (ctx == NULL)
- return NULL;
-
- ctx->sc = sc;
- ctx->ifp = ifp;
- ctx->port_id = cfg->port_id;
- ctx->hw_features = cfg->hw_features;
-
- return ctx;
-}
-
/*
 * Release a context created by ff_dpdk_register_if().  Safe on NULL
 * (free(NULL) is a no-op).
 */
void
ff_dpdk_deregister_if(struct ff_dpdk_if_context *ctx)
{
    free(ctx);
}
-
-static void
-check_all_ports_link_status(void)
-{
- #define CHECK_INTERVAL 100 /* 100ms */
- #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
-
- uint16_t portid;
- uint8_t count, all_ports_up, print_flag = 0;
- struct rte_eth_link link;
-
- printf("\nChecking link status");
- fflush(stdout);
-
- int i, nb_ports;
- nb_ports = ff_global_cfg.dpdk.nb_ports;
- for (count = 0; count <= MAX_CHECK_TIME; count++) {
- all_ports_up = 1;
- for (i = 0; i < nb_ports; i++) {
- uint16_t portid = ff_global_cfg.dpdk.portid_list[i];
- memset(&link, 0, sizeof(link));
- rte_eth_link_get_nowait(portid, &link);
-
- /* print link status if flag set */
- if (print_flag == 1) {
- if (link.link_status) {
- printf("Port %d Link Up - speed %u "
- "Mbps - %s\n", (int)portid,
- (unsigned)link.link_speed,
- (link.link_duplex == RTE_ETH_LINK_FULL_DUPLEX) ?
- ("full-duplex") : ("half-duplex\n"));
- } else {
- printf("Port %d Link Down\n", (int)portid);
- }
- continue;
- }
- /* clear all_ports_up flag if any link down */
- if (link.link_status == 0) {
- all_ports_up = 0;
- break;
- }
- }
-
- /* after finally printing all link status, get out */
- if (print_flag == 1)
- break;
-
- if (all_ports_up == 0) {
- printf(".");
- fflush(stdout);
- rte_delay_ms(CHECK_INTERVAL);
- }
-
- /* set the print_flag if all ports up or timeout */
- if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
- print_flag = 1;
- printf("done\n");
- }
- }
-}
-
/*
 * Populate the per-process lcore_conf from the global configuration:
 * discover available ports, resolve this process's lcore and NUMA
 * socket, and for each configured port record the rx/tx queue this
 * lcore owns.  Exits the process on any configuration mismatch.
 */
static int
init_lcore_conf(void)
{
    if (nb_dev_ports == 0) {
        nb_dev_ports = rte_eth_dev_count_avail();
    }
    if (nb_dev_ports == 0) {
        rte_exit(EXIT_FAILURE, "No probed ethernet devices\n");
    }

    if (ff_global_cfg.dpdk.max_portid >= nb_dev_ports) {
        rte_exit(EXIT_FAILURE, "this machine doesn't have port %d.\n",
            ff_global_cfg.dpdk.max_portid);
    }

    lcore_conf.port_cfgs = ff_global_cfg.dpdk.port_cfgs;
    lcore_conf.proc_id = ff_global_cfg.dpdk.proc_id;

    uint16_t socket_id = 0;
    if (numa_on) {
        socket_id = rte_lcore_to_socket_id(rte_lcore_id());
    }

    lcore_conf.socket_id = socket_id;

    uint16_t lcore_id = ff_global_cfg.dpdk.proc_lcore[lcore_conf.proc_id];
    if (!rte_lcore_is_enabled(lcore_id)) {
        rte_exit(EXIT_FAILURE, "lcore %u unavailable\n", lcore_id);
    }

    int j;
    for (j = 0; j < ff_global_cfg.dpdk.nb_ports; ++j) {
        uint16_t port_id = ff_global_cfg.dpdk.portid_list[j];
        struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[port_id];

        /* This lcore's queue on the port is its index in the port's
         * lcore_list; -1 means the port is not handled by this lcore. */
        int queueid = -1;
        int i;
        for (i = 0; i < pconf->nb_lcores; i++) {
            if (pconf->lcore_list[i] == lcore_id) {
                queueid = i;
            }
        }
        if (queueid < 0) {
            continue;
        }
        printf("lcore: %u, port: %u, queue: %u\n", lcore_id, port_id, queueid);
        uint16_t nb_rx_queue = lcore_conf.nb_rx_queue;
        lcore_conf.rx_queue_list[nb_rx_queue].port_id = port_id;
        lcore_conf.rx_queue_list[nb_rx_queue].queue_id = queueid;
        lcore_conf.nb_rx_queue++;

        lcore_conf.tx_queue_id[port_id] = queueid;
        lcore_conf.tx_port_id[lcore_conf.nb_tx_port] = port_id;
        lcore_conf.nb_tx_port++;

        /* Enable pcap dump */
        if (ff_global_cfg.pcap.enable) {
            ff_enable_pcap(ff_global_cfg.pcap.save_path, ff_global_cfg.pcap.snap_len);
        }

        lcore_conf.nb_queue_list[port_id] = pconf->nb_lcores;
    }

    if (lcore_conf.nb_rx_queue == 0) {
        rte_exit(EXIT_FAILURE, "lcore %u has nothing to do\n", lcore_id);
    }

    return 0;
}
-
/*
 * Create (primary) or look up (secondary) the per-socket packet mbuf
 * pools, sized from queue depths, burst sizes, cache sizes and optional
 * KNI/dispatch-ring requirements across all configured processes.
 */
static int
init_mem_pool(void)
{
    uint8_t nb_ports = ff_global_cfg.dpdk.nb_ports;
    uint32_t nb_lcores = ff_global_cfg.dpdk.nb_procs;
    uint32_t nb_tx_queue = nb_lcores;
    uint32_t nb_rx_queue = lcore_conf.nb_rx_queue * nb_lcores;
    uint16_t max_portid = ff_global_cfg.dpdk.max_portid;

    /* Worst-case number of in-flight mbufs, rounded up to 8192. */
    unsigned nb_mbuf = RTE_ALIGN_CEIL (
        (nb_rx_queue * (max_portid + 1) * 2 * RX_QUEUE_SIZE +
        nb_ports * (max_portid + 1) * 2 * nb_lcores * MAX_PKT_BURST +
        nb_ports * (max_portid + 1) * 2 * nb_tx_queue * TX_QUEUE_SIZE +
        nb_lcores * MEMPOOL_CACHE_SIZE +
#ifdef FF_KNI
        nb_ports * KNI_MBUF_MAX +
        nb_ports * KNI_QUEUE_SIZE +
#endif
        nb_lcores * nb_ports * DISPATCH_RING_SIZE),
        (unsigned)8192);

    unsigned socketid = 0;
    uint16_t i, lcore_id;
    char s[64];

    for (i = 0; i < ff_global_cfg.dpdk.nb_procs; i++) {
        lcore_id = ff_global_cfg.dpdk.proc_lcore[i];
        if (numa_on) {
            socketid = rte_lcore_to_socket_id(lcore_id);
        }

        if (socketid >= NB_SOCKETS) {
            rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n",
                socketid, i, NB_SOCKETS);
        }

        /* One pool per socket; skip sockets already covered. */
        if (pktmbuf_pool[socketid] != NULL) {
            continue;
        }

        if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
            snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
            pktmbuf_pool[socketid] =
                rte_pktmbuf_pool_create(s, nb_mbuf,
                    MEMPOOL_CACHE_SIZE, 0,
                    RTE_MBUF_DEFAULT_BUF_SIZE, socketid);
        } else {
            /* Secondary processes attach to the primary's pool. */
            snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
            pktmbuf_pool[socketid] = rte_mempool_lookup(s);
        }

        if (pktmbuf_pool[socketid] == NULL) {
            rte_exit(EXIT_FAILURE, "Cannot create mbuf pool on socket %d\n", socketid);
        } else {
            printf("create mbuf pool on socket %d\n", socketid);
        }

#ifdef FF_USE_PAGE_ARRAY
        nb_mbuf = RTE_ALIGN_CEIL (
            nb_ports*nb_lcores*MAX_PKT_BURST +
            nb_ports*nb_tx_queue*TX_QUEUE_SIZE +
            nb_lcores*MEMPOOL_CACHE_SIZE,
            (unsigned)4096);
        ff_init_ref_pool(nb_mbuf, socketid);
#endif
    }

    return 0;
}
-
-static struct rte_ring *
-create_ring(const char *name, unsigned count, int socket_id, unsigned flags)
-{
- struct rte_ring *ring;
-
- if (name == NULL) {
- rte_exit(EXIT_FAILURE, "create ring failed, no name!\n");
- }
-
- if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
- ring = rte_ring_create(name, count, socket_id, flags);
- } else {
- ring = rte_ring_lookup(name);
- }
-
- if (ring == NULL) {
- rte_exit(EXIT_FAILURE, "create ring:%s failed!\n", name);
- }
-
- return ring;
-}
-
/*
 * Allocate, per configured port, the array of per-queue dispatch rings
 * used to hand packets between lcores, then create/look up each ring.
 */
static int
init_dispatch_ring(void)
{
    int j;
    char name_buf[RTE_RING_NAMESIZE];
    int queueid;

    unsigned socketid = lcore_conf.socket_id;

    /* Create ring according to ports actually being used. */
    int nb_ports = ff_global_cfg.dpdk.nb_ports;
    for (j = 0; j < nb_ports; j++) {
        uint16_t portid = ff_global_cfg.dpdk.portid_list[j];
        struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[portid];
        int nb_queues = pconf->nb_lcores;
        if (dispatch_ring[portid] == NULL) {
            snprintf(name_buf, RTE_RING_NAMESIZE, "ring_ptr_p%d", portid);

            dispatch_ring[portid] = rte_zmalloc(name_buf,
                sizeof(struct rte_ring *) * nb_queues,
                 RTE_CACHE_LINE_SIZE);
            if (dispatch_ring[portid] == NULL) {
                rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (struct rte_ring*)) "
                    "failed\n", name_buf);
            }
        }

        for(queueid = 0; queueid < nb_queues; ++queueid) {
            snprintf(name_buf, RTE_RING_NAMESIZE, "dispatch_ring_p%d_q%d",
                portid, queueid);
            /* Single-consumer: only the owning lcore dequeues. */
            dispatch_ring[portid][queueid] = create_ring(name_buf,
                DISPATCH_RING_SIZE, socketid, RING_F_SC_DEQ);

            /* NOTE(review): create_ring() already rte_exit()s on failure,
             * so this check is effectively dead — harmless belt-and-braces. */
            if (dispatch_ring[portid][queueid] == NULL)
                rte_panic("create ring:%s failed!\n", name_buf);

            printf("create ring:%s success, %u ring entries are now free!\n",
                name_buf, rte_ring_free_count(dispatch_ring[portid][queueid]));
        }
    }

    return 0;
}
-
-static void
-ff_msg_init(struct rte_mempool *mp,
- __attribute__((unused)) void *opaque_arg,
- void *obj, __attribute__((unused)) unsigned i)
-{
- struct ff_msg *msg = (struct ff_msg *)obj;
- msg->msg_type = FF_UNKNOWN;
- msg->buf_addr = (char *)msg + sizeof(struct ff_msg);
- msg->buf_len = mp->elt_size - sizeof(struct ff_msg);
- msg->original_buf = NULL;
- msg->original_buf_len = 0;
-}
-
/*
 * Create (primary) or look up (secondary) the control-message mempool
 * and the per-process in/out message rings used for sysctl-style RPC
 * between f-stack processes and external tools.
 */
static int
init_msg_ring(void)
{
    uint16_t i, j;
    uint16_t nb_procs = ff_global_cfg.dpdk.nb_procs;
    unsigned socketid = lcore_conf.socket_id;

    /* Create message buffer pool */
    if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
        message_pool = rte_mempool_create(FF_MSG_POOL,
           MSG_RING_SIZE * 2 * nb_procs,
           MAX_MSG_BUF_SIZE, MSG_RING_SIZE / 2, 0,
           NULL, NULL, ff_msg_init, NULL,
           socketid, 0);
    } else {
        message_pool = rte_mempool_lookup(FF_MSG_POOL);
    }

    if (message_pool == NULL) {
        rte_panic("Create msg mempool failed\n");
    }

    for(i = 0; i < nb_procs; ++i) {
        /* ring[0]: inbound requests to proc i (single producer/consumer). */
        snprintf(msg_ring[i].ring_name[0], RTE_RING_NAMESIZE,
            "%s%u", FF_MSG_RING_IN, i);
        msg_ring[i].ring[0] = create_ring(msg_ring[i].ring_name[0],
            MSG_RING_SIZE, socketid, RING_F_SP_ENQ | RING_F_SC_DEQ);
        if (msg_ring[i].ring[0] == NULL)
            rte_panic("create ring::%s failed!\n", msg_ring[i].ring_name[0]);

        /* One outbound ring per message type. */
        for (j = FF_SYSCTL; j < FF_MSG_NUM; j++) {
            snprintf(msg_ring[i].ring_name[j], RTE_RING_NAMESIZE,
                "%s%u_%u", FF_MSG_RING_OUT, i, j);
            msg_ring[i].ring[j] = create_ring(msg_ring[i].ring_name[j],
                MSG_RING_SIZE, socketid, RING_F_SP_ENQ | RING_F_SC_DEQ);
            if (msg_ring[i].ring[j] == NULL)
                rte_panic("create ring::%s failed!\n", msg_ring[i].ring_name[j]);
        }
    }

    return 0;
}
-
-#ifdef FF_KNI
-
-static enum FF_KNICTL_CMD get_kni_action(const char *c){
- if (!c)
- return FF_KNICTL_ACTION_DEFAULT;
- if (0 == strcasecmp(c, "alltokni")){
- return FF_KNICTL_ACTION_ALL_TO_KNI;
- } else if (0 == strcasecmp(c, "alltoff")){
- return FF_KNICTL_ACTION_ALL_TO_FF;
- } else if (0 == strcasecmp(c, "default")){
- return FF_KNICTL_ACTION_DEFAULT;
- } else {
- return FF_KNICTL_ACTION_DEFAULT;
- }
-}
-
/*
 * Initialize the KNI (kernel NIC interface) subsystem: resolve the
 * accept/reject policy and action from config, set up global KNI state
 * for all probed device ports, then allocate one KNI device per
 * configured port from this lcore's mbuf pool.
 */
static int
init_kni(void)
{
    int nb_ports = nb_dev_ports;

    kni_accept = 0;

    /* "accept" means only whitelisted ports go to the kernel. */
    if(strcasecmp(ff_global_cfg.kni.method, "accept") == 0)
        kni_accept = 1;

    knictl_action = get_kni_action(ff_global_cfg.kni.kni_action);

    ff_kni_init(nb_ports, ff_global_cfg.kni.type, ff_global_cfg.kni.tcp_port,
        ff_global_cfg.kni.udp_port);

    unsigned socket_id = lcore_conf.socket_id;
    struct rte_mempool *mbuf_pool = pktmbuf_pool[socket_id];

    /* Allocate a KNI device for each *configured* port only. */
    nb_ports = ff_global_cfg.dpdk.nb_ports;
    int i, ret;
    for (i = 0; i < nb_ports; i++) {
        uint16_t port_id = ff_global_cfg.dpdk.portid_list[i];
        ff_kni_alloc(port_id, socket_id, ff_global_cfg.kni.type, i, mbuf_pool, KNI_QUEUE_SIZE);
    }

    return 0;
}
-#endif
-
-//RSS reta update will failed when enable flow isolate
-#if !defined(FF_FLOW_ISOLATE) && !defined(FF_FLOW_IPIP)
-static void
-set_rss_table(uint16_t port_id, uint16_t reta_size, uint16_t nb_queues)
-{
- if (reta_size == 0) {
- return;
- }
-
- int reta_conf_size = RTE_MAX(1, reta_size / RTE_ETH_RETA_GROUP_SIZE);
- struct rte_eth_rss_reta_entry64 reta_conf[reta_conf_size];
-
- /* config HW indirection table */
- unsigned i, j, hash=0;
- for (i = 0; i < reta_conf_size; i++) {
- reta_conf[i].mask = ~0ULL;
- for (j = 0; j < RTE_ETH_RETA_GROUP_SIZE; j++) {
- reta_conf[i].reta[j] = hash++ % nb_queues;
- }
- }
-
- if (rte_eth_dev_rss_reta_update(port_id, reta_conf, reta_size)) {
- rte_exit(EXIT_FAILURE, "port[%d], failed to update rss table\n",
- port_id);
- }
-}
-#endif
-
/*
 * Configure and start every port: query device capabilities, pick the
 * RSS key/offloads the hardware supports, set up rx/tx queues (NUMA
 * aware), handle bonding slaves and optional KNI virtio-user ports,
 * program the RETA, optionally enable promiscuous mode, and finally
 * wait for links to come up (primary process only).
 *
 * Returns 0 on success or a negative DPDK error code.
 */
static int
init_port_start(void)
{
    int nb_ports = ff_global_cfg.dpdk.nb_ports, total_nb_ports;
    unsigned socketid = 0;
    struct rte_mempool *mbuf_pool;
    uint16_t i, j;

    total_nb_ports = nb_ports;
#ifdef FF_KNI
    if (enable_kni && rte_eal_process_type() == RTE_PROC_PRIMARY) {
#ifdef FF_KNI_KNI
        if (ff_global_cfg.kni.type == KNI_TYPE_VIRTIO)
#endif
        {
            total_nb_ports *= 2; /* one more virtio_user port for kernel per port */
        }
    }
#endif

    for (i = 0; i < total_nb_ports; i++) {
        uint16_t port_id, u_port_id;
        struct ff_port_cfg *pconf = NULL;
        uint16_t nb_queues;
        int nb_slaves;

        if (i < nb_ports) {
            u_port_id = ff_global_cfg.dpdk.portid_list[i];
            pconf = &ff_global_cfg.dpdk.port_cfgs[u_port_id];
            nb_queues = pconf->nb_lcores;
            nb_slaves = pconf->nb_slaves;

            if (nb_slaves > 0) {
                rte_eth_bond_8023ad_dedicated_queues_enable(u_port_id);
            }
        } else {
            /* kernel virtio user, port id start from `nb_dev_ports` */
            u_port_id = i - nb_ports + nb_dev_ports;
            nb_queues = 1; /* see ff_kni_alloc in ff_dpdk_kni.c */
            nb_slaves = 0;
        }

        /* Iterate the bond's slave ports first, then the port itself
         * (j == nb_slaves selects u_port_id). */
        for (j = 0; j <= nb_slaves; j++) {
            if (j < nb_slaves) {
                port_id = pconf->slave_portid_list[j];
                printf("To init %s's %d'st slave port[%d]\n",
                    ff_global_cfg.dpdk.bond_cfgs->name,
                    j, port_id);
            } else {
                port_id = u_port_id;
            }

            struct rte_eth_dev_info dev_info;
            struct rte_eth_conf port_conf = {0};
            struct rte_eth_rxconf rxq_conf;
            struct rte_eth_txconf txq_conf;

            int ret = rte_eth_dev_info_get(port_id, &dev_info);
            if (ret != 0)
                rte_exit(EXIT_FAILURE,
                    "Error during getting device (port %u) info: %s\n",
                    port_id, strerror(-ret));

            if (nb_queues > dev_info.max_rx_queues) {
                rte_exit(EXIT_FAILURE, "num_procs[%d] bigger than max_rx_queues[%d]\n",
                    nb_queues,
                    dev_info.max_rx_queues);
            }

            if (nb_queues > dev_info.max_tx_queues) {
                rte_exit(EXIT_FAILURE, "num_procs[%d] bigger than max_tx_queues[%d]\n",
                    nb_queues,
                    dev_info.max_tx_queues);
            }

            struct rte_ether_addr addr;
            rte_eth_macaddr_get(port_id, &addr);
            printf("Port %u MAC:"RTE_ETHER_ADDR_PRT_FMT"\n",
                (unsigned)port_id, RTE_ETHER_ADDR_BYTES(&addr));

            /* Only config dev port, but not kernel virtio user port */
            if (pconf) {
                rte_memcpy(pconf->mac,
                    addr.addr_bytes, RTE_ETHER_ADDR_LEN);

                /* Set RSS mode; pick the key matching the NIC's key size
                 * (52-byte NICs get the 52-byte default key). */
                if (dev_info.flow_type_rss_offloads) {
                    uint64_t default_rss_hf = RTE_ETH_RSS_PROTO_MASK;
                    port_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_RSS;
                    port_conf.rx_adv_conf.rss_conf.rss_hf = default_rss_hf;
                    if (dev_info.hash_key_size == 52) {
                        rsskey = default_rsskey_52bytes;
                        rsskey_len = 52;
                    }
                    if (ff_global_cfg.dpdk.symmetric_rss) {
                        printf("Use symmetric Receive-side Scaling(RSS) key\n");
                        rsskey = symmetric_rsskey;
                    }
                    port_conf.rx_adv_conf.rss_conf.rss_key = rsskey;
                    port_conf.rx_adv_conf.rss_conf.rss_key_len = rsskey_len;
                    port_conf.rx_adv_conf.rss_conf.rss_hf &= dev_info.flow_type_rss_offloads;
                    if (port_conf.rx_adv_conf.rss_conf.rss_hf !=
                            RTE_ETH_RSS_PROTO_MASK) {
                        printf("Port %u modified RSS hash function based on hardware support,"
                            "requested:%#"PRIx64" configured:%#"PRIx64"\n",
                            port_id, default_rss_hf,
                            port_conf.rx_adv_conf.rss_conf.rss_hf);
                    }
                }

                if (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
                    port_conf.txmode.offloads |=
                        RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
                }

                /* Set Rx VLAN stripping */
                if (ff_global_cfg.dpdk.vlan_strip) {
                    if (dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_VLAN_STRIP) {
                        port_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
                    }
                }

                /* Enable HW CRC stripping */
                port_conf.rxmode.offloads &= ~RTE_ETH_RX_OFFLOAD_KEEP_CRC;

                /* FIXME: Enable TCP LRO ?*/
                #if 0
                if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO) {
                    printf("LRO is supported\n");
                    port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_TCP_LRO;
                    pconf->hw_features.rx_lro = 1;
                }
                #endif

                /* Set Rx checksum checking */
                if ((dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM) &&
                    (dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_UDP_CKSUM) &&
                    (dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_TCP_CKSUM)) {
                    printf("RX checksum offload supported\n");
                    port_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_CHECKSUM;
                    pconf->hw_features.rx_csum = 1;
                }

                if (ff_global_cfg.dpdk.tx_csum_offoad_skip == 0) {
                    if ((dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) {
                        printf("TX ip checksum offload supported\n");
                        port_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM;
                        pconf->hw_features.tx_csum_ip = 1;
                    }

                    if ((dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) &&
                        (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) {
                        printf("TX TCP&UDP checksum offload supported\n");
                        port_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
                        pconf->hw_features.tx_csum_l4 = 1;
                    }
                } else {
                    printf("TX checksum offoad is disabled\n");
                }

                if (ff_global_cfg.dpdk.tso) {
                    if (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) {
                        printf("TSO is supported\n");
                        port_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
                        pconf->hw_features.tx_tso = 1;
                    }
                    else {
                        printf("TSO is not supported\n");
                    }
                } else {
                    printf("TSO is disabled\n");
                }

                if (dev_info.reta_size) {
                    /* reta size must be power of 2 */
                    assert((dev_info.reta_size & (dev_info.reta_size - 1)) == 0);

                    rss_reta_size[port_id] = dev_info.reta_size;
                    printf("port[%d]: rss table size: %d\n", port_id,
                        dev_info.reta_size);
                }
            }

            /* Device setup/start is done by the primary process only. */
            if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
                continue;
            }

            ret = rte_eth_dev_configure(port_id, nb_queues, nb_queues, &port_conf);
            if (ret != 0) {
                return ret;
            }

            static uint16_t nb_rxd = RX_QUEUE_SIZE;
            static uint16_t nb_txd = TX_QUEUE_SIZE;
            ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd);
            if (ret < 0)
                printf("Could not adjust number of descriptors "
                    "for port%u (%d)\n", (unsigned)port_id, ret);

            uint16_t q;
            for (q = 0; q < nb_queues; q++) {
                /* Allocate each queue from the NUMA socket of the lcore
                 * that will service it. */
                if (numa_on) {
                    uint16_t lcore_id = lcore_conf.port_cfgs[u_port_id].lcore_list[q];
                    socketid = rte_lcore_to_socket_id(lcore_id);
                }
                mbuf_pool = pktmbuf_pool[socketid];

                txq_conf = dev_info.default_txconf;
                txq_conf.offloads = port_conf.txmode.offloads;
                ret = rte_eth_tx_queue_setup(port_id, q, nb_txd,
                    socketid, &txq_conf);
                if (ret < 0) {
                    return ret;
                }

                rxq_conf = dev_info.default_rxconf;
                rxq_conf.offloads = port_conf.rxmode.offloads;
                ret = rte_eth_rx_queue_setup(port_id, q, nb_rxd,
                    socketid, &rxq_conf, mbuf_pool);
                if (ret < 0) {
                    return ret;
                }
            }

            /* NOTE(review): the length argument is
             * strlen(dev_info.driver_name), not strlen(BOND_DRIVER_NAME)
             * — presumably intended as a full-name match; confirm. */
            if (strncmp(dev_info.driver_name, BOND_DRIVER_NAME,
                    strlen(dev_info.driver_name)) == 0) {

                rte_eth_macaddr_get(port_id, &addr);
                printf("Port %u MAC:"RTE_ETHER_ADDR_PRT_FMT"\n",
                    (unsigned)port_id, RTE_ETHER_ADDR_BYTES(&addr));

                rte_memcpy(pconf->mac,
                    addr.addr_bytes, RTE_ETHER_ADDR_LEN);

                int mode, count, x;
                uint16_t slaves[RTE_MAX_ETHPORTS], len = RTE_MAX_ETHPORTS;

                mode = rte_eth_bond_mode_get(port_id);
                printf("Port %u, bond mode:%d\n", port_id, mode);

                count = rte_eth_bond_members_get(port_id, slaves, len);
                printf("Port %u, %s's slave ports count:%d\n", port_id,
                    ff_global_cfg.dpdk.bond_cfgs->name, count);
                for (x=0; x<count; x++) {
                    printf("Port %u, %s's slave port[%u]\n", port_id,
                        ff_global_cfg.dpdk.bond_cfgs->name, slaves[x]);
                }
            }

            ret = rte_eth_dev_start(port_id);
            if (ret < 0) {
                return ret;
            }

//RSS reta update will failed when enable flow isolate
#if !defined(FF_FLOW_ISOLATE) && !defined(FF_FLOW_IPIP)
            if (nb_queues > 1) {
                /*
                 * FIXME: modify RSS set to FDIR
                 */
                set_rss_table(port_id, dev_info.reta_size, nb_queues);
            }
#endif

            /* Enable RX in promiscuous mode for the Ethernet device. */
            if (ff_global_cfg.dpdk.promiscuous) {
                ret = rte_eth_promiscuous_enable(port_id);
                if (ret == 0) {
                    printf("set port %u to promiscuous mode ok\n", port_id);
                } else {
                    printf("set port %u to promiscuous mode error\n", port_id);
                }
            }
        }
    }

    if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
        check_all_ports_link_status();
    }

    return 0;
}
-
-static int
-init_clock(void)
-{
- rte_timer_subsystem_init();
- uint64_t hz = rte_get_timer_hz();
- uint64_t intrs = US_PER_S / ff_global_cfg.freebsd.hz;
- uint64_t tsc = (hz + US_PER_S - 1) / US_PER_S * intrs;
-
- rte_timer_init(&freebsd_clock);
- rte_timer_reset(&freebsd_clock, tsc, PERIODICAL,
- rte_lcore_id(), &ff_hardclock_job, NULL);
-
- ff_update_current_ts();
-
- return 0;
-}
-
#if defined(FF_FLOW_ISOLATE) || defined(FF_FDIR)
/** Print a message out of a flow error. */
static int
port_flow_complain(struct rte_flow_error *error)
{
    static const char *const errstrlist[] = {
        [RTE_FLOW_ERROR_TYPE_NONE] = "no error",
        [RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified",
        [RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)",
        [RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field",
        [RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field",
        [RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field",
        [RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
        [RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER] = "transfer field",
        [RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
        [RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
        [RTE_FLOW_ERROR_TYPE_ITEM_SPEC] = "item specification",
        [RTE_FLOW_ERROR_TYPE_ITEM_LAST] = "item specification range",
        [RTE_FLOW_ERROR_TYPE_ITEM_MASK] = "item specification mask",
        [RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item",
        [RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions",
        [RTE_FLOW_ERROR_TYPE_ACTION_CONF] = "action configuration",
        [RTE_FLOW_ERROR_TYPE_ACTION] = "specific action",
    };
    char cause_buf[32];
    const char *cause_str = "";
    const char *errstr = "unknown type";
    /* Capture rte_errno before any call below can overwrite it. */
    int err = rte_errno;

    if ((unsigned int)error->type < RTE_DIM(errstrlist) &&
        errstrlist[error->type] != NULL)
        errstr = errstrlist[error->type];

    if (error->cause != NULL) {
        snprintf(cause_buf, sizeof(cause_buf), "cause: %p, ", error->cause);
        cause_str = cause_buf;
    }

    printf("Caught error type %d (%s): %s%s: %s\n",
        error->type, errstr, cause_str,
        error->message ? error->message : "(no stated reason)",
        rte_strerror(err));
    return -err;
}
#endif
-
-
-#ifdef FF_FLOW_ISOLATE
-static int
-port_flow_isolate(uint16_t port_id, int set)
-{
- struct rte_flow_error error;
-
- /* Poisoning to make sure PMDs update it in case of error. */
- memset(&error, 0x66, sizeof(error));
- if (rte_flow_isolate(port_id, set, &error))
- return port_flow_complain(&error);
- printf("Ingress traffic on port %u is %s to the defined flow rules\n",
- port_id,
- set ? "now restricted" : "not restricted anymore");
- return 0;
-}
-
-static int
-create_tcp_flow(uint16_t port_id, uint16_t tcp_port) {
- struct rte_flow_attr attr = {.ingress = 1};
- struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[port_id];
- int nb_queues = pconf->nb_lcores;
- uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
- int i = 0, j = 0;
- for (i = 0, j = 0; i < nb_queues; ++i)
- queue[j++] = i;
- struct rte_flow_action_rss rss = {
- .types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
- .key_len = rsskey_len,
- .key = rsskey,
- .queue_num = j,
- .queue = queue,
- };
-
- struct rte_eth_dev_info dev_info;
- int ret = rte_eth_dev_info_get(port_id, &dev_info);
- if (ret != 0)
- rte_exit(EXIT_FAILURE, "Error during getting device (port %u) info: %s\n", port_id, strerror(-ret));
-
- struct rte_flow_item pattern[3];
- struct rte_flow_action action[2];
- struct rte_flow_item_tcp tcp_spec;
- struct rte_flow_item_tcp tcp_mask = {
- .hdr = {
- .src_port = RTE_BE16(0x0000),
- .dst_port = RTE_BE16(0xffff),
- },
- };
- struct rte_flow_error error;
-
- memset(pattern, 0, sizeof(pattern));
- memset(action, 0, sizeof(action));
-
- /* set the dst ipv4 packet to the required value */
- pattern[0].type = RTE_FLOW_ITEM_TYPE_IPV4;
-
- memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp));
- tcp_spec.hdr.dst_port = rte_cpu_to_be_16(tcp_port);
- pattern[1].type = RTE_FLOW_ITEM_TYPE_TCP;
- pattern[1].spec = &tcp_spec;
- pattern[1].mask = &tcp_mask;
-
- /* end the pattern array */
- pattern[2].type = RTE_FLOW_ITEM_TYPE_END;
-
- /* create the action */
- action[0].type = RTE_FLOW_ACTION_TYPE_RSS;
- action[0].conf = &rss;
- action[1].type = RTE_FLOW_ACTION_TYPE_END;
-
- struct rte_flow *flow;
- /* validate and create the flow rule */
- if (!rte_flow_validate(port_id, &attr, pattern, action, &error)) {
- flow = rte_flow_create(port_id, &attr, pattern, action, &error);
- if (!flow) {
- return port_flow_complain(&error);
- }
- }
-
- memset(pattern, 0, sizeof(pattern));
-
- /* set the dst ipv4 packet to the required value */
- pattern[0].type = RTE_FLOW_ITEM_TYPE_IPV4;
-
- struct rte_flow_item_tcp tcp_src_mask = {
- .hdr = {
- .src_port = RTE_BE16(0xffff),
- .dst_port = RTE_BE16(0x0000),
- },
- };
-
- memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp));
- tcp_spec.hdr.src_port = rte_cpu_to_be_16(tcp_port);
- pattern[1].type = RTE_FLOW_ITEM_TYPE_TCP;
- pattern[1].spec = &tcp_spec;
- pattern[1].mask = &tcp_src_mask;
-
- /* end the pattern array */
- pattern[2].type = RTE_FLOW_ITEM_TYPE_END;
-
- /* validate and create the flow rule */
- if (!rte_flow_validate(port_id, &attr, pattern, action, &error)) {
- flow = rte_flow_create(port_id, &attr, pattern, action, &error);
- if (!flow) {
- return port_flow_complain(&error);
- }
- }
-
- return 1;
-}
-
-static int
-init_flow(uint16_t port_id, uint16_t tcp_port) {
- // struct ff_flow_cfg fcfg = ff_global_cfg.dpdk.flow_cfgs[0];
-
- // int i;
- // for (i = 0; i < fcfg.nb_port; i++) {
- // if(!create_tcp_flow(fcfg.port_id, fcfg.tcp_ports[i])) {
- // return 0;
- // }
- // }
-
- if(!create_tcp_flow(port_id, tcp_port)) {
- rte_exit(EXIT_FAILURE, "create tcp flow failed\n");
- return -1;
- }
-
- /* ARP rule */
- struct rte_flow_attr attr = {.ingress = 1};
- struct rte_flow_action_queue queue = {.index = 0};
-
- struct rte_flow_item pattern_[2];
- struct rte_flow_action action[2];
- struct rte_flow_item_eth eth_type = {.type = RTE_BE16(0x0806)};
- struct rte_flow_item_eth eth_mask = {
- .type = RTE_BE16(0xffff)
- };
-
- memset(pattern_, 0, sizeof(pattern_));
- memset(action, 0, sizeof(action));
-
- pattern_[0].type = RTE_FLOW_ITEM_TYPE_ETH;
- pattern_[0].spec = ð_type;
- pattern_[0].mask = ð_mask;
-
- pattern_[1].type = RTE_FLOW_ITEM_TYPE_END;
-
- /* create the action */
- action[0].type = RTE_FLOW_ACTION_TYPE_QUEUE;
- action[0].conf = &queue;
- action[1].type = RTE_FLOW_ACTION_TYPE_END;
-
- struct rte_flow *flow;
- struct rte_flow_error error;
- /* validate and create the flow rule */
- if (!rte_flow_validate(port_id, &attr, pattern_, action, &error)) {
- flow = rte_flow_create(port_id, &attr, pattern_, action, &error);
- if (!flow) {
- return port_flow_complain(&error);
- }
- }
-
- return 1;
-}
-
-#endif
-
-#ifdef FF_FLOW_IPIP
-static int
-create_ipip_flow(uint16_t port_id) {
- struct rte_flow_attr attr = {.ingress = 1};
- struct ff_port_cfg *pconf = &ff_global_cfg.dpdk.port_cfgs[port_id];
- int nb_queues = pconf->nb_lcores;
- uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
- // 1. Queue configuration check
- if (nb_queues > RTE_MAX_QUEUES_PER_PORT) {
- rte_exit(EXIT_FAILURE, "Queue count exceeds limit (%d > %d)\n",
- nb_queues, RTE_MAX_QUEUES_PER_PORT);
- }
- for (int i = 0; i < nb_queues; i++)
- queue[i] = i;
-
- // 2. Get device info and check return value
- struct rte_eth_dev_info dev_info;
- int ret = rte_eth_dev_info_get(port_id, &dev_info);
- if (ret != 0) {
- rte_exit(EXIT_FAILURE, "Error during getting device (port %u) info: %s\n",
- port_id, strerror(-ret));
- }
- // 3. RSS config - key: set inner hash
- struct rte_flow_action_rss rss = {
- .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
- .level = 2, // inner encapsulation layer RSS - hash based on inner protocol
- .types = RTE_ETH_RSS_NONFRAG_IPV4_TCP, // inner IPv4+TCP hash
- .key_len = rsskey_len,
- .key = rsskey,
- .queue_num = nb_queues,
- .queue = queue,
- };
- // 4. Hardware capability check and fallback handling
- if (!(dev_info.flow_type_rss_offloads & RTE_ETH_RSS_NONFRAG_IPV4_TCP)) {
- // printf("warning: inner TCP RSS not supported, fallback to outer RSS\n");
- fprintf(stderr, "Fallback handling!!!\n");
- printf("I'm three,Warning: inner TCP RSS is not supported, falling back to outer RSS.\n");
- rss.level = 0; // fallback to outer RSS
- rss.types = RTE_ETH_FLOW_IPV4; // update to outer protocol type
- }
-
- // 5. Outer IPv4 matches IPIP protocol
- struct rte_flow_item_ipv4 outer_ipv4_spec = {
- .hdr = {
- .next_proto_id = IPPROTO_IPIP
- }
- };
- struct rte_flow_item_ipv4 outer_ipv4_mask = {
- .hdr = {
- .next_proto_id = 0xFF
- }
- };
-
- // 6. Pattern chain definition - match inner TCP to enable inner RSS
- struct rte_flow_item pattern[] = {
- // Outer Ethernet header (wildcard)
- {
- .type = RTE_FLOW_ITEM_TYPE_ETH,
- .spec = NULL,
- .mask = NULL
- },
- // Outer IPv4 header (match only IPIP protocol)
- {
- .type = RTE_FLOW_ITEM_TYPE_IPV4,
- .spec = &outer_ipv4_spec,
- .mask = &outer_ipv4_mask
- },
- // Inner IPv4 header (wildcard, RSS hashes based on this layer)
- {
- .type = RTE_FLOW_ITEM_TYPE_IPV4,
- .spec = NULL,
- .mask = NULL
- },
- // Inner TCP header (wildcard, RSS hashes based on this layer)
- {
- .type = RTE_FLOW_ITEM_TYPE_TCP,
- .spec = NULL,
- .mask = NULL
- },
- {
- .type = RTE_FLOW_ITEM_TYPE_END
- }
- };
-
- // 7. Action configuration
- struct rte_flow_action action[] = {
- {
- .type = RTE_FLOW_ACTION_TYPE_RSS,
- .conf = &rss
- },
- {
- .type = RTE_FLOW_ACTION_TYPE_END
- }
- };
-
- // 8. Validate and create flow rule
- struct rte_flow_error error;
- struct rte_flow *flow = NULL;
-
- if (!rte_flow_validate(port_id, &attr, pattern, action, &error)) {
- flow = rte_flow_create(port_id, &attr, pattern, action, &error);
- if (!flow) {
- fprintf(stderr, "Flow rule creation failed: %s\n", error.message);
- return -error.type;
- }
- } else {
- fprintf(stderr, "Flow rule validation failed: %s\n", error.message);
- return -error.type;
- }
- fprintf(stderr, "IPIP flow rule created successfully (port %d, RSS level=%d)\n", port_id, rss.level);
- printf("IPIP flow rule created successfully (port %d, RSS level=%d)\n", port_id, rss.level);
- return 0;
-}
-#endif
-
-#ifdef FF_FDIR
-/*
- * Flow director allows the traffic to specific port to be processed on the
- * specific queue. Unlike FF_FLOW_ISOLATE, the FF_FDIR implementation uses
- * general flow rule so that most FDIR supported NIC will support. The best
- * using case of FDIR is (but not limited to), using multiple processes to
- * listen on different ports.
- *
- * This function can be called either in FSTACK or in end-application.
- *
- * Example:
- * Given 2 fstack instances A and B. Instance A listens on port 80, and
- * instance B listens on port 81. We want to process the traffic to port 80
- * on rx queue 0, and the traffic to port 81 on rx queue 1.
- * // port 80 rx queue 0
- * ret = fdir_add_tcp_flow(port_id, 0, FF_FLOW_INGRESS, 0, 80);
- * // port 81 rx queue 1
- * ret = fdir_add_tcp_flow(port_id, 1, FF_FLOW_INGRESS, 0, 81);
- */
-#define FF_FLOW_EGRESS 1
-#define FF_FLOW_INGRESS 2
-/**
- * Create a flow rule that moves packets with matching src and dest tcp port
- * to the target queue.
- *
- * This function uses general flow rules and doesn't rely on the flow_isolation
- * that not all the FDIR capable NIC support.
- *
- * @param port_id
- * The selected port.
- * @param queue
- * The target queue.
- * @param dir
- * The direction of the traffic.
- * 1 for egress, 2 for ingress and sum(1+2) for both.
- * @param tcp_sport
- * The src tcp port to match.
- * @param tcp_dport
- * The dest tcp port to match.
- *
- */
-static int
-fdir_add_tcp_flow(uint16_t port_id, uint16_t queue, uint16_t dir,
- uint16_t tcp_sport, uint16_t tcp_dport)
-{
- struct rte_flow_attr attr;
- struct rte_flow_item flow_pattern[4];
- struct rte_flow_action flow_action[2];
- struct rte_flow *flow = NULL;
- struct rte_flow_action_queue flow_action_queue = { .index = queue };
- struct rte_flow_item_tcp tcp_spec;
- struct rte_flow_item_tcp tcp_mask;
- struct rte_flow_error rfe;
- int res;
-
- memset(flow_pattern, 0, sizeof(flow_pattern));
- memset(flow_action, 0, sizeof(flow_action));
-
- /*
- * set the rule attribute.
- */
- memset(&attr, 0, sizeof(struct rte_flow_attr));
- attr.ingress = ((dir & FF_FLOW_INGRESS) > 0);
- attr.egress = ((dir & FF_FLOW_EGRESS) > 0);
-
- /*
- * create the action sequence.
- * one action only, move packet to queue
- */
- flow_action[0].type = RTE_FLOW_ACTION_TYPE_QUEUE;
- flow_action[0].conf = &flow_action_queue;
- flow_action[1].type = RTE_FLOW_ACTION_TYPE_END;
-
- flow_pattern[0].type = RTE_FLOW_ITEM_TYPE_ETH;
- flow_pattern[1].type = RTE_FLOW_ITEM_TYPE_IPV4;
-
- /*
- * set the third level of the pattern (TCP).
- */
- memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp));
- memset(&tcp_mask, 0, sizeof(struct rte_flow_item_tcp));
- tcp_spec.hdr.src_port = htons(tcp_sport);
- tcp_mask.hdr.src_port = (tcp_sport == 0 ? 0: 0xffff);
- tcp_spec.hdr.dst_port = htons(tcp_dport);
- tcp_mask.hdr.dst_port = (tcp_dport == 0 ? 0: 0xffff);
- flow_pattern[2].type = RTE_FLOW_ITEM_TYPE_TCP;
- flow_pattern[2].spec = &tcp_spec;
- flow_pattern[2].mask = &tcp_mask;
-
- flow_pattern[3].type = RTE_FLOW_ITEM_TYPE_END;
-
- res = rte_flow_validate(port_id, &attr, flow_pattern, flow_action, &rfe);
- if (res)
- return (1);
-
- flow = rte_flow_create(port_id, &attr, flow_pattern, flow_action, &rfe);
- if (!flow)
- return port_flow_complain(&rfe);
-
- return (0);
-}
-
-#endif
-
/*
 * Top-level f-stack/DPDK bring-up: validates the process topology from
 * the loaded config, initializes the EAL, memory pools, rings, KNI and
 * ports, arms the hardclock timer and installs optional flow rules.
 * Returns 0 on success; exits the process on any fatal error.
 */
int
ff_dpdk_init(int argc, char **argv)
{
    /* Reject impossible proc counts / ids before touching the EAL. */
    if (ff_global_cfg.dpdk.nb_procs < 1 ||
        ff_global_cfg.dpdk.nb_procs > RTE_MAX_LCORE ||
        ff_global_cfg.dpdk.proc_id >= ff_global_cfg.dpdk.nb_procs ||
        ff_global_cfg.dpdk.proc_id < 0) {
        printf("param num_procs[%d] or proc_id[%d] error!\n",
            ff_global_cfg.dpdk.nb_procs,
            ff_global_cfg.dpdk.proc_id);
        exit(1);
    }

    int ret = rte_eal_init(argc, argv);
    if (ret < 0) {
        rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
    }

    numa_on = ff_global_cfg.dpdk.numa_on;

    idle_sleep = ff_global_cfg.dpdk.idle_sleep;
    /* Cap the configured tx drain delay at BURST_TX_DRAIN_US. */
    pkt_tx_delay = ff_global_cfg.dpdk.pkt_tx_delay > BURST_TX_DRAIN_US ? \
        BURST_TX_DRAIN_US : ff_global_cfg.dpdk.pkt_tx_delay;

    /* Core datapath state, in dependency order. */
    init_lcore_conf();

    init_mem_pool();

    init_dispatch_ring();

    init_msg_ring();

#ifdef FF_KNI
    enable_kni = ff_global_cfg.kni.enable;
    if (enable_kni) {
        init_kni();
    }
#endif

#ifdef FF_USE_PAGE_ARRAY
    ff_mmap_init();
#endif

#ifdef FF_FLOW_ISOLATE
    // run once in primary process
    if (rte_eal_process_type() == RTE_PROC_PRIMARY){
        ret = port_flow_isolate(0, 1);
        if (ret < 0)
            rte_exit(EXIT_FAILURE, "init_port_isolate failed\n");
    }
#endif

    ret = init_port_start();
    if (ret < 0) {
        rte_exit(EXIT_FAILURE, "init_port_start failed\n");
    }

    init_clock();
#ifdef FF_FLOW_ISOLATE
    //Only give a example usage: port_id=0, tcp_port= 80.
    //Recommend:
    //1. init_flow should replace `set_rss_table` in `init_port_start` loop, This can set all NIC's port_id_list instead only 0 device(port_id).
    //2. using config options `tcp_port` replace magic number of 80
    ret = init_flow(0, 80);
    if (ret < 0) {
        rte_exit(EXIT_FAILURE, "init_port_flow failed\n");
    }
#endif

#ifdef FF_FLOW_IPIP
    // create ipip flow for port 0
    if (rte_eal_process_type() == RTE_PROC_PRIMARY){
        ret = create_ipip_flow(0);
        if (ret != 0) {
            rte_exit(EXIT_FAILURE, "create_ipip_flow failed\n");
        }
    }
#endif

#ifdef FF_FDIR
    /*
     * Refer function header section for usage.
     */
    ret = fdir_add_tcp_flow(0, 0, FF_FLOW_INGRESS, 0, 80);
    if (ret)
        rte_exit(EXIT_FAILURE, "fdir_add_tcp_flow failed\n");
#endif

    return 0;
}
-
-static void
-ff_veth_input(const struct ff_dpdk_if_context *ctx, struct rte_mbuf *pkt)
-{
- uint8_t rx_csum = ctx->hw_features.rx_csum;
- if (rx_csum) {
- if (pkt->ol_flags & (RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD)) {
- rte_pktmbuf_free(pkt);
- return;
- }
- }
-
- void *data = rte_pktmbuf_mtod(pkt, void*);
- uint16_t len = rte_pktmbuf_data_len(pkt);
-
- void *hdr = ff_mbuf_gethdr(pkt, pkt->pkt_len, data, len, rx_csum);
- if (hdr == NULL) {
- rte_pktmbuf_free(pkt);
- return;
- }
-
- if (pkt->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED) {
- ff_mbuf_set_vlan_info(hdr, pkt->vlan_tci);
- }
-
- struct rte_mbuf *pn = pkt->next;
- void *prev = hdr;
- while(pn != NULL) {
- data = rte_pktmbuf_mtod(pn, void*);
- len = rte_pktmbuf_data_len(pn);
-
- void *mb = ff_mbuf_get(prev, pn, data, len);
- if (mb == NULL) {
- ff_mbuf_free(hdr);
- rte_pktmbuf_free(pkt);
- return;
- }
- pn = pn->next;
- prev = mb;
- }
-
- ff_veth_process_packet(ctx->ifp, hdr);
-}
-
-static enum FilterReturn
-protocol_filter(const void *data, uint16_t len)
-{
- if(len < RTE_ETHER_ADDR_LEN)
- return FILTER_UNKNOWN;
-
- const struct rte_ether_hdr *hdr;
- const struct rte_vlan_hdr *vlanhdr;
- hdr = (const struct rte_ether_hdr *)data;
- uint16_t ether_type = rte_be_to_cpu_16(hdr->ether_type);
- data += RTE_ETHER_HDR_LEN;
- len -= RTE_ETHER_HDR_LEN;
-
- if (ether_type == RTE_ETHER_TYPE_VLAN) {
- vlanhdr = (struct rte_vlan_hdr *)data;
- ether_type = rte_be_to_cpu_16(vlanhdr->eth_proto);
- data += sizeof(struct rte_vlan_hdr);
- len -= sizeof(struct rte_vlan_hdr);
- }
-
- if(ether_type == RTE_ETHER_TYPE_ARP) {
- return FILTER_ARP;
- }
-
- /* Multicast protocol, such as stp(used by zebra), is forwarded to kni and has a separate speed limit */
- if (rte_is_multicast_ether_addr(&hdr->dst_addr)) {
- return FILTER_MULTI;
- }
-
-#if (!defined(__FreeBSD__) && defined(INET6) ) || \
- ( defined(__FreeBSD__) && defined(INET6) && defined(FF_KNI))
- if (ether_type == RTE_ETHER_TYPE_IPV6) {
- return ff_kni_proto_filter(data,
- len, ether_type);
- }
-#endif
-
-#ifndef FF_KNI
- return FILTER_UNKNOWN;
-#else
- if (!enable_kni) {
- return FILTER_UNKNOWN;
- }
-
- if(ether_type != RTE_ETHER_TYPE_IPV4)
- return FILTER_UNKNOWN;
-
- return ff_kni_proto_filter(data,
- len, ether_type);
-#endif
-}
-
-static inline void
-pktmbuf_deep_attach(struct rte_mbuf *mi, const struct rte_mbuf *m)
-{
- struct rte_mbuf *md;
- void *src, *dst;
-
- dst = rte_pktmbuf_mtod(mi, void *);
- src = rte_pktmbuf_mtod(m, void *);
-
- mi->data_len = m->data_len;
- rte_memcpy(dst, src, m->data_len);
-
- mi->port = m->port;
- mi->vlan_tci = m->vlan_tci;
- mi->vlan_tci_outer = m->vlan_tci_outer;
- mi->tx_offload = m->tx_offload;
- mi->hash = m->hash;
- mi->ol_flags = m->ol_flags;
- mi->packet_type = m->packet_type;
-}
-
/* copied from rte_pktmbuf_clone */
/*
 * Deep-copy a whole mbuf chain `md` into mbufs allocated from `mp`.
 * Every segment's payload and metadata is duplicated (via
 * pktmbuf_deep_attach), so the clone can be handed to another lcore or
 * to KNI while the original is freed independently.
 * Returns the new head mbuf, or NULL when any allocation fails (any
 * partially built clone is released before returning).
 *
 * NOTE(review): nseg is uint8_t — a source chain of more than 255
 * segments would overflow it; confirm callers never build such chains.
 */
static inline struct rte_mbuf *
pktmbuf_deep_clone(const struct rte_mbuf *md,
    struct rte_mempool *mp)
{
    struct rte_mbuf *mc, *mi, **prev;
    uint32_t pktlen;
    uint8_t nseg;

    if (unlikely ((mc = rte_pktmbuf_alloc(mp)) == NULL))
        return NULL;

    mi = mc;
    prev = &mi->next;
    pktlen = md->pkt_len;
    nseg = 0;

    /* Walk the source chain; `prev` links each new segment to the last. */
    do {
        nseg++;
        pktmbuf_deep_attach(mi, md);
        *prev = mi;
        prev = &mi->next;
    } while ((md = md->next) != NULL &&
        (mi = rte_pktmbuf_alloc(mp)) != NULL);

    *prev = NULL;
    mc->nb_segs = nseg;
    mc->pkt_len = pktlen;

    /* Allocation of new indirect segment failed */
    if (unlikely (mi == NULL)) {
        rte_pktmbuf_free(mc);
        return NULL;
    }

    __rte_mbuf_sanity_check(mc, 1);
    return mc;
}
-
-static inline void
-ff_add_vlan_tag(struct rte_mbuf * rtem)
-{
- void *data = NULL;
-
- if (rtem->ol_flags & RTE_MBUF_F_RX_VLAN_STRIPPED) {
- data = rte_pktmbuf_prepend(rtem, sizeof(struct rte_vlan_hdr));
- if (data != NULL) {
- memmove(data, data + sizeof(struct rte_vlan_hdr), RTE_ETHER_HDR_LEN);
- struct rte_ether_hdr *etherhdr = (struct rte_ether_hdr *)data;
- struct rte_vlan_hdr *vlanhdr = (struct rte_vlan_hdr *)(data + RTE_ETHER_HDR_LEN);
- vlanhdr->vlan_tci = rte_cpu_to_be_16(rtem->vlan_tci);
- vlanhdr->eth_proto = etherhdr->ether_type;
- etherhdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN);
- }
- }
-}
-
/*
 * Core rx loop: run each packet in `bufs` through the dispatch chain and
 * deliver it to the FreeBSD stack, a sibling lcore's dispatch ring, or
 * (optionally) the KNI.
 *
 * pkts_from_ring is non-zero when the packets were dispatched to us by
 * another lcore; such packets skip pcap capture, rx accounting, the user
 * dispatch callback and ARP re-broadcast (all done on first receipt).
 */
static inline void
process_packets(uint16_t port_id, uint16_t queue_id, struct rte_mbuf **bufs,
    uint16_t count, const struct ff_dpdk_if_context *ctx, int pkts_from_ring)
{
    struct lcore_conf *qconf = &lcore_conf;
    uint16_t nb_queues = qconf->nb_queue_list[port_id];

    uint16_t i;
    for (i = 0; i < count; i++) {
        struct rte_mbuf *rtem = bufs[i];

        /* Optional pcap capture, only for packets fresh off the NIC. */
        if (unlikely( ff_global_cfg.pcap.enable)) {
            if (!pkts_from_ring) {
                ff_dump_packets( ff_global_cfg.pcap.save_path, rtem, ff_global_cfg.pcap.snap_len, ff_global_cfg.pcap.save_len);
            }
        }

        void *data = rte_pktmbuf_mtod(rtem, void*);
        uint16_t len = rte_pktmbuf_data_len(rtem);

        /* Count rx traffic once, on the lcore that polled it. */
        if (!pkts_from_ring) {
            ff_traffic.rx_packets += rtem->nb_segs;
            ff_traffic.rx_bytes += rte_pktmbuf_pkt_len(rtem);
        }

        /* User dispatch callback may pick a queue, answer in place, or drop. */
        if (!pkts_from_ring && packet_dispatcher) {
            uint64_t cur_tsc = rte_rdtsc();
            int ret = (*packet_dispatcher)(data, &len, queue_id, nb_queues);
            usr_cb_tsc += rte_rdtsc() - cur_tsc;
            if (ret == FF_DISPATCH_RESPONSE) {
                /* Callback rewrote the buffer as a response: transmit it. */
                rte_pktmbuf_pkt_len(rtem) = rte_pktmbuf_data_len(rtem) = len;
                /*
                 * We have not support vlan out strip
                 */
                ff_add_vlan_tag(rtem);
                send_single_packet(rtem, port_id);
                continue;
            }

            if (ret == FF_DISPATCH_ERROR || ret >= nb_queues) {
                //ff_traffic.rx_dropped += rtem->nb_segs; /* Not counted as packet drop */
                rte_pktmbuf_free(rtem);
                continue;
            }

            /* Packet belongs to another queue: hand it over via ring. */
            if (ret != queue_id) {
                ret = rte_ring_enqueue(dispatch_ring[port_id][ret], rtem);
                if (ret < 0) {
                    ff_traffic.rx_dropped += rtem->nb_segs;
                    rte_pktmbuf_free(rtem);
                }

                continue;
            }
        }

        enum FilterReturn filter = protocol_filter(data, len);
#ifdef INET6
        if (filter == FILTER_ARP || filter == FILTER_NDP) {
#else
        if (filter == FILTER_ARP) {
#endif
            struct rte_mempool *mbuf_pool;
            struct rte_mbuf *mbuf_clone;
            /* Broadcast ARP/NDP clones to every other queue so all
             * lcores update their neighbor state. */
            if (!pkts_from_ring) {
                uint16_t j;
                for(j = 0; j < nb_queues; ++j) {
                    if(j == queue_id)
                        continue;

                    unsigned socket_id = 0;
                    if (numa_on) {
                        uint16_t lcore_id = qconf->port_cfgs[port_id].lcore_list[j];
                        socket_id = rte_lcore_to_socket_id(lcore_id);
                    }
                    mbuf_pool = pktmbuf_pool[socket_id];
                    mbuf_clone = pktmbuf_deep_clone(rtem, mbuf_pool);
                    if(mbuf_clone) {
                        int ret = rte_ring_enqueue(dispatch_ring[port_id][j],
                            mbuf_clone);
                        if (ret < 0) {
                            ff_traffic.rx_dropped += mbuf_clone->nb_segs;
                            rte_pktmbuf_free(mbuf_clone);
                        }
                    }
                }
            }

#ifdef FF_KNI
            /* The kernel side also needs ARP/NDP; give KNI its own clone. */
            if (enable_kni && rte_eal_process_type() == RTE_PROC_PRIMARY) {
                mbuf_pool = pktmbuf_pool[qconf->socket_id];
                mbuf_clone = pktmbuf_deep_clone(rtem, mbuf_pool);
                if(mbuf_clone) {
                    ff_add_vlan_tag(mbuf_clone);
                    ff_kni_enqueue(filter, port_id, mbuf_clone);
                }
            }
#endif
            ff_veth_input(ctx, rtem);
#ifdef FF_KNI
        } else if (enable_kni) {
            /* knictl can force all traffic to KNI or to f-stack, or fall
             * back to the per-packet filter + kni_accept policy. */
            if (knictl_action == FF_KNICTL_ACTION_ALL_TO_KNI){
                ff_add_vlan_tag(rtem);
                ff_kni_enqueue(filter, port_id, rtem);
            } else if (knictl_action == FF_KNICTL_ACTION_ALL_TO_FF){
                ff_veth_input(ctx, rtem);
            } else if (knictl_action == FF_KNICTL_ACTION_DEFAULT){
                if (enable_kni &&
                    ((filter == FILTER_KNI && kni_accept) ||
                    ((filter == FILTER_UNKNOWN || filter >= FILTER_OSPF) && !kni_accept)) ) {
                    ff_add_vlan_tag(rtem);
                    ff_kni_enqueue(filter, port_id, rtem);
                } else {
                    ff_veth_input(ctx, rtem);
                }
            } else {
                ff_veth_input(ctx, rtem);
            }
#endif
        } else {
            ff_veth_input(ctx, rtem);
        }
    }
}
-
-static inline int
-process_dispatch_ring(uint16_t port_id, uint16_t queue_id,
- struct rte_mbuf **pkts_burst, const struct ff_dpdk_if_context *ctx)
-{
- /* read packet from ring buf and to process */
- uint16_t nb_rb;
- nb_rb = rte_ring_dequeue_burst(dispatch_ring[port_id][queue_id],
- (void **)pkts_burst, MAX_PKT_BURST, NULL);
-
- if(nb_rb > 0) {
- process_packets(port_id, queue_id, pkts_burst, nb_rb, ctx, 1);
- }
-
- return nb_rb;
-}
-
-static inline void
-handle_sysctl_msg(struct ff_msg *msg)
-{
- int ret = ff_sysctl(msg->sysctl.name, msg->sysctl.namelen,
- msg->sysctl.old, msg->sysctl.oldlenp, msg->sysctl.new,
- msg->sysctl.newlen);
-
- if (ret < 0) {
- msg->result = errno;
- } else {
- msg->result = 0;
- }
-}
-
/*
 * Service an ioctl request by opening a throwaway datagram socket in the
 * f-stack world and forwarding the ioctl into FreeBSD.
 * msg->result is 0 on success or the failing errno.
 */
static inline void
handle_ioctl_msg(struct ff_msg *msg)
{
    int fd, ret;
#ifdef INET6
    /* FF_IOCTL6 requests operate on an IPv6 socket instead. */
    if (msg->msg_type == FF_IOCTL6) {
        fd = ff_socket(AF_INET6, SOCK_DGRAM, 0);
    } else
#endif
    fd = ff_socket(AF_INET, SOCK_DGRAM, 0);

    if (fd < 0) {
        ret = -1;
        goto done;
    }

    ret = ff_ioctl_freebsd(fd, msg->ioctl.cmd, msg->ioctl.data);

    ff_close(fd);

done:
    if (ret < 0) {
        msg->result = errno;
    } else {
        msg->result = 0;
    }
}
-
-static inline void
-handle_route_msg(struct ff_msg *msg)
-{
- int ret = ff_rtioctl(msg->route.fib, msg->route.data,
- &msg->route.len, msg->route.maxlen);
- if (ret < 0) {
- msg->result = errno;
- } else {
- msg->result = 0;
- }
-}
-
-static inline void
-handle_top_msg(struct ff_msg *msg)
-{
- msg->top = ff_top_status;
- msg->result = 0;
-}
-
-#ifdef FF_NETGRAPH
-static inline void
-handle_ngctl_msg(struct ff_msg *msg)
-{
- int ret = ff_ngctl(msg->ngctl.cmd, msg->ngctl.data);
- if (ret < 0) {
- msg->result = errno;
- } else {
- msg->result = 0;
- msg->ngctl.ret = ret;
- }
-}
-#endif
-
-#ifdef FF_IPFW
-static inline void
-handle_ipfw_msg(struct ff_msg *msg)
-{
- int fd, ret;
- fd = ff_socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
- if (fd < 0) {
- ret = -1;
- goto done;
- }
-
- switch (msg->ipfw.cmd) {
- case FF_IPFW_GET:
- ret = ff_getsockopt_freebsd(fd, msg->ipfw.level,
- msg->ipfw.optname, msg->ipfw.optval,
- msg->ipfw.optlen);
- break;
- case FF_IPFW_SET:
- ret = ff_setsockopt_freebsd(fd, msg->ipfw.level,
- msg->ipfw.optname, msg->ipfw.optval,
- *(msg->ipfw.optlen));
- break;
- default:
- ret = -1;
- errno = ENOTSUP;
- break;
- }
-
- ff_close(fd);
-
-done:
- if (ret < 0) {
- msg->result = errno;
- } else {
- msg->result = 0;
- }
-}
-#endif
-
-static inline void
-handle_traffic_msg(struct ff_msg *msg)
-{
- msg->traffic = ff_traffic;
- msg->result = 0;
-}
-
-void ff_get_traffic(void *buffer)
-{
- *(struct ff_traffic_args *)buffer = ff_traffic;
-}
-
-#ifdef FF_KNI
-static inline void
-handle_knictl_msg(struct ff_msg *msg)
-{
- if (msg->knictl.kni_cmd == FF_KNICTL_CMD_SET){
- switch (msg->knictl.kni_action){
- case FF_KNICTL_ACTION_ALL_TO_FF: knictl_action = FF_KNICTL_ACTION_ALL_TO_FF; msg->result = 0; printf("new kni action: alltoff\n"); break;
- case FF_KNICTL_ACTION_ALL_TO_KNI: knictl_action = FF_KNICTL_ACTION_ALL_TO_KNI; msg->result = 0; printf("new kni action: alltokni\n"); break;
- case FF_KNICTL_ACTION_DEFAULT: knictl_action = FF_KNICTL_ACTION_DEFAULT; msg->result = 0; printf("new kni action: default\n"); break;
- default: msg->result = -1;
- }
- }
- else if (msg->knictl.kni_cmd == FF_KNICTL_CMD_GET){
- msg->knictl.kni_action = knictl_action;
- } else {
- msg->result = -2;
- }
-}
-#endif
-
/* Fallback for unrecognized message types: report "not supported". */
static inline void
handle_default_msg(struct ff_msg *msg)
{
    msg->result = ENOTSUP;
}
-
/*
 * Dispatch one control message to its type-specific handler, then hand
 * it back to the sender via the per-type reply ring. If the reply
 * enqueue fails, the message — and any oversized buffer that was
 * swapped in for it — is returned to its pools so nothing leaks.
 */
static inline void
handle_msg(struct ff_msg *msg, uint16_t proc_id)
{
    switch (msg->msg_type) {
        case FF_SYSCTL:
            handle_sysctl_msg(msg);
            break;
        case FF_IOCTL:
#ifdef INET6
        case FF_IOCTL6:
#endif
            handle_ioctl_msg(msg);
            break;
        case FF_ROUTE:
            handle_route_msg(msg);
            break;
        case FF_TOP:
            handle_top_msg(msg);
            break;
#ifdef FF_NETGRAPH
        case FF_NGCTL:
            handle_ngctl_msg(msg);
            break;
#endif
#ifdef FF_IPFW
        case FF_IPFW_CTL:
            handle_ipfw_msg(msg);
            break;
#endif
        case FF_TRAFFIC:
            handle_traffic_msg(msg);
            break;
#ifdef FF_KNI
        case FF_KNICTL:
            handle_knictl_msg(msg);
            break;
#endif
        default:
            handle_default_msg(msg);
            break;
    }
    /* Reply ring full: release the enlarged buffer (if any) and the
     * message itself instead of leaking them. */
    if (rte_ring_enqueue(msg_ring[proc_id].ring[msg->msg_type], msg) < 0) {
        if (msg->original_buf) {
            rte_free(msg->buf_addr);
            msg->buf_addr = msg->original_buf;
            msg->buf_len = msg->original_buf_len;
            msg->original_buf = NULL;
        }

        rte_mempool_put(message_pool, msg);
    }
}
-
-static inline int
-process_msg_ring(uint16_t proc_id, struct rte_mbuf **pkts_burst)
-{
- /* read msg from ring buf and to process */
- uint16_t nb_rb;
- int i;
-
- nb_rb = rte_ring_dequeue_burst(msg_ring[proc_id].ring[0],
- (void **)pkts_burst, MAX_PKT_BURST, NULL);
-
- if (likely(nb_rb == 0))
- return 0;
-
- for (i = 0; i < nb_rb; ++i) {
- handle_msg((struct ff_msg *)pkts_burst[i], proc_id);
- }
-
- return 0;
-}
-
/* Send burst of packets on an output interface */
/*
 * Transmit the first `n` queued mbufs for `port` on its tx queue.
 * Packets the NIC accepts are counted into ff_traffic; packets it
 * rejects are dropped, freed, and counted as tx_dropped. With
 * FF_USE_PAGE_ARRAY the paired BSD mbufs are recycled/freed alongside.
 * Always returns 0.
 */
static inline int
send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port)
{
    struct rte_mbuf **m_table;
    int ret;
    uint16_t queueid;

    queueid = qconf->tx_queue_id[port];
    m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;

    /* Optional pcap capture of outgoing packets. */
    if (unlikely(ff_global_cfg.pcap.enable)) {
        uint16_t i;
        for (i = 0; i < n; i++) {
            ff_dump_packets( ff_global_cfg.pcap.save_path, m_table[i],
                ff_global_cfg.pcap.snap_len, ff_global_cfg.pcap.save_len);
        }
    }

    ret = rte_eth_tx_burst(port, queueid, m_table, n);
    uint16_t i;
    /* Account everything the NIC actually accepted. */
    for (i = 0; i < ret; i++) {
        ff_traffic.tx_packets += m_table[i]->nb_segs; // use ret or rets' nb_segs?
        ff_traffic.tx_bytes += rte_pktmbuf_pkt_len(m_table[i]);
#ifdef FF_USE_PAGE_ARRAY
        if (qconf->tx_mbufs[port].bsd_m_table[i])
            ff_enq_tx_bsdmbuf(port, qconf->tx_mbufs[port].bsd_m_table[i], m_table[i]->nb_segs);
#endif
    }
    /* The NIC refused the tail of the burst: drop and free it. */
    if (unlikely(ret < n)) {
        do {
            ff_traffic.tx_dropped += m_table[ret]->nb_segs;
            rte_pktmbuf_free(m_table[ret]);
#ifdef FF_USE_PAGE_ARRAY
            if ( qconf->tx_mbufs[port].bsd_m_table[ret] )
                ff_mbuf_free(qconf->tx_mbufs[port].bsd_m_table[ret]);
#endif
        } while (++ret < n);
    }
    return 0;
}
-
-/* Enqueue a single packet, and send burst if queue is filled */
-static inline int
-send_single_packet(struct rte_mbuf *m, uint8_t port)
-{
- uint16_t len;
- struct lcore_conf *qconf;
-
- qconf = &lcore_conf;
- len = qconf->tx_mbufs[port].len;
- qconf->tx_mbufs[port].m_table[len] = m;
- len++;
-
- /* enough pkts to be sent */
- if (unlikely(len == MAX_PKT_BURST)) {
- send_burst(qconf, MAX_PKT_BURST, port);
- len = 0;
- }
-
- qconf->tx_mbufs[port].len = len;
- return 0;
-}
-
/*
 * Transmit a FreeBSD mbuf chain `m` of `total` bytes on ctx's port.
 * The BSD payload is copied into a freshly allocated DPDK mbuf chain,
 * FreeBSD offload requests are translated into DPDK tx flags, and the
 * packet is staged via send_single_packet(). Consumes (frees) `m` on
 * every path. Returns 0 on success, -1 on allocation/copy failure.
 */
int
ff_dpdk_if_send(struct ff_dpdk_if_context *ctx, void *m,
    int total)
{
#ifdef FF_USE_PAGE_ARRAY
    /* Page-array build: ff_if_send_onepkt maps the bsd mbuf directly;
     * the copy-based code below is compiled out of reach. */
    struct lcore_conf *qconf = &lcore_conf;
    int len = 0;

    len = ff_if_send_onepkt(ctx, m,total);
    if (unlikely(len == MAX_PKT_BURST)) {
        send_burst(qconf, MAX_PKT_BURST, ctx->port_id);
        len = 0;
    }
    qconf->tx_mbufs[ctx->port_id].len = len;
    return 0;
#endif
    struct rte_mempool *mbuf_pool = pktmbuf_pool[lcore_conf.socket_id];
    struct rte_mbuf *head = rte_pktmbuf_alloc(mbuf_pool);
    if (head == NULL) {
        ff_traffic.tx_dropped++;
        ff_mbuf_free(m);
        return -1;
    }

    head->pkt_len = total;
    head->nb_segs = 0;

    /* Copy the payload, up to RTE_MBUF_DEFAULT_DATAROOM bytes per segment. */
    int off = 0;
    struct rte_mbuf *cur = head, *prev = NULL;
    while(total > 0) {
        if (cur == NULL) {
            cur = rte_pktmbuf_alloc(mbuf_pool);
            if (cur == NULL) {
                ff_traffic.tx_dropped += head->nb_segs + 1;
                rte_pktmbuf_free(head);
                ff_mbuf_free(m);
                return -1;
            }
        }

        if (prev != NULL) {
            prev->next = cur;
        }
        head->nb_segs++;

        prev = cur;
        void *data = rte_pktmbuf_mtod(cur, void*);
        int len = total > RTE_MBUF_DEFAULT_DATAROOM ? RTE_MBUF_DEFAULT_DATAROOM : total;
        int ret = ff_mbuf_copydata(m, data, off, len);
        if (ret < 0) {
            ff_traffic.tx_dropped += head->nb_segs;
            rte_pktmbuf_free(head);
            ff_mbuf_free(m);
            return -1;
        }

        cur->data_len = len;
        off += len;
        total -= len;
        cur = NULL;
    }

    /* Translate FreeBSD offload requests into DPDK mbuf tx flags. */
    struct ff_tx_offload offload = {0};
    ff_mbuf_tx_offload(m, &offload);

    void *data = rte_pktmbuf_mtod(head, void*);

    if (offload.ip_csum) {
        /* ipv6 not supported yet */
        struct rte_ipv4_hdr *iph;
        int iph_len;
        iph = (struct rte_ipv4_hdr *)(data + RTE_ETHER_HDR_LEN);
        iph_len = (iph->version_ihl & 0x0f) << 2;

        head->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_IPV4;
        head->l2_len = RTE_ETHER_HDR_LEN;
        head->l3_len = iph_len;
    }

    if (ctx->hw_features.tx_csum_l4) {
        struct rte_ipv4_hdr *iph;
        int iph_len;
        iph = (struct rte_ipv4_hdr *)(data + RTE_ETHER_HDR_LEN);
        iph_len = (iph->version_ihl & 0x0f) << 2;

        if (iph->version == 4) {
            head->ol_flags |= RTE_MBUF_F_TX_IPV4;
        } else {
            head->ol_flags |= RTE_MBUF_F_TX_IPV6;
        }

        if (offload.tcp_csum) {
            head->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM;
            head->l2_len = RTE_ETHER_HDR_LEN;
            head->l3_len = iph_len;
        }

        /*
         * TCP segmentation offload.
         *
         * - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag
         *   implies PKT_TX_TCP_CKSUM)
         * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
         * - if it's IPv4, set the PKT_TX_IP_CKSUM flag and
         *   write the IP checksum to 0 in the packet
         * - fill the mbuf offload information: l2_len,
         *   l3_len, l4_len, tso_segsz
         * - calculate the pseudo header checksum without taking ip_len
         *   in account, and set it in the TCP header. Refer to
         *   rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum() that can be
         *   used as helpers.
         */
        if (offload.tso_seg_size) {
            struct rte_tcp_hdr *tcph;
            int tcph_len;
            tcph = (struct rte_tcp_hdr *)((char *)iph + iph_len);
            tcph_len = (tcph->data_off & 0xf0) >> 2;
            tcph->cksum = rte_ipv4_phdr_cksum(iph, RTE_MBUF_F_TX_TCP_SEG);

            head->ol_flags |= RTE_MBUF_F_TX_TCP_SEG;
            head->l4_len = tcph_len;
            head->tso_segsz = offload.tso_seg_size;
        }

        if (offload.udp_csum) {
            head->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM;
            head->l2_len = RTE_ETHER_HDR_LEN;
            head->l3_len = iph_len;
        }
    }

    ff_mbuf_free(m);

    return send_single_packet(head, ctx->port_id);
}
-
-int
-ff_dpdk_raw_packet_send(void *data, int total, uint16_t port_id)
-{
- struct rte_mempool *mbuf_pool = pktmbuf_pool[lcore_conf.socket_id];
- struct rte_mbuf *head = rte_pktmbuf_alloc(mbuf_pool);
- if (head == NULL) {
- ff_traffic.tx_dropped++;
- return -1;
- }
-
- head->pkt_len = total;
- head->nb_segs = 0;
-
- int off = 0;
- struct rte_mbuf *cur = head, *prev = NULL;
- while(total > 0) {
- if (cur == NULL) {
- cur = rte_pktmbuf_alloc(mbuf_pool);
- if (cur == NULL) {
- ff_traffic.tx_dropped += head->nb_segs + 1;
- rte_pktmbuf_free(head);
- return -1;
- }
- }
-
- if (prev != NULL) {
- prev->next = cur;
- }
- head->nb_segs++;
-
- prev = cur;
- void *cur_data = rte_pktmbuf_mtod(cur, void*);
- int len = total > RTE_MBUF_DEFAULT_DATAROOM ? RTE_MBUF_DEFAULT_DATAROOM : total;
- memcpy(cur_data, data + off, len);
-
- cur->data_len = len;
- off += len;
- total -= len;
- cur = NULL;
- }
-
- return send_single_packet(head, port_id);
-}
-
/*
 * Per-lcore packet-processing loop, launched on every worker lcore by
 * ff_dpdk_run().  Each iteration:
 *   1. runs expired rte_timers when the FreeBSD clock has expired (and,
 *      under FF_KNI, resets the KNI rate-limit counters once per second);
 *   2. drains the per-port TX buffers once pkt_tx_delay worth of TSC
 *      ticks has elapsed;
 *   3. polls every assigned RX queue — KNI, dispatch ring, then the NIC —
 *      and feeds received packets to process_packets();
 *   4. services the message ring and invokes the user loop callback;
 *   5. optionally sleeps when fully idle, then folds usr/sys/idle TSC
 *      cycle counts into ff_top_status.
 * Runs until ff_dpdk_stop() sets stop_loop.  Always returns 0.
 */
static int
main_loop(void *arg)
{
    struct loop_routine *lr = (struct loop_routine *)arg;

    struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
    uint64_t prev_tsc, diff_tsc, cur_tsc, usch_tsc, div_tsc, usr_tsc, sys_tsc, end_tsc, idle_sleep_tsc;
    int i, j, nb_rx, idle;
    uint16_t port_id, queue_id;
    struct lcore_conf *qconf;
    uint64_t drain_tsc = 0;
    struct ff_dpdk_if_context *ctx;

    /* Convert the configured TX drain delay (microseconds) to TSC ticks,
     * rounding the ticks-per-us factor up. */
    if (pkt_tx_delay) {
        drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * pkt_tx_delay;
    }

    prev_tsc = 0;
    usch_tsc = 0;

    qconf = &lcore_conf;

    while (1) {

        if (unlikely(stop_loop)) {
            break;
        }

        cur_tsc = rte_rdtsc();
        /* Timer service: only when the earliest FreeBSD timer has expired. */
        if (unlikely(freebsd_clock.expire < cur_tsc)) {
            rte_timer_manage();

#ifdef FF_KNI
            /* Reset KNI rate-limit counters once per wall-clock second;
             * log when the previous second exceeded any configured limit. */
            if (enable_kni &&
                (ff_global_cfg.kni.console_packets_ratelimit ||
                ff_global_cfg.kni.general_packets_ratelimit ||
                ff_global_cfg.kni.kernel_packets_ratelimit)) {
                static time_t last_sec = 0;
                time_t sec;
                long nsec;

                ff_get_current_time(&sec, &nsec);
                if (sec > last_sec) {
                    if (kni_rate_limt.gerneal_packets > ff_global_cfg.kni.general_packets_ratelimit ||
                        kni_rate_limt.console_packets > ff_global_cfg.kni.console_packets_ratelimit ||
                        kni_rate_limt.kernel_packets > ff_global_cfg.kni.kernel_packets_ratelimit) {
                        printf("kni ratelimit, general:%lu/%d, console:%lu/%d, kernel:%lu/%d, last sec:%ld, sec:%ld\n",
                            kni_rate_limt.gerneal_packets, ff_global_cfg.kni.general_packets_ratelimit,
                            kni_rate_limt.console_packets, ff_global_cfg.kni.console_packets_ratelimit,
                            kni_rate_limt.kernel_packets, ff_global_cfg.kni.kernel_packets_ratelimit, last_sec, sec);
                    }
                    last_sec = sec;
                    kni_rate_limt.gerneal_packets = 0;
                    kni_rate_limt.console_packets = 0;
                    kni_rate_limt.kernel_packets = 0;
                }
            }
#endif
        }

        /* `idle` stays 1 only if no TX drain, no RX, and no dispatch-ring
         * work happened this iteration; it gates the idle sleep below. */
        idle = 1;
        sys_tsc = 0;
        usr_tsc = 0;
        usr_cb_tsc = 0;

        /*
         * TX burst queue drain
         */
        diff_tsc = cur_tsc - prev_tsc;
        if (unlikely(diff_tsc >= drain_tsc)) {
            for (i = 0; i < qconf->nb_tx_port; i++) {
                port_id = qconf->tx_port_id[i];
                if (qconf->tx_mbufs[port_id].len == 0)
                    continue;

                idle = 0;

                send_burst(qconf,
                    qconf->tx_mbufs[port_id].len,
                    port_id);
                qconf->tx_mbufs[port_id].len = 0;
            }

            prev_tsc = cur_tsc;
        }

        /*
         * Read packet from RX queues
         */
        for (i = 0; i < qconf->nb_rx_queue; ++i) {
            port_id = qconf->rx_queue_list[i].port_id;
            queue_id = qconf->rx_queue_list[i].queue_id;
            ctx = veth_ctx[port_id];

#ifdef FF_KNI
            /* Only the primary process owns the KNI interfaces. */
            if (enable_kni && rte_eal_process_type() == RTE_PROC_PRIMARY) {
                ff_kni_process(port_id, queue_id, pkts_burst, MAX_PKT_BURST);
            }
#endif

            idle &= !process_dispatch_ring(port_id, queue_id, pkts_burst, ctx);

            nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts_burst,
                MAX_PKT_BURST);
            if (nb_rx == 0)
                continue;

            idle = 0;

            /* Prefetch first packets */
            for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
                rte_prefetch0(rte_pktmbuf_mtod(
                        pkts_burst[j], void *));
            }

            /* Prefetch and handle already prefetched packets */
            for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
                rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
                        j + PREFETCH_OFFSET], void *));
                process_packets(port_id, queue_id, &pkts_burst[j], 1, ctx, 0);
            }

            /* Handle remaining prefetched packets */
            for (; j < nb_rx; j++) {
                process_packets(port_id, queue_id, &pkts_burst[j], 1, ctx, 0);
            }
        }

        process_msg_ring(qconf->proc_id, pkts_burst);
#ifdef FF_LOOPBACK_SUPPORT
        ff_swi_net_excute();
#endif
        /* div_tsc marks the boundary between "system" (stack) work above
         * and the user callback below. */
        div_tsc = rte_rdtsc();

        /* Run the user callback when there was work, or at least once per
         * drain interval even while idle. */
        if (likely(lr->loop != NULL && (!idle || cur_tsc - usch_tsc >= drain_tsc))) {
            usch_tsc = cur_tsc;
            lr->loop(lr->arg);
        }

        idle_sleep_tsc = rte_rdtsc();
        if (likely(idle && idle_sleep)) {
            rte_delay_us_sleep(idle_sleep);
            end_tsc = rte_rdtsc();
        } else {
            end_tsc = idle_sleep_tsc;
        }

        /* CPU accounting: usr = callback time (plus loop-callback slice when
         * it ran this iteration), sys = stack time, idle = the remainder. */
        usr_tsc = usr_cb_tsc;
        if (usch_tsc == cur_tsc) {
            usr_tsc += idle_sleep_tsc - div_tsc;
        }

        if (!idle) {
            sys_tsc = div_tsc - cur_tsc - usr_cb_tsc;
            ff_top_status.sys_tsc += sys_tsc;
        }

        ff_top_status.usr_tsc += usr_tsc;
        ff_top_status.work_tsc += end_tsc - cur_tsc;
        ff_top_status.idle_tsc += end_tsc - cur_tsc - usr_tsc - sys_tsc;

        ff_top_status.loops++;
    }

    return 0;
}
-
-int
-ff_dpdk_if_up(void) {
- int i;
- struct lcore_conf *qconf = &lcore_conf;
- for (i = 0; i < qconf->nb_tx_port; i++) {
- uint16_t port_id = qconf->tx_port_id[i];
-
- struct ff_port_cfg *pconf = &qconf->port_cfgs[port_id];
- veth_ctx[port_id] = ff_veth_attach(pconf);
- if (veth_ctx[port_id] == NULL) {
- rte_exit(EXIT_FAILURE, "ff_veth_attach failed");
- }
- }
-
- return 0;
-}
-
-void
-ff_dpdk_run(loop_func_t loop, void *arg) {
- struct loop_routine *lr = rte_malloc(NULL,
- sizeof(struct loop_routine), 0);
- stop_loop = 0;
- lr->loop = loop;
- lr->arg = arg;
- rte_eal_mp_remote_launch(main_loop, lr, CALL_MAIN);
- rte_eal_mp_wait_lcore();
- rte_free(lr);
-}
-
/*
 * Request all lcore main loops to exit.  stop_loop is polled at the
 * top of each main_loop() iteration, so shutdown is asynchronous.
 * NOTE(review): stop_loop is not visibly atomic/volatile in this
 * chunk; cross-core visibility relies on its declaration — confirm.
 */
void
ff_dpdk_stop(void) {
    stop_loop = 1;
}
-
/*
 * Free a single mbuf segment previously handed to the stack.
 * Uses rte_pktmbuf_free_seg(), so only this segment is released,
 * not an entire chain.
 */
void
ff_dpdk_pktmbuf_free(void *m)
{
    rte_pktmbuf_free_seg((struct rte_mbuf *)m);
}
-
/*
 * Microsoft Toeplitz hash as used by RSS.  The key is consumed as a
 * sliding 32-bit window: for every set bit of the input (MSB first),
 * the current window value is XOR-ed into the result; after each bit
 * the window shifts left, pulling the next key bit in from the right.
 */
static uint32_t
toeplitz_hash(unsigned keylen, const uint8_t *key,
    unsigned datalen, const uint8_t *data)
{
    uint32_t result = 0;
    /* Seed the window with the first four key bytes, big-endian. */
    uint32_t window = ((uint32_t)key[0] << 24) | ((uint32_t)key[1] << 16) |
        ((uint32_t)key[2] << 8) | (uint32_t)key[3];
    unsigned byte_idx, bit;

    for (byte_idx = 0; byte_idx < datalen; byte_idx++) {
        for (bit = 0; bit < 8; bit++) {
            uint8_t mask = (uint8_t)(1u << (7 - bit));

            if (data[byte_idx] & mask)
                result ^= window;

            window <<= 1;
            if (byte_idx + 4 < keylen && (key[byte_idx + 4] & mask))
                window |= 1;
        }
    }

    return result;
}
-
-int
-ff_in_pcbladdr(uint16_t family, void *faddr, uint16_t fport, void *laddr)
-{
- int ret = 0;
- uint16_t fa;
-
- if (!pcblddr_fun)
- return ret;
-
- if (family == AF_INET)
- fa = AF_INET;
- else if (family == AF_INET6_FREEBSD)
- fa = AF_INET6_LINUX;
- else
- return EADDRNOTAVAIL;
-
- ret = (*pcblddr_fun)(fa, faddr, fport, laddr);
-
- return ret;
-}
-
/*
 * Register the hook used by ff_in_pcbladdr() to pick a local address
 * for an outbound connection.  Passing NULL disables the lookup.
 */
void
ff_regist_pcblddr_fun(pcblddr_func_t func)
{
    pcblddr_fun = func;
}
-
-
//#define FF_RSS_NUMBER_TBL8S (1 << 20) /* Need less than 1 << 21 */
#define FF_RSS_KEY_LEN (12) /* sip, dip, sport, dport */

/*
 * Partial mirror of DPDK's private struct rte_hash, kept here only to
 * read `entries`.  Only the leading members are declared; the layout
 * must match the header of the DPDK version in use.
 * NOTE(review): this is fragile — re-verify against rte_cuckoo_hash.h
 * whenever DPDK is upgraded.
 */
struct rte_hash {
    char name[RTE_HASH_NAMESIZE]; /**< Name of the hash. */
    uint32_t entries; /**< Total table entries. */
    uint32_t num_buckets; /**< Number of buckets in table. */
};
-
/*
 * Custom RSS hash callback: XOR of the first three 32-bit words of
 * `data` (matching FF_RSS_KEY_LEN == 12 bytes: sip ^ dip ^ ports).
 * `data_len` and `init_val` are part of the callback signature but
 * unused — the input is always a fixed 12-byte key.
 *
 * The original read through a uint32_t * cast of the const void *
 * argument, which is undefined behavior for misaligned buffers and
 * violates strict aliasing; memcpy is alignment- and aliasing-safe
 * and compiles to the same loads on common targets.
 */
static inline uint32_t
ff_rss_hash(const void *data, uint32_t data_len, uint32_t init_val)
{
    uint32_t words[3];

    (void)data_len;  /* fixed-size input; see FF_RSS_KEY_LEN */
    (void)init_val;

    memcpy(words, data, sizeof(words));

    return words[0] ^ words[1] ^ words[2];
}
-
/* Remote IP:PORT.  All sizes must be powers of two: the _MASK macros
 * below are used for open-addressing wrap-around arithmetic. */
#define FF_RSS_TBL_MAX_SIP (4)
#define FF_RSS_TBL_MAX_SPORT (4)
#define FF_RSS_TBL_MAX_SIP_MASK (FF_RSS_TBL_MAX_SIP - 1)
#define FF_RSS_TBL_MAX_SPORT_MASK (FF_RSS_TBL_MAX_SPORT - 1)
/* Server/local IP:PORT.  Every local port (0..65535) gets a
 * precomputed status byte. */
#define FF_RSS_TBL_MAX_DIP (4)
#define FF_RSS_TBL_MAX_DPORT (65536)
#define FF_RSS_TBL_MAX_DIP_MASK (FF_RSS_TBL_MAX_DIP - 1)
#define FF_RSS_TBL_MAX_DPORT_MASK (FF_RSS_TBL_MAX_DPORT - 1)

#define FF_RSS_TBL_SIP_ENTRIES (FF_RSS_TBL_MAX_SIP * FF_RSS_TBL_MAX_SPORT)
#define FF_RSS_TBL_SIP_ENTRIES_MASK (FF_RSS_TBL_SIP_ENTRIES - 1)
//saddr 2429495146, daddr 4273001345, sport 13568, dport 24873

/* Per-dport cached result in ff_rss_tbl_dip_type.dport_stat[]. */
enum ff_rss_tbl_stat_type {
    FF_RSS_TBL_STAT_UNKNOWN = -1,
    FF_RSS_TBL_STAT_NOT_MATCH = 0,
    FF_RSS_TBL_STAT_MATCH = 1
};

/* Lazy-initialization state of ff_rss_tbl, driven by ff_rss_check(). */
enum ff_rss_tbl_init_type {
    FF_RSS_TBL_NOT_INIT = 0,
    FF_RSS_TBL_INITING = 1,
    FF_RSS_TBL_INITED = 2
};
enum ff_rss_tbl_init_type ff_rss_tbl_init_flag = FF_RSS_TBL_NOT_INIT;

/* One destination IP with a status byte per destination port.
 * NOTE(review): each entry is ~64 KB; the full table below is roughly
 * 16 entries * 4 dips * 64 KB ≈ 4 MB of static storage. */
struct ff_rss_tbl_dip_type {
    uint32_t dip;
    int8_t dport_stat[FF_RSS_TBL_MAX_DPORT];
} __rte_cache_aligned;

/* One (sip, sport) bucket holding up to FF_RSS_TBL_MAX_DIP dips. */
struct ff_rss_tbl_type {
    uint32_t sip;
    uint16_t sport;
    struct ff_rss_tbl_dip_type dip_tbl[FF_RSS_TBL_MAX_DIP];
} __rte_cache_aligned;
static struct ff_rss_tbl_type ff_rss_tbl[FF_RSS_TBL_SIP_ENTRIES];
-
/*
 * Build the ff_rss_tbl cache: for every (sip, sport, dip) slot, call
 * ff_rss_check() for each of the 65536 destination ports and record
 * whether the resulting RSS hash maps to this lcore's queue.
 *
 * Open addressing with linear probing is used on both the (sip, sport)
 * bucket index and the dip slot index.  Returns 0 on success, -1 when
 * a probe sequence finds no free/matching slot (table full).
 */
int
ff_rss_tbl_init(void *softc, uint32_t sip, uint32_t dip, uint16_t sport)
{
    uint32_t ori_idx, idx, ori_dip_idx, dip_idx;
    int i, j;

    ff_rss_tbl_init_flag = FF_RSS_TBL_INITING;
    memset(ff_rss_tbl, 0, sizeof(ff_rss_tbl));

    for (i = 0; i < FF_RSS_TBL_SIP_ENTRIES; i++) {
        ori_idx = idx = (sip ^ sport) & FF_RSS_TBL_SIP_ENTRIES_MASK;

        /* For testing only: perturb the tuple so every iteration fills a
         * different slot.  In production these values should come from
         * configuration.  Worst case this probes 16 + 4 slots.
         * (Translated from the original Chinese comment.) */
        if (i != FF_RSS_TBL_SIP_ENTRIES - 1) {
            sip += i + 1;
            sport += i + 1;
        }

        /* Probe for an empty slot or one already holding (sip, sport).
         * NOTE(review): if a probed slot other than ori_idx already holds
         * this exact (sip, sport), neither branch advances idx and the
         * loop would spin — confirm duplicates cannot occur here (the
         * perturbation above makes each tuple unique in practice). */
        do {
            if (ff_rss_tbl[idx].sip == INADDR_ANY) {
                break;
            }

            if (ff_rss_tbl[idx].sip != sip || ff_rss_tbl[idx].sport != sport) {
                idx++;
                idx &= FF_RSS_TBL_SIP_ENTRIES_MASK;
            }
        } while (idx != ori_idx);

        /* Wrapped all the way around without finding a free slot. */
        if (idx == ori_idx && ff_rss_tbl[idx].sip != INADDR_ANY) {
            return -1;
        }

        ori_dip_idx = dip_idx = dip & FF_RSS_TBL_MAX_DIP_MASK;

        /* For testing only: perturb dip as well; real values should come
         * from configuration.  (Translated from the original Chinese
         * comment.) */
        if (i != FF_RSS_TBL_SIP_ENTRIES - 1) {
            dip += i + 1;
        }

        /* Same linear-probing scheme over the dip slots of this bucket. */
        do {
            if (ff_rss_tbl[idx].dip_tbl[dip_idx].dip == INADDR_ANY) {
                break;
            }

            if (ff_rss_tbl[idx].dip_tbl[dip_idx].dip != dip) {
                dip_idx++;
                dip_idx &= FF_RSS_TBL_MAX_DIP_MASK;
            }
        } while (dip_idx != ori_dip_idx);

        if (dip_idx == ori_dip_idx && ff_rss_tbl[idx].dip_tbl[dip_idx].dip != INADDR_ANY) {
            return -1;
        }

        /* Precompute the match status for every destination port. */
        for (j = 0; j < FF_RSS_TBL_MAX_DPORT; j++) {
            ff_rss_tbl[idx].dip_tbl[dip_idx].dport_stat[j] = ff_rss_check(softc, sip, dip, sport, j);
        }

        ff_rss_tbl[idx].sip = sip;
        ff_rss_tbl[idx].sport = sport;
        ff_rss_tbl[idx].dip_tbl[dip_idx].dip = dip;

        /* Break out here only when testing; in production this should be
         * driven by configuration.  (Translated from the original Chinese
         * comment.) */
        //break;
    }

    ff_rss_tbl_init_flag = FF_RSS_TBL_INITED;

    return 0;

}
-
-int
-ff_rss_tbl_get(uint32_t sip, uint32_t dip, uint16_t sport, uint16_t dport)
-{
- uint32_t ori_idx, idx, ori_dip_idx, dip_idx;
- int i;
-
- ori_idx = idx = (sip ^ sport) & FF_RSS_TBL_SIP_ENTRIES_MASK;
- do {
- /* If not inited, no need to continue check */
- if (ff_rss_tbl[idx].sip == INADDR_ANY) {
- return -1;
- }
-
- if (ff_rss_tbl[idx].sip == sip && ff_rss_tbl[idx].sport == sport) {
- ori_dip_idx = dip_idx = dip & FF_RSS_TBL_MAX_DIP_MASK;
- do {
- if (ff_rss_tbl[idx].dip_tbl[dip_idx].dip == INADDR_ANY) {
- return -1;
- }
-
- if (ff_rss_tbl[idx].dip_tbl[dip_idx].dip == dip) {
- return ff_rss_tbl[idx].dip_tbl[dip_idx].dport_stat[dport];
- }
-
- dip_idx++;
- dip_idx &= FF_RSS_TBL_MAX_DIP_MASK;
- } while (dip_idx != ori_dip_idx);
-
- if (dip_idx == ori_dip_idx) {
- return -1;
- }
- }
-
- idx++;
- idx &= FF_RSS_TBL_SIP_ENTRIES_MASK;
- } while (idx != ori_idx);
-
- if (idx == ori_idx) {
- return -1;
- }
-
- return -1;
-}
-
-int
-ff_rss_check(void *softc, uint32_t saddr, uint32_t daddr,
- uint16_t sport, uint16_t dport)
-{
- struct lcore_conf *qconf = &lcore_conf;
- struct ff_dpdk_if_context *ctx = ff_veth_softc_to_hostc(softc);
- uint16_t nb_queues = qconf->nb_queue_list[ctx->port_id];
- uint16_t queueid;
-
- int stat;
- int ret;
- uint64_t prev_tsc, cur_tsc;
-
-
- if (nb_queues <= 1) {
- return 1;
- }
-
- queueid = qconf->tx_queue_id[ctx->port_id];
-
- if (ff_rss_tbl_init_flag == FF_RSS_TBL_NOT_INIT) {
- prev_tsc = rte_rdtsc();
- ff_rss_tbl_init(softc, saddr, daddr, sport);
- cur_tsc = rte_rdtsc();
- printf("Init rss tbl success, diff_tsc %lu, port %u, queue %u,"
- " saddr %u, daddr %u, sport %u, dport %u\n",
- cur_tsc - prev_tsc, ctx->port_id, queueid,
- saddr, daddr, sport, dport);
- }
-
- uint8_t data[sizeof(saddr) + sizeof(daddr) + sizeof(sport) +
- sizeof(dport)];
- unsigned datalen = 0;
-
- bcopy(&saddr, &data[datalen], sizeof(saddr));
- datalen += sizeof(saddr);
-
- bcopy(&daddr, &data[datalen], sizeof(daddr));
- datalen += sizeof(daddr);
-
- bcopy(&sport, &data[datalen], sizeof(sport));
- datalen += sizeof(sport);
-
- bcopy(&dport, &data[datalen], sizeof(dport));
- datalen += sizeof(dport);
-
- if (ff_rss_tbl_init_flag == FF_RSS_TBL_INITED) {
- uint32_t idx = 0;
- uint64_t hash_val;
-
- prev_tsc = rte_rdtsc();
- ret = ff_rss_tbl_get(saddr, daddr, sport, dport);
- cur_tsc = rte_rdtsc();
- if (ret >= 0) {
- stat = ret;
- printf("Get rss tbl success, diff_tsc %lu, stat %d, port %u, queue %u,"
- " saddr %u, daddr %u, sport %u, dport %u\n",
- cur_tsc - prev_tsc, stat, ctx->port_id, queueid,
- saddr, daddr, sport, dport);
- return stat;
- } else {
- // do nothing
- printf("Get rss tbl failed %d, diff_tsc %lu, fall back to toeplitz_hash,"
- " port %u, queue %u,"
- " saddr %u, daddr %u, sport %u, dport %u\n",
- ret, cur_tsc - prev_tsc, ctx->port_id, queueid,
- saddr, daddr, sport, dport);
- }
- }
-
- uint16_t reta_size = rss_reta_size[ctx->port_id];
- uint32_t hash = 0;
- prev_tsc = rte_rdtsc();
- hash = toeplitz_hash(rsskey_len, rsskey, datalen, data);
- stat = ((hash & (reta_size - 1)) % nb_queues) == queueid;
- cur_tsc = rte_rdtsc();
- /*printf("toeplitz_hash diff tsc %lu, stat %d, port %u, queue %u,"
- " saddr %u, daddr %u, sport %u, dport %u\n",
- cur_tsc - prev_tsc, stat, ctx->port_id, queueid,
- saddr, daddr, sport, dport);*/
-
- return stat;
-}
-
/*
 * Register the user-supplied packet dispatch callback.  Presumably
 * consulted by the RX path elsewhere in this file to pick a target
 * queue — confirm against the dispatch logic.
 */
void
ff_regist_packet_dispatcher(dispatch_func_t func)
{
    packet_dispatcher = func;
}
-
-uint64_t
-ff_get_tsc_ns()
-{
- uint64_t cur_tsc = rte_rdtsc();
- uint64_t hz = rte_get_tsc_hz();
- return ((double)cur_tsc/(double)hz) * NS_PER_S;
-}
-