net: Use k_fifo instead of k_work in RX and TX processing

A k_work handler cannot manipulate the k_work item that invoked it.
This makes it hard to clean up the net_pkt, because the net_pkt embeds
that k_work. Because of this, use a k_fifo instead between the RX
thread and the network driver, and between the application and the TX
thread.
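
The practical difference: once k_fifo_get() returns, the kernel holds no
reference to the item, so the consumer may free it immediately; a k_work
handler, by contrast, runs while the work queue still owns the embedded
k_work. A minimal sketch of the fifo side (hypothetical pkt type, not the
Zephyr sources):

	#include <zephyr.h>

	struct pkt {
		void *fifo_reserved; /* first word reserved for the k_fifo */
		uint8_t data[64];
	};

	K_FIFO_DEFINE(rx_fifo);

	void rx_thread(void *p1, void *p2, void *p3)
	{
		ARG_UNUSED(p1);
		ARG_UNUSED(p2);
		ARG_UNUSED(p3);

		while (1) {
			struct pkt *pkt = k_fifo_get(&rx_fifo, K_FOREVER);

			/* ... process pkt ... */
			k_free(pkt); /* safe: fifo no longer links this memory */
		}
	}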

An echo-server/client run with IPv4 and UDP gave the following
results:

Using k_work
------------
TX traffic class statistics:
TC  Priority	Sent pkts	bytes	time
[0] BK (1)	21922		5543071	103 us	[0->41->26->34=101 us]
[1] BE (0)	0		0	-
RX traffic class statistics:
TC  Priority	Recv pkts	bytes	time
[0] BK (0)	0		0	-
[1] BE (0)	21925		6039151	97 us	[0->21->16->37->20=94 us]

Using k_fifo
------------
TX traffic class statistics:
TC  Priority	Sent pkts	bytes	time
[0] BK (1)	15079		3811118	94 us	[0->36->23->32=91 us]
[1] BE (0)	0		0	-
RX traffic class statistics:
TC  Priority	Recv pkts	bytes	time
[0] BK (1)	0		0	-
[1] BE (0)	15073		4150947	79 us	[0->17->12->32->14=75 us]

So using k_fifo gives about 10% better performance with the same
workload (TX packet processing time drops from 103 us to 94 us, RX from
97 us to 79 us).

Fixes #34690

Signed-off-by: Jukka Rissanen <jukka.rissanen@linux.intel.com>

commit bcdc762609
Author:    Jukka Rissanen <jukka.rissanen@linux.intel.com>
Date:      2021-04-29 15:58:54 +03:00
Committed: Kumar Gala

6 changed files with 96 additions and 54 deletions

diff --git a/include/net/net_if.h b/include/net/net_if.h

@@ -396,15 +396,18 @@ struct net_if_config {
  *
  * Traffic classes are used when sending or receiving data that is classified
  * with different priorities. So some traffic can be marked as high priority
- * and it will be sent or received first. There is always at least one work
- * queue in the system for Rx and Tx. Each network packet that is transmitted
- * or received goes through a work queue thread that will transmit it.
+ * and it will be sent or received first. Each network packet that is
+ * transmitted or received goes through a fifo to a thread that will transmit
+ * it.
  */
 struct net_traffic_class {
-	/** Work queue for handling this Tx or Rx packet */
-	struct k_work_q work_q;
+	/** Fifo for handling this Tx or Rx packet */
+	struct k_fifo fifo;
+
+	/** Traffic class handler thread */
+	struct k_thread handler;
 
-	/** Stack for this work queue */
+	/** Stack for this handler */
 	k_thread_stack_t *stack;
 };
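
To make the new shape of the struct concrete, here is a self-contained
sketch of the pattern it supports: one fifo, one handler thread draining
it, one stack. All names (traffic_class, tc_handler, the stack size) are
illustrative only; the real wiring is in the net_tc.c hunks further down,
which defer the start with K_FOREVER and k_thread_start().

	#include <zephyr.h>

	K_KERNEL_STACK_DEFINE(tc_stack, 2048);

	struct traffic_class {
		struct k_fifo fifo;      /* packets queued for this class */
		struct k_thread handler; /* thread that drains the fifo */
	};

	static struct traffic_class tc;

	/* Canonical k_thread entry: three void pointers, fifo passed as p1 */
	static void tc_handler(void *p1, void *p2, void *p3)
	{
		struct k_fifo *fifo = p1;

		ARG_UNUSED(p2);
		ARG_UNUSED(p3);

		while (1) {
			void *pkt = k_fifo_get(fifo, K_FOREVER);

			ARG_UNUSED(pkt); /* ... hand pkt to the stack here ... */
		}
	}

	void tc_init(void)
	{
		k_fifo_init(&tc.fifo);
		k_thread_create(&tc.handler, tc_stack,
				K_KERNEL_STACK_SIZEOF(tc_stack), tc_handler,
				&tc.fifo, NULL, NULL, K_PRIO_PREEMPT(7), 0,
				K_NO_WAIT);
	}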

diff --git a/include/net/net_pkt.h b/include/net/net_pkt.h

@@ -60,18 +60,11 @@ struct net_pkt_cursor {
  * net_pkt_clone() function.
  */
 struct net_pkt {
-	union {
-		/** Internal variable that is used when packet is sent
-		 * or received.
-		 */
-		struct k_work work;
-
-		/** Socket layer will queue received net_pkt into a k_fifo.
-		 * Since this happens after consuming net_pkt's k_work on
-		 * RX path, it is then fine to have both attributes sharing
-		 * the same memory area.
-		 */
-		intptr_t sock_recv_fifo;
-	};
+	/**
+	 * The fifo is used by RX/TX threads and by socket layer. The net_pkt
+	 * is queued via fifo to the processing thread.
+	 */
+	intptr_t fifo;
 
 	/** Slab pointer from where it belongs to */
 	struct k_mem_slab *slab;
@@ -264,11 +257,6 @@ struct net_pkt {
 
 /** @cond ignore */
 
-static inline struct k_work *net_pkt_work(struct net_pkt *pkt)
-{
-	return &pkt->work;
-}
-
 /* The interface real ll address */
 static inline struct net_linkaddr *net_pkt_lladdr_if(struct net_pkt *pkt)
 {
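
Worth noting (an inference from the k_fifo API, not spelled out in the
diff): items placed in a k_fifo must reserve their first word for the
kernel's internal linked list, which is why the new intptr_t fifo member
sits first in struct net_pkt. A minimal illustration with a hypothetical
item type:

	#include <zephyr.h>

	struct my_item {
		void *fifo_reserved; /* first word: used by the kernel to link items */
		uint32_t payload;
	};

	K_FIFO_DEFINE(my_fifo);

	void producer(void)
	{
		static struct my_item item = { .payload = 42 };

		/* k_fifo_put() stores its link pointer in the item's first word */
		k_fifo_put(&my_fifo, &item);
	}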

diff --git a/subsys/net/ip/net_core.c b/subsys/net/ip/net_core.c

@@ -364,12 +364,8 @@ static void net_rx(struct net_if *iface, struct net_pkt *pkt)
 	net_pkt_print();
 }
 
-static void process_rx_packet(struct k_work *work)
+void net_process_rx_packet(struct net_pkt *pkt)
 {
-	struct net_pkt *pkt;
-
-	pkt = CONTAINER_OF(work, struct net_pkt, work);
-
 	net_pkt_set_rx_stats_tick(pkt, k_cycle_get_32());
 
 	net_capture_pkt(net_pkt_iface(pkt), pkt);
@@ -382,8 +378,6 @@ static void net_queue_rx(struct net_if *iface, struct net_pkt *pkt)
 	uint8_t prio = net_pkt_priority(pkt);
 	uint8_t tc = net_rx_priority2tc(prio);
 
-	k_work_init(net_pkt_work(pkt), process_rx_packet);
-
 #if defined(CONFIG_NET_STATISTICS)
 	net_stats_update_tc_recv_pkt(iface, tc);
 	net_stats_update_tc_recv_bytes(iface, tc, net_pkt_get_len(pkt));
@@ -395,7 +389,7 @@ static void net_queue_rx(struct net_if *iface, struct net_pkt *pkt)
 #endif
 
 	if (NET_TC_RX_COUNT == 0) {
-		process_rx_packet(net_pkt_work(pkt));
+		net_process_rx_packet(pkt);
 	} else {
 		net_tc_submit_to_rx_queue(tc, pkt);
 	}

diff --git a/subsys/net/ip/net_if.c b/subsys/net/ip/net_if.c

@@ -316,12 +316,9 @@ static bool net_if_tx(struct net_if *iface, struct net_pkt *pkt)
 	return true;
 }
 
-static void process_tx_packet(struct k_work *work)
+void net_process_tx_packet(struct net_pkt *pkt)
 {
 	struct net_if *iface;
-	struct net_pkt *pkt;
-
-	pkt = CONTAINER_OF(work, struct net_pkt, work);
 
 	net_pkt_set_tx_stats_tick(pkt, k_cycle_get_32());
 
@@ -355,8 +352,6 @@ void net_if_queue_tx(struct net_if *iface, struct net_pkt *pkt)
 		return;
 	}
 
-	k_work_init(net_pkt_work(pkt), process_tx_packet);
-
 #if NET_TC_TX_COUNT > 1
 	NET_DBG("TC %d with prio %d pkt %p", tc, prio, pkt);
 #endif

diff --git a/subsys/net/ip/net_private.h b/subsys/net/ip/net_private.h

@@ -47,6 +47,8 @@ extern void net_if_post_init(void);
 extern void net_if_carrier_down(struct net_if *iface);
 extern void net_if_stats_reset(struct net_if *iface);
 extern void net_if_stats_reset_all(void);
+extern void net_process_rx_packet(struct net_pkt *pkt);
+extern void net_process_tx_packet(struct net_pkt *pkt);
 
 #if defined(CONFIG_NET_NATIVE) || defined(CONFIG_NET_OFFLOAD)
 extern void net_context_init(void);

diff --git a/subsys/net/ip/net_tc.c b/subsys/net/ip/net_tc.c

@@ -40,12 +40,19 @@ static struct net_traffic_class tx_classes[NET_TC_TX_COUNT];
 static struct net_traffic_class rx_classes[NET_TC_RX_COUNT];
 #endif
 
+#if NET_TC_RX_COUNT > 0 || NET_TC_TX_COUNT > 0
+static void submit_to_queue(struct k_fifo *queue, struct net_pkt *pkt)
+{
+	k_fifo_put(queue, pkt);
+}
+#endif
+
 bool net_tc_submit_to_tx_queue(uint8_t tc, struct net_pkt *pkt)
 {
 #if NET_TC_TX_COUNT > 0
 	net_pkt_set_tx_stats_tick(pkt, k_cycle_get_32());
 
-	k_work_submit_to_queue(&tx_classes[tc].work_q, net_pkt_work(pkt));
+	submit_to_queue(&tx_classes[tc].fifo, pkt);
 #else
 	ARG_UNUSED(tc);
 	ARG_UNUSED(pkt);
@@ -58,7 +65,7 @@ void net_tc_submit_to_rx_queue(uint8_t tc, struct net_pkt *pkt)
 #if NET_TC_RX_COUNT > 0
 	net_pkt_set_rx_stats_tick(pkt, k_cycle_get_32());
 
-	k_work_submit_to_queue(&rx_classes[tc].work_q, net_pkt_work(pkt));
+	submit_to_queue(&rx_classes[tc].fifo, pkt);
 #else
 	ARG_UNUSED(tc);
 	ARG_UNUSED(pkt);
@@ -229,9 +236,40 @@ static void net_tc_rx_stats_priority_setup(struct net_if *iface,
 #endif
 #endif
 
-/* Create workqueue for each traffic class we are using. All the network
- * traffic goes through these classes. There needs to be at least one traffic
- * class in the system.
+#if NET_TC_RX_COUNT > 0
+static void tc_rx_handler(struct k_fifo *fifo)
+{
+	struct net_pkt *pkt;
+
+	while (1) {
+		pkt = k_fifo_get(fifo, K_FOREVER);
+		if (pkt == NULL) {
+			continue;
+		}
+
+		net_process_rx_packet(pkt);
+	}
+}
+#endif
+
+#if NET_TC_TX_COUNT > 0
+static void tc_tx_handler(struct k_fifo *fifo)
+{
+	struct net_pkt *pkt;
+
+	while (1) {
+		pkt = k_fifo_get(fifo, K_FOREVER);
+		if (pkt == NULL) {
+			continue;
+		}
+
+		net_process_tx_packet(pkt);
+	}
+}
+#endif
+
+/* Create a fifo for each traffic class we are using. All the network
+ * traffic goes through these classes.
  */
 void net_tc_tx_init(void)
 {
@@ -250,6 +288,7 @@ void net_tc_tx_init(void)
 	for (i = 0; i < NET_TC_TX_COUNT; i++) {
 		uint8_t thread_priority;
 		int priority;
+		k_tid_t tid;
 
 		thread_priority = tx_tc2thread(i);
 
@@ -257,25 +296,35 @@ void net_tc_tx_init(void)
 			K_PRIO_COOP(thread_priority) :
 			K_PRIO_PREEMPT(thread_priority);
 
-		NET_DBG("[%d] Starting TX queue %p stack size %zd "
+		NET_DBG("[%d] Starting TX handler %p stack size %zd "
 			"prio %d %s(%d)", i,
-			&tx_classes[i].work_q,
+			&tx_classes[i].handler,
 			K_KERNEL_STACK_SIZEOF(tx_stack[i]),
 			thread_priority,
 			IS_ENABLED(CONFIG_NET_TC_THREAD_COOPERATIVE) ?
 						"coop" : "preempt",
 			priority);
 
-		k_work_queue_start(&tx_classes[i].work_q, tx_stack[i],
-				   K_KERNEL_STACK_SIZEOF(tx_stack[i]), priority,
-				   NULL);
+		k_fifo_init(&tx_classes[i].fifo);
+
+		tid = k_thread_create(&tx_classes[i].handler, tx_stack[i],
+				      K_KERNEL_STACK_SIZEOF(tx_stack[i]),
+				      (k_thread_entry_t)tc_tx_handler,
+				      &tx_classes[i].fifo, NULL, NULL,
+				      priority, 0, K_FOREVER);
+		if (!tid) {
+			NET_ERR("Cannot create TC handler thread %d", i);
+			continue;
+		}
 
 		if (IS_ENABLED(CONFIG_THREAD_NAME)) {
 			char name[MAX_NAME_LEN];
 
 			snprintk(name, sizeof(name), "tx_q[%d]", i);
-			k_thread_name_set(&tx_classes[i].work_q.thread, name);
+			k_thread_name_set(tid, name);
 		}
+
+		k_thread_start(tid);
 	}
 #endif
 }
@@ -297,6 +346,7 @@ void net_tc_rx_init(void)
 	for (i = 0; i < NET_TC_RX_COUNT; i++) {
 		uint8_t thread_priority;
 		int priority;
+		k_tid_t tid;
 
 		thread_priority = rx_tc2thread(i);
 
@@ -304,25 +354,35 @@ void net_tc_rx_init(void)
 			K_PRIO_COOP(thread_priority) :
 			K_PRIO_PREEMPT(thread_priority);
 
-		NET_DBG("[%d] Starting RX queue %p stack size %zd "
+		NET_DBG("[%d] Starting RX handler %p stack size %zd "
			"prio %d %s(%d)", i,
-			&rx_classes[i].work_q,
+			&rx_classes[i].handler,
 			K_KERNEL_STACK_SIZEOF(rx_stack[i]),
 			thread_priority,
 			IS_ENABLED(CONFIG_NET_TC_THREAD_COOPERATIVE) ?
 						"coop" : "preempt",
 			priority);
 
-		k_work_queue_start(&rx_classes[i].work_q, rx_stack[i],
-				   K_KERNEL_STACK_SIZEOF(rx_stack[i]), priority,
-				   NULL);
+		k_fifo_init(&rx_classes[i].fifo);
+
+		tid = k_thread_create(&rx_classes[i].handler, rx_stack[i],
+				      K_KERNEL_STACK_SIZEOF(rx_stack[i]),
+				      (k_thread_entry_t)tc_rx_handler,
+				      &rx_classes[i].fifo, NULL, NULL,
+				      priority, 0, K_FOREVER);
+		if (!tid) {
+			NET_ERR("Cannot create TC handler thread %d", i);
+			continue;
+		}
 
 		if (IS_ENABLED(CONFIG_THREAD_NAME)) {
 			char name[MAX_NAME_LEN];
 
 			snprintk(name, sizeof(name), "rx_q[%d]", i);
-			k_thread_name_set(&rx_classes[i].work_q.thread, name);
+			k_thread_name_set(tid, name);
 		}
+
+		k_thread_start(tid);
 	}
 #endif
 }
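
Two details in the hunks above are easy to miss. First, k_fifo_get() with
K_FOREVER normally blocks until an item arrives, so the NULL check only
covers the wait being cancelled (k_fifo_cancel_wait()). Second, a k_thread
entry point formally takes three void pointers, which is what the
(k_thread_entry_t) cast papers over; a cast-free equivalent of the TX
handler would look roughly like this (a sketch, not the committed code):

	static void tc_tx_handler(void *p1, void *p2, void *p3)
	{
		struct k_fifo *fifo = p1;

		ARG_UNUSED(p2);
		ARG_UNUSED(p3);

		while (1) {
			struct net_pkt *pkt = k_fifo_get(fifo, K_FOREVER);

			if (pkt == NULL) {
				/* wait was cancelled; keep serving the fifo */
				continue;
			}

			net_process_tx_packet(pkt);
		}
	}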