zephyr/subsys/net/ip/tcp.c
Léonard Bise 14ced754f5 net: tcp: Do not send FIN when closing listening sockets
A net context in LISTENING mode waits for incoming connections; once
a new connection is established, a new net context is spawned and
becomes responsible for handling that connection.
Therefore, when closing a LISTENING context, there is no point in
sending a FIN, as such a context is never connected. The connection
itself is closed by calling close on the spawned net context returned
by the accept call.

Signed-off-by: Léonard Bise <leonard.bise@gmail.com>
2020-06-16 23:47:40 +03:00

/** @file
* @brief TCP handler
*
* Handle TCP connections.
*/
/*
* Copyright (c) 2016 Intel Corporation
* Copyright 2011-2015 by Andrey Butok. FNET Community.
* Copyright 2008-2010 by Andrey Butok. Freescale Semiconductor, Inc.
* Copyright 2003 by Alexey Shervashidze, Andrey Butok. Motorola SPS.
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <logging/log.h>
LOG_MODULE_REGISTER(net_tcp, CONFIG_NET_TCP_LOG_LEVEL);
#include <kernel.h>
#include <string.h>
#include <errno.h>
#include <stdbool.h>
#include <net/net_pkt.h>
#include <net/net_ip.h>
#include <net/net_context.h>
#include <sys/byteorder.h>
#include "connection.h"
#include "net_private.h"
#include "ipv6.h"
#include "ipv4.h"
#include "tcp_internal.h"
#include "net_stats.h"
#define ALLOC_TIMEOUT K_MSEC(500)
static int net_tcp_queue_pkt(struct net_context *context, struct net_pkt *pkt);
/*
* Each TCP connection needs to be tracked by net_context, so
* we need to allocate equal number of control structures here.
*/
#define NET_MAX_TCP_CONTEXT CONFIG_NET_MAX_CONTEXTS
static struct net_tcp tcp_context[NET_MAX_TCP_CONTEXT];
static struct tcp_backlog_entry {
struct net_tcp *tcp;
uint32_t send_seq;
uint32_t send_ack;
struct k_delayed_work ack_timer;
struct sockaddr remote;
uint16_t send_mss;
} tcp_backlog[CONFIG_NET_TCP_BACKLOG_SIZE];
#if defined(CONFIG_NET_TCP_ACK_TIMEOUT)
#define ACK_TIMEOUT_MS CONFIG_NET_TCP_ACK_TIMEOUT
#define ACK_TIMEOUT K_MSEC(ACK_TIMEOUT_MS)
#else
#define ACK_TIMEOUT_MS (1 * MSEC_PER_SEC)
#define ACK_TIMEOUT K_MSEC(MSEC_PER_SEC)
#endif
#define FIN_TIMEOUT_MS (1 * MSEC_PER_SEC)
#define FIN_TIMEOUT K_MSEC(MSEC_PER_SEC)
/* Declares a wrapper function for a net_conn callback that refs the
 * context around the invocation (to protect it from premature
 * deletion). Long term it would be nice for this feature to be part
 * of the connection type itself, but right now it has opaque
 * "user_data" pointers and doesn't understand what a net_context is.
 */
#define NET_CONN_CB(name) \
static enum net_verdict _##name(struct net_conn *conn, \
struct net_pkt *pkt, \
union net_ip_header *ip_hdr, \
union net_proto_header *proto_hdr, \
void *user_data); \
static enum net_verdict name(struct net_conn *conn, \
struct net_pkt *pkt, \
union net_ip_header *ip_hdr, \
union net_proto_header *proto_hdr, \
void *user_data) \
{ \
enum net_verdict result; \
\
net_context_ref(user_data); \
result = _##name(conn, pkt, ip_hdr, \
proto_hdr, user_data); \
net_context_unref(user_data); \
return result; \
} \
static enum net_verdict _##name(struct net_conn *conn, \
struct net_pkt *pkt, \
union net_ip_header *ip_hdr, \
union net_proto_header *proto_hdr, \
void *user_data)
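/* A sketch of what NET_CONN_CB(tcp_established) expands to, for
 * illustration only (the real expansion is produced by the macro
 * above):
 *
 *   enum net_verdict tcp_established(struct net_conn *conn,
 *                                    struct net_pkt *pkt,
 *                                    union net_ip_header *ip_hdr,
 *                                    union net_proto_header *proto_hdr,
 *                                    void *user_data)
 *   {
 *       enum net_verdict result;
 *
 *       net_context_ref(user_data);    // pin the net_context
 *       result = _tcp_established(conn, pkt, ip_hdr, proto_hdr,
 *                                 user_data);
 *       net_context_unref(user_data);  // context may be freed now
 *       return result;
 *   }
 *
 * followed by the definition of _tcp_established(), whose body is
 * written at the macro's use site.
 */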
struct tcp_segment {
uint32_t seq;
uint32_t ack;
uint16_t wnd;
uint8_t flags;
uint8_t optlen;
void *options;
struct sockaddr_ptr *src_addr;
const struct sockaddr *dst_addr;
};
static char upper_if_set(char chr, bool set)
{
if (set) {
return chr & ~0x20;
}
return chr | 0x20;
}
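/* Example: upper_if_set('s', true) clears ASCII bit 0x20 and returns
 * 'S' (0x73 -> 0x53), while upper_if_set('s', false) sets the bit and
 * returns 's'. This is how the flag trace below renders "S" for a set
 * SYN flag and "s" for a clear one.
 */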
static void net_tcp_trace(struct net_pkt *pkt,
struct net_tcp *tcp,
struct net_tcp_hdr *tcp_hdr)
{
uint32_t rel_ack, ack;
uint8_t flags;
if (CONFIG_NET_TCP_LOG_LEVEL < LOG_LEVEL_DBG) {
return;
}
flags = NET_TCP_FLAGS(tcp_hdr);
ack = sys_get_be32(tcp_hdr->ack);
if (!tcp->sent_ack) {
rel_ack = 0U;
} else {
rel_ack = ack ? ack - tcp->sent_ack : 0;
}
NET_DBG("[%p] pkt %p src %u dst %u",
tcp, pkt,
ntohs(tcp_hdr->src_port),
ntohs(tcp_hdr->dst_port));
NET_DBG(" seq 0x%04x (%u) ack 0x%04x (%u/%u)",
sys_get_be32(tcp_hdr->seq),
sys_get_be32(tcp_hdr->seq),
ack,
ack,
/* This tells how many bytes we are acking now */
rel_ack);
NET_DBG(" flags %c%c%c%c%c%c",
upper_if_set('u', flags & NET_TCP_URG),
upper_if_set('a', flags & NET_TCP_ACK),
upper_if_set('p', flags & NET_TCP_PSH),
upper_if_set('r', flags & NET_TCP_RST),
upper_if_set('s', flags & NET_TCP_SYN),
upper_if_set('f', flags & NET_TCP_FIN));
NET_DBG(" win %u chk 0x%04x",
sys_get_be16(tcp_hdr->wnd),
ntohs(tcp_hdr->chksum));
}
static inline k_timeout_t retry_timeout(const struct net_tcp *tcp)
{
return K_MSEC(((uint32_t)1 << tcp->retry_timeout_shift) *
CONFIG_NET_TCP_INIT_RETRANSMISSION_TIMEOUT);
}
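/* Worked example (the 200 ms figure is an assumed Kconfig value): with
 * CONFIG_NET_TCP_INIT_RETRANSMISSION_TIMEOUT=200, retry_timeout()
 * yields 200, 400, 800, 1600, ... ms as retry_timeout_shift grows,
 * until the shift exceeds CONFIG_NET_TCP_RETRY_COUNT and the
 * connection is aborted in tcp_retry_expired().
 */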
#define is_6lo_technology(pkt) \
(IS_ENABLED(CONFIG_NET_IPV6) && net_pkt_family(pkt) == AF_INET6 && \
((IS_ENABLED(CONFIG_NET_L2_BT) && \
net_pkt_lladdr_dst(pkt)->type == NET_LINK_BLUETOOTH) || \
(IS_ENABLED(CONFIG_NET_L2_IEEE802154) && \
net_pkt_lladdr_dst(pkt)->type == NET_LINK_IEEE802154) || \
(IS_ENABLED(CONFIG_NET_L2_CANBUS) && \
net_pkt_lladdr_dst(pkt)->type == NET_LINK_CANBUS)))
/* The ref should not be done for Bluetooth and IEEE 802.15.4 which use
* IPv6 header compression (6lo). For BT and 802.15.4 we copy the pkt
* chain we are about to send so it is fine if the network driver
* releases it. As we have our own copy of the sent data, we do not
* need to take a reference of it. See also net_tcp_send_pkt().
 *
 * Note that this is a macro so that we get information about who
 * called net_pkt_ref() if memory debugging is active.
 */
#define do_ref_if_needed(tcp, pkt) \
do { \
if (!is_6lo_technology(pkt)) { \
NET_DBG("[%p] ref pkt %p new ref %d (%s:%d)", \
tcp, pkt, atomic_get(&pkt->atomic_ref) + 1, \
__func__, __LINE__); \
pkt = net_pkt_ref(pkt); \
} \
} while (0)
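/* Illustrative ref-count timeline for a non-6lo pkt (a sketch, not
 * normative): net_tcp_queue_pkt() -> do_ref_if_needed() leaves the pkt
 * at ref 2 (1 from allocation, 1 for the retransmit queue); the driver
 * drops one ref after transmission (ref 1); a valid ACK seen in
 * net_tcp_ack_received() drops the last ref and frees the pkt. If no
 * ACK arrives, the extra ref keeps the data alive so it can be resent
 * from tcp_retry_expired().
 */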
static void abort_connection(struct net_tcp *tcp)
{
struct net_context *ctx = tcp->context;
NET_DBG("[%p] segment retransmission exceeds %d, resetting context %p",
tcp, CONFIG_NET_TCP_RETRY_COUNT, ctx);
if (ctx->recv_cb) {
ctx->recv_cb(ctx, NULL, NULL, NULL, -ECONNRESET,
tcp->recv_user_data);
}
net_context_unref(ctx);
}
static void tcp_retry_expired(struct k_work *work)
{
struct net_tcp *tcp = CONTAINER_OF(work, struct net_tcp, retry_timer);
struct net_pkt *pkt;
/* Double the retry period for exponential backoff and resend
* the first (only the first!) unack'd packet.
*/
if (!sys_slist_is_empty(&tcp->sent_list)) {
tcp->retry_timeout_shift++;
if (tcp->retry_timeout_shift > CONFIG_NET_TCP_RETRY_COUNT) {
abort_connection(tcp);
return;
}
k_delayed_work_submit(&tcp->retry_timer, retry_timeout(tcp));
pkt = CONTAINER_OF(sys_slist_peek_head(&tcp->sent_list),
struct net_pkt, sent_list);
if (k_work_pending(net_pkt_work(pkt))) {
/* If the packet is still pending in TX queue, then do
* not try to resend it again. This can happen if the
* device is so busy that the TX thread has not yet
* finished previous sending of this packet.
*/
NET_DBG("[%p] pkt %p still pending in TX queue",
tcp, pkt);
return;
}
net_pkt_set_queued(pkt, true);
net_pkt_set_tcp_1st_msg(pkt, false);
/* The ref here is for the initial reference which was lost
* when the pkt was sent. Typically the ref count should be 2
* at this point if the pkt is being sent by the driver.
*/
if (!is_6lo_technology(pkt)) {
net_pkt_ref(pkt);
}
if (net_tcp_send_pkt(pkt) < 0 && !is_6lo_technology(pkt)) {
NET_DBG("retry %u: [%p] pkt %p send failed",
tcp->retry_timeout_shift, tcp, pkt);
/* Undo the ref done above */
net_pkt_unref(pkt);
} else {
NET_DBG("retry %u: [%p] sent pkt %p",
tcp->retry_timeout_shift, tcp, pkt);
if (IS_ENABLED(CONFIG_NET_STATISTICS_TCP) &&
!is_6lo_technology(pkt)) {
net_stats_update_tcp_seg_rexmit(
net_pkt_iface(pkt));
}
}
} else if (CONFIG_NET_TCP_TIME_WAIT_DELAY != 0) {
if (tcp->fin_sent && tcp->fin_rcvd) {
NET_DBG("[%p] Closing connection (context %p)",
tcp, tcp->context);
net_context_unref(tcp->context);
}
}
}
struct net_tcp *net_tcp_alloc(struct net_context *context)
{
int i, key;
key = irq_lock();
for (i = 0; i < NET_MAX_TCP_CONTEXT; i++) {
if (!net_tcp_is_used(&tcp_context[i])) {
tcp_context[i].flags |= NET_TCP_IN_USE;
break;
}
}
irq_unlock(key);
if (i >= NET_MAX_TCP_CONTEXT) {
return NULL;
}
(void)memset(&tcp_context[i], 0, sizeof(struct net_tcp));
tcp_context[i].flags = NET_TCP_IN_USE;
tcp_context[i].state = NET_TCP_CLOSED;
tcp_context[i].context = context;
tcp_context[i].send_seq = tcp_init_isn();
tcp_context[i].recv_wnd = MIN(NET_TCP_MAX_WIN, NET_TCP_BUF_MAX_LEN);
tcp_context[i].send_mss = NET_TCP_DEFAULT_MSS;
tcp_context[i].accept_cb = NULL;
k_delayed_work_init(&tcp_context[i].retry_timer, tcp_retry_expired);
k_sem_init(&tcp_context[i].connect_wait, 0, UINT_MAX);
return &tcp_context[i];
}
static void ack_timer_cancel(struct net_tcp *tcp)
{
k_delayed_work_cancel(&tcp->ack_timer);
}
static void fin_timer_cancel(struct net_tcp *tcp)
{
k_delayed_work_cancel(&tcp->fin_timer);
}
static void retry_timer_cancel(struct net_tcp *tcp)
{
k_delayed_work_cancel(&tcp->retry_timer);
}
static void timewait_timer_cancel(struct net_tcp *tcp)
{
k_delayed_work_cancel(&tcp->timewait_timer);
}
int net_tcp_release(struct net_tcp *tcp)
{
struct net_pkt *pkt;
struct net_pkt *tmp;
unsigned int key;
if (!PART_OF_ARRAY(tcp_context, tcp)) {
return -EINVAL;
}
retry_timer_cancel(tcp);
k_sem_reset(&tcp->connect_wait);
ack_timer_cancel(tcp);
fin_timer_cancel(tcp);
timewait_timer_cancel(tcp);
net_tcp_change_state(tcp, NET_TCP_CLOSED);
SYS_SLIST_FOR_EACH_CONTAINER_SAFE(&tcp->sent_list, pkt, tmp,
sent_list) {
int refcount;
sys_slist_remove(&tcp->sent_list, NULL, &pkt->sent_list);
/* The packet might get freed when sending it, so if that is
* so, just skip it.
*/
if (atomic_get(&pkt->atomic_ref) == 0) {
continue;
}
/* Make sure we undo the reference done in net_tcp_queue_pkt()
*/
net_pkt_unref(pkt);
/* Release the packet fully unless it is still pending */
refcount = atomic_get(&pkt->atomic_ref);
if (refcount > 0) {
/* If the pkt was already placed to TX queue, let
* it go as it will be released by L2 after it is
* sent.
*/
if (k_work_pending(net_pkt_work(pkt)) ||
net_pkt_sent(pkt)) {
refcount--;
}
while (refcount) {
net_pkt_unref(pkt);
refcount--;
}
}
}
tcp->context = NULL;
key = irq_lock();
tcp->flags &= ~(NET_TCP_IN_USE | NET_TCP_RECV_MSS_SET);
irq_unlock(key);
NET_DBG("[%p] Disposed of TCP connection state", tcp);
return 0;
}
static int finalize_segment(struct net_pkt *pkt)
{
net_pkt_cursor_init(pkt);
if (IS_ENABLED(CONFIG_NET_IPV4) &&
net_pkt_family(pkt) == AF_INET) {
return net_ipv4_finalize(pkt, IPPROTO_TCP);
} else if (IS_ENABLED(CONFIG_NET_IPV6) &&
net_pkt_family(pkt) == AF_INET6) {
return net_ipv6_finalize(pkt, IPPROTO_TCP);
}
return -EINVAL;
}
static int prepare_segment(struct net_tcp *tcp,
struct tcp_segment *segment,
struct net_pkt *pkt,
struct net_pkt **out_pkt)
{
NET_PKT_DATA_ACCESS_DEFINE(tcp_access, struct net_tcp_hdr);
struct net_context *context = tcp->context;
struct net_buf *tail = NULL;
struct net_tcp_hdr *tcp_hdr;
uint16_t dst_port, src_port;
bool pkt_allocated;
uint8_t optlen = 0U;
int status;
NET_ASSERT(context);
if (pkt) {
/* TCP transmit data comes in with a pre-allocated
* net_pkt at the head (so that net_context_send can find
* the context), and the data after. Rejigger so we
* can insert a TCP header cleanly
*/
tail = pkt->buffer;
pkt->buffer = NULL;
pkt_allocated = false;
status = net_pkt_alloc_buffer(pkt, segment->optlen,
IPPROTO_TCP, ALLOC_TIMEOUT);
if (status) {
goto fail;
}
} else {
pkt = net_pkt_alloc_with_buffer(net_context_get_iface(context),
segment->optlen,
net_context_get_family(context),
IPPROTO_TCP, ALLOC_TIMEOUT);
if (!pkt) {
return -ENOMEM;
}
net_pkt_set_context(pkt, context);
pkt_allocated = true;
}
net_pkt_set_tcp_1st_msg(pkt, true);
net_pkt_set_sent(pkt, false);
if (IS_ENABLED(CONFIG_NET_IPV4) &&
net_pkt_family(pkt) == AF_INET) {
status = net_context_create_ipv4_new(context, pkt,
net_sin_ptr(segment->src_addr)->sin_addr,
&(net_sin(segment->dst_addr)->sin_addr));
if (status < 0) {
goto fail;
}
dst_port = net_sin(segment->dst_addr)->sin_port;
src_port = ((struct sockaddr_in_ptr *)&context->local)->
sin_port;
} else if (IS_ENABLED(CONFIG_NET_IPV6) &&
net_pkt_family(pkt) == AF_INET6) {
status = net_context_create_ipv6_new(context, pkt,
net_sin6_ptr(segment->src_addr)->sin6_addr,
&(net_sin6(segment->dst_addr)->sin6_addr));
if (status < 0) {
goto fail;
}
dst_port = net_sin6(segment->dst_addr)->sin6_port;
src_port = ((struct sockaddr_in6_ptr *)&context->local)->
sin6_port;
} else {
NET_DBG("[%p] Protocol family %d not supported", tcp,
net_pkt_family(pkt));
status = -EINVAL;
goto fail;
}
tcp_hdr = (struct net_tcp_hdr *)net_pkt_get_data(pkt, &tcp_access);
if (!tcp_hdr) {
status = -ENOBUFS;
goto fail;
}
if (segment->options && segment->optlen) {
/* Set the length (the TCP offset field stores the header length
 * in units of 4-byte words, so round the option length up).
 */
if ((segment->optlen & 0x3u) != 0u) {
optlen = (segment->optlen & 0xfffCu) + 4u;
} else {
optlen = segment->optlen;
}
}
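/* Worked example: optlen = 6 -> (6 & 0xfffC) + 4 = 8, keeping the
 * option area a multiple of 4 bytes as the TCP offset field
 * (measured in 32-bit words) requires.
 */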
memset(tcp_hdr, 0, NET_TCPH_LEN);
tcp_hdr->src_port = src_port;
tcp_hdr->dst_port = dst_port;
sys_put_be32(segment->seq, tcp_hdr->seq);
sys_put_be32(segment->ack, tcp_hdr->ack);
tcp_hdr->offset = (NET_TCPH_LEN + optlen) << 2;
tcp_hdr->flags = segment->flags;
sys_put_be16(segment->wnd, tcp_hdr->wnd);
tcp_hdr->chksum = 0U;
tcp_hdr->urg[0] = 0U;
tcp_hdr->urg[1] = 0U;
net_pkt_set_data(pkt, &tcp_access);
if (optlen && net_pkt_write(pkt, segment->options, segment->optlen)) {
goto fail;
}
if (tail) {
net_pkt_append_buffer(pkt, tail);
}
status = finalize_segment(pkt);
if (status < 0) {
if (pkt_allocated) {
net_pkt_unref(pkt);
}
return status;
}
net_tcp_trace(pkt, tcp, tcp_hdr);
*out_pkt = pkt;
return 0;
fail:
if (pkt_allocated) {
net_pkt_unref(pkt);
} else {
net_buf_unref(pkt->buffer);
pkt->buffer = tail;
}
return status;
}
uint32_t net_tcp_get_recv_wnd(const struct net_tcp *tcp)
{
return tcp->recv_wnd;
}
int net_tcp_prepare_segment(struct net_tcp *tcp, uint8_t flags,
void *options, size_t optlen,
const struct sockaddr_ptr *local,
const struct sockaddr *remote,
struct net_pkt **send_pkt)
{
struct tcp_segment segment = { 0 };
uint32_t seq;
uint16_t wnd;
int status;
if (!local) {
local = &tcp->context->local;
}
seq = tcp->send_seq;
if (flags & NET_TCP_ACK) {
if (net_tcp_get_state(tcp) == NET_TCP_FIN_WAIT_1) {
if (flags & NET_TCP_FIN) {
/* FIN is used here only to determine which
* state to go to next; it's not to be used
* in the sent segment.
*/
flags &= ~NET_TCP_FIN;
net_tcp_change_state(tcp, NET_TCP_TIME_WAIT);
} else {
net_tcp_change_state(tcp, NET_TCP_CLOSING);
}
} else if (net_tcp_get_state(tcp) == NET_TCP_FIN_WAIT_2) {
net_tcp_change_state(tcp, NET_TCP_TIME_WAIT);
} else if (net_tcp_get_state(tcp) == NET_TCP_CLOSE_WAIT) {
tcp->flags |= NET_TCP_IS_SHUTDOWN;
flags |= NET_TCP_FIN;
net_tcp_change_state(tcp, NET_TCP_LAST_ACK);
}
}
if (flags & NET_TCP_FIN) {
/* RFC 793 says about the ACK bit: "Once a connection is
 * established this is always sent." Since teardown happens on an
 * established connection, the FIN segment must have ACK set.
 */
flags |= NET_TCP_ACK;
seq++;
if (net_tcp_get_state(tcp) == NET_TCP_ESTABLISHED ||
net_tcp_get_state(tcp) == NET_TCP_SYN_RCVD) {
net_tcp_change_state(tcp, NET_TCP_FIN_WAIT_1);
}
}
wnd = net_tcp_get_recv_wnd(tcp);
segment.src_addr = (struct sockaddr_ptr *)local;
segment.dst_addr = remote;
segment.seq = tcp->send_seq;
segment.ack = tcp->send_ack;
segment.flags = flags;
segment.wnd = wnd;
segment.options = options;
segment.optlen = optlen;
status = prepare_segment(tcp, &segment, *send_pkt, send_pkt);
if (status < 0) {
return status;
}
tcp->send_seq = seq;
return 0;
}
static inline uint32_t get_size(uint32_t pos1, uint32_t pos2)
{
uint32_t size;
if (pos1 <= pos2) {
size = pos2 - pos1;
} else {
size = NET_TCP_MAX_SEQ - pos1 + pos2 + 1;
}
return size;
}
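/* Example, assuming NET_TCP_MAX_SEQ == 0xFFFFFFFF: pos1 = 0xFFFFFFF0
 * and pos2 = 0x00000010 straddle the 32-bit wraparound, so
 * get_size() returns 0xFFFFFFFF - 0xFFFFFFF0 + 0x10 + 1 = 32, the
 * distance travelled through the sequence-number space.
 */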
#if defined(CONFIG_NET_IPV4)
#ifndef NET_IP_MAX_PACKET
#define NET_IP_MAX_PACKET (10 * 1024)
#endif
#define NET_IP_MAX_OPTIONS 40 /* Maximum option field length */
static inline size_t ip_max_packet_len(struct in_addr *dest_ip)
{
ARG_UNUSED(dest_ip);
return (NET_IP_MAX_PACKET - (NET_IP_MAX_OPTIONS +
sizeof(struct net_ipv4_hdr))) & (~0x3LU);
}
#else /* CONFIG_NET_IPV4 */
#define ip_max_packet_len(...) 0
#endif /* CONFIG_NET_IPV4 */
uint16_t net_tcp_get_recv_mss(const struct net_tcp *tcp)
{
sa_family_t family = net_context_get_family(tcp->context);
if (family == AF_INET) {
#if defined(CONFIG_NET_IPV4)
struct net_if *iface = net_context_get_iface(tcp->context);
if (iface && net_if_get_mtu(iface) >= NET_IPV4TCPH_LEN) {
/* Detect MSS based on interface MTU minus "TCP,IP
* header size"
*/
return net_if_get_mtu(iface) - NET_IPV4TCPH_LEN;
}
#else
return 0;
#endif /* CONFIG_NET_IPV4 */
}
#if defined(CONFIG_NET_IPV6)
else if (family == AF_INET6) {
struct net_if *iface = net_context_get_iface(tcp->context);
int mss = 0;
if (iface && net_if_get_mtu(iface) >= NET_IPV6TCPH_LEN) {
/* Detect MSS based on interface MTU minus "TCP,IP
* header size"
*/
mss = net_if_get_mtu(iface) - NET_IPV6TCPH_LEN;
}
if (mss < NET_IPV6_MTU) {
mss = NET_IPV6_MTU;
}
return mss;
}
#endif /* CONFIG_NET_IPV6 */
return 0;
}
static void net_tcp_set_syn_opt(struct net_tcp *tcp, uint8_t *options,
uint8_t *optionlen)
{
uint32_t recv_mss;
*optionlen = 0U;
if (!(tcp->flags & NET_TCP_RECV_MSS_SET)) {
recv_mss = net_tcp_get_recv_mss(tcp);
tcp->flags |= NET_TCP_RECV_MSS_SET;
} else {
recv_mss = 0U;
}
recv_mss |= (NET_TCP_MSS_OPT << 24) | (NET_TCP_MSS_SIZE << 16);
UNALIGNED_PUT(htonl(recv_mss),
(uint32_t *)(options + *optionlen));
*optionlen += NET_TCP_MSS_SIZE;
}
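/* The 32-bit word written above is the MSS option as it appears on
 * the wire: kind (NET_TCP_MSS_OPT), length (NET_TCP_MSS_SIZE), then
 * the 16-bit MSS value. Assuming the standard RFC 793 values kind=2
 * and len=4, an MSS of 1460 serializes as the bytes 02 04 05 b4.
 */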
int net_tcp_prepare_ack(struct net_tcp *tcp, const struct sockaddr *remote,
struct net_pkt **pkt)
{
uint8_t options[NET_TCP_MAX_OPT_SIZE];
uint8_t optionlen;
switch (net_tcp_get_state(tcp)) {
case NET_TCP_SYN_RCVD:
/* In the SYN_RCVD state acknowledgment must be with the
* SYN flag.
*/
net_tcp_set_syn_opt(tcp, options, &optionlen);
return net_tcp_prepare_segment(tcp, NET_TCP_SYN | NET_TCP_ACK,
options, optionlen, NULL, remote,
pkt);
case NET_TCP_FIN_WAIT_1:
case NET_TCP_LAST_ACK:
/* In the FIN_WAIT_1 and LAST_ACK states acknowledgment must
* be with the FIN flag.
*/
return net_tcp_prepare_segment(tcp, NET_TCP_FIN | NET_TCP_ACK,
0, 0, NULL, remote, pkt);
default:
return net_tcp_prepare_segment(tcp, NET_TCP_ACK, 0, 0, NULL,
remote, pkt);
}
return -EINVAL;
}
static inline void copy_sockaddr_to_sockaddr_ptr(struct net_tcp *tcp,
const struct sockaddr *local,
struct sockaddr_ptr *addr)
{
(void)memset(addr, 0, sizeof(struct sockaddr_ptr));
#if defined(CONFIG_NET_IPV4)
if (local->sa_family == AF_INET) {
net_sin_ptr(addr)->sin_family = AF_INET;
net_sin_ptr(addr)->sin_port = net_sin(local)->sin_port;
net_sin_ptr(addr)->sin_addr = &net_sin(local)->sin_addr;
}
#endif
#if defined(CONFIG_NET_IPV6)
if (local->sa_family == AF_INET6) {
net_sin6_ptr(addr)->sin6_family = AF_INET6;
net_sin6_ptr(addr)->sin6_port = net_sin6(local)->sin6_port;
net_sin6_ptr(addr)->sin6_addr = &net_sin6(local)->sin6_addr;
}
#endif
}
int net_tcp_prepare_reset(struct net_tcp *tcp,
const struct sockaddr *local,
const struct sockaddr *remote,
struct net_pkt **pkt)
{
struct tcp_segment segment = { 0 };
int status = 0;
struct sockaddr_ptr src_addr_ptr;
if ((net_context_get_state(tcp->context) != NET_CONTEXT_UNCONNECTED) &&
(net_tcp_get_state(tcp) != NET_TCP_SYN_SENT) &&
(net_tcp_get_state(tcp) != NET_TCP_TIME_WAIT)) {
/* Send the reset segment always with acknowledgment. */
segment.ack = tcp->send_ack;
segment.flags = NET_TCP_RST | NET_TCP_ACK;
segment.seq = tcp->send_seq;
if (!local) {
segment.src_addr = &tcp->context->local;
} else {
copy_sockaddr_to_sockaddr_ptr(tcp, local,
&src_addr_ptr);
segment.src_addr = &src_addr_ptr;
}
segment.dst_addr = remote;
segment.wnd = 0U;
segment.options = NULL;
segment.optlen = 0U;
status = prepare_segment(tcp, &segment, NULL, pkt);
}
return status;
}
const char *net_tcp_state_str(enum net_tcp_state state)
{
#if (CONFIG_NET_TCP_LOG_LEVEL >= LOG_LEVEL_DBG) || defined(CONFIG_NET_SHELL)
switch (state) {
case NET_TCP_CLOSED:
return "CLOSED";
case NET_TCP_LISTEN:
return "LISTEN";
case NET_TCP_SYN_SENT:
return "SYN_SENT";
case NET_TCP_SYN_RCVD:
return "SYN_RCVD";
case NET_TCP_ESTABLISHED:
return "ESTABLISHED";
case NET_TCP_CLOSE_WAIT:
return "CLOSE_WAIT";
case NET_TCP_LAST_ACK:
return "LAST_ACK";
case NET_TCP_FIN_WAIT_1:
return "FIN_WAIT_1";
case NET_TCP_FIN_WAIT_2:
return "FIN_WAIT_2";
case NET_TCP_TIME_WAIT:
return "TIME_WAIT";
case NET_TCP_CLOSING:
return "CLOSING";
}
#else
ARG_UNUSED(state);
#endif
return "";
}
int net_tcp_queue_data(struct net_context *context, struct net_pkt *pkt)
{
struct net_conn *conn = (struct net_conn *)context->conn_handler;
size_t data_len = net_pkt_get_len(pkt);
int ret;
NET_DBG("[%p] Queue %p len %zd", context->tcp, pkt, data_len);
if (net_context_get_state(context) != NET_CONTEXT_CONNECTED) {
return -ENOTCONN;
}
NET_ASSERT(context->tcp);
if (context->tcp->flags & NET_TCP_IS_SHUTDOWN) {
return -ESHUTDOWN;
}
/* Set PSH on all packets, our window is so small that there's
* no point in the remote side trying to finesse things and
* coalesce packets.
*/
ret = net_tcp_prepare_segment(context->tcp, NET_TCP_PSH | NET_TCP_ACK,
NULL, 0, NULL, &conn->remote_addr, &pkt);
if (ret) {
return ret;
}
context->tcp->send_seq += data_len;
net_stats_update_tcp_sent(net_pkt_iface(pkt), data_len);
return net_tcp_queue_pkt(context, pkt);
}
/* This function is the sole point of *adding* packets to tcp->sent_list,
* and should remain such.
*/
static int net_tcp_queue_pkt(struct net_context *context, struct net_pkt *pkt)
{
sys_slist_append(&context->tcp->sent_list, &pkt->sent_list);
/* We need to restart retry_timer if it is stopped. */
if (k_delayed_work_remaining_get(&context->tcp->retry_timer) == 0) {
k_delayed_work_submit(&context->tcp->retry_timer,
retry_timeout(context->tcp));
}
/* Increase the ref count so that we do not lose the packet and
* can resend later if needed. The pkt will be released after we
* have received the ACK or the TCP stream is removed. This is only
* done for non-6lo technologies that will keep the data until ACK
* is received or timeout happens.
*/
do_ref_if_needed(context->tcp, pkt);
return 0;
}
int net_tcp_send_pkt(struct net_pkt *pkt)
{
NET_PKT_DATA_ACCESS_DEFINE(tcp_access, struct net_tcp_hdr);
struct net_context *ctx = net_pkt_context(pkt);
struct net_tcp_hdr *tcp_hdr;
bool calc_chksum = false;
int ret;
if (!ctx || !ctx->tcp) {
NET_ERR("%scontext is not set on pkt %p",
!ctx ? "" : "TCP ", pkt);
return -EINVAL;
}
net_pkt_cursor_init(pkt);
net_pkt_set_overwrite(pkt, true);
if (net_pkt_skip(pkt, net_pkt_ip_hdr_len(pkt) +
net_pkt_ip_opts_len(pkt))) {
return -EMSGSIZE;
}
tcp_hdr = (struct net_tcp_hdr *)net_pkt_get_data(pkt, &tcp_access);
if (!tcp_hdr) {
NET_ERR("Packet %p does not contain TCP header", pkt);
return -EMSGSIZE;
}
if (sys_get_be32(tcp_hdr->ack) != ctx->tcp->send_ack) {
sys_put_be32(ctx->tcp->send_ack, tcp_hdr->ack);
tcp_hdr->chksum = 0U;
calc_chksum = true;
}
/* The data stream code always sets this flag, because
 * existing stacks (Linux, anyway) seem to ignore data packets
 * without a valid-but-already-transmitted ACK. But set it
 * anyway if we know we need it, just to keep edge cases sane.
 */
if (ctx->tcp->sent_ack != ctx->tcp->send_ack &&
(tcp_hdr->flags & NET_TCP_ACK) == 0U) {
tcp_hdr->flags |= NET_TCP_ACK;
tcp_hdr->chksum = 0U;
calc_chksum = true;
}
/* As we modified the header, we need to write it back.
*/
net_pkt_set_data(pkt, &tcp_access);
if (calc_chksum) {
net_pkt_cursor_init(pkt);
net_pkt_skip(pkt, net_pkt_ip_hdr_len(pkt) +
net_pkt_ip_opts_len(pkt));
/* No need to get tcp_hdr again */
tcp_hdr->chksum = net_calc_chksum_tcp(pkt);
net_pkt_set_data(pkt, &tcp_access);
}
if (tcp_hdr->flags & NET_TCP_FIN) {
ctx->tcp->fin_sent = 1U;
}
ctx->tcp->sent_ack = ctx->tcp->send_ack;
/* We must have special handling for some network technologies that
* tweak the IP protocol headers during packet sending. This happens
* with Bluetooth and IEEE 802.15.4 which use IPv6 header compression
* (6lo) and alter the sent network packet. So in order to avoid any
* corruption of the original data buffer, we must copy the sent data.
* For Bluetooth, its fragmentation code will even mangle the data
* part of the message so we need to copy those too.
*/
if (is_6lo_technology(pkt)) {
struct net_pkt *new_pkt, *check_pkt;
bool pkt_in_slist = false;
/*
* There are users of this function that don't add pkt to TCP
* sent_list. (See send_ack() in net_context.c) In these cases,
* we should avoid the extra 6lowpan specific buffer copy
* below.
*/
SYS_SLIST_FOR_EACH_CONTAINER(&ctx->tcp->sent_list,
check_pkt, sent_list) {
if (check_pkt == pkt) {
pkt_in_slist = true;
break;
}
}
if (pkt_in_slist) {
new_pkt = net_pkt_clone(pkt, ALLOC_TIMEOUT);
if (!new_pkt) {
return -ENOMEM;
}
/* This function is called from net_context.c and if we
* return < 0, the caller will unref the original pkt.
* This would leak the new_pkt so remove it here.
*/
ret = net_send_data(new_pkt);
if (ret < 0) {
net_pkt_unref(new_pkt);
} else {
net_stats_update_tcp_seg_rexmit(
net_pkt_iface(pkt));
net_pkt_set_sent(pkt, true);
}
return ret;
}
}
ret = net_send_data(pkt);
if (ret == 0) {
net_pkt_set_sent(pkt, true);
}
return ret;
}
static void flush_queue(struct net_context *context)
{
(void)net_tcp_send_data(context, NULL, NULL);
}
static void restart_timer(struct net_tcp *tcp)
{
if (!sys_slist_is_empty(&tcp->sent_list)) {
tcp->flags |= NET_TCP_RETRYING;
tcp->retry_timeout_shift = 0U;
k_delayed_work_submit(&tcp->retry_timer, retry_timeout(tcp));
} else if (CONFIG_NET_TCP_TIME_WAIT_DELAY != 0 &&
(tcp->fin_sent && tcp->fin_rcvd)) {
/* We know sent_list is empty, which means if
* fin_sent is true it must have been ACKd
*/
k_delayed_work_submit(&tcp->retry_timer,
K_MSEC(CONFIG_NET_TCP_TIME_WAIT_DELAY));
net_context_ref(tcp->context);
} else {
k_delayed_work_cancel(&tcp->retry_timer);
tcp->flags &= ~NET_TCP_RETRYING;
}
}
int net_tcp_send_data(struct net_context *context, net_context_send_cb_t cb,
void *user_data)
{
struct net_pkt *pkt;
int ret;
/* For now, just send all queued data synchronously. Need to
* add window handling and retry/ACK logic.
*/
SYS_SLIST_FOR_EACH_CONTAINER(&context->tcp->sent_list, pkt, sent_list) {
/* Do not resend packets that were sent by expire timer */
if (net_pkt_queued(pkt)) {
NET_DBG("[%p] Skipping pkt %p because it was already "
"sent.", context->tcp, pkt);
continue;
}
/* If this pkt is the first one (not a resend), then we do
* not need to increase the ref count as it is 1 already.
* For a resent packet, the ref count is only 1 atm, and
* the packet would be freed in driver if we do not increase
* it here. This is only done for non-6lo technologies where
* we keep the original packet (by referencing it) for possible
* re-send (if ACK is not received on time).
*/
if (!is_6lo_technology(pkt)) {
if (!net_pkt_tcp_1st_msg(pkt)) {
net_pkt_ref(pkt);
}
}
NET_DBG("[%p] Sending pkt %p (%zd bytes)", context->tcp,
pkt, net_pkt_get_len(pkt));
ret = net_tcp_send_pkt(pkt);
if (ret < 0) {
NET_DBG("[%p] pkt %p not sent (%d)",
context->tcp, pkt, ret);
if (!is_6lo_technology(pkt)) {
net_pkt_unref(pkt);
}
return ret;
}
net_pkt_set_queued(pkt, true);
net_pkt_set_tcp_1st_msg(pkt, false);
}
/* Just make the callback synchronously even if it didn't
* go over the wire. In theory it would be nice to track
* specific ACK locations in the stream and make the
* callback at that time, but there's nowhere to store the
* user_data value right now.
*/
if (cb) {
cb(context, 0, user_data);
}
return 0;
}
bool net_tcp_ack_received(struct net_context *ctx, uint32_t ack)
{
struct net_tcp *tcp = ctx->tcp;
sys_slist_t *list = &ctx->tcp->sent_list;
sys_snode_t *head;
struct net_pkt *pkt;
bool valid_ack = false;
if (net_tcp_seq_greater(ack, ctx->tcp->send_seq)) {
NET_ERR("ctx %p: ACK for unsent data", ctx);
net_stats_update_tcp_seg_ackerr(net_context_get_iface(ctx));
/* RFC 793 doesn't say that an invalid ack sequence is an error
 * in the general case, but we implement tighter checking
 * and consider the entire packet invalid.
 */
return false;
}
while (!sys_slist_is_empty(list)) {
NET_PKT_DATA_ACCESS_DEFINE(tcp_access, struct net_tcp_hdr);
struct net_tcp_hdr *tcp_hdr;
uint32_t last_seq;
uint32_t seq_len;
head = sys_slist_peek_head(list);
pkt = CONTAINER_OF(head, struct net_pkt, sent_list);
net_pkt_cursor_init(pkt);
net_pkt_set_overwrite(pkt, true);
if (net_pkt_skip(pkt, net_pkt_ip_hdr_len(pkt) +
net_pkt_ip_opts_len(pkt))) {
sys_slist_remove(list, NULL, head);
net_pkt_unref(pkt);
continue;
}
tcp_hdr = (struct net_tcp_hdr *)net_pkt_get_data(pkt,
&tcp_access);
if (!tcp_hdr) {
/* The pkt does not contain TCP header, this should
* not happen.
*/
NET_ERR("pkt %p has no TCP header", pkt);
sys_slist_remove(list, NULL, head);
net_pkt_unref(pkt);
continue;
}
net_pkt_acknowledge_data(pkt, &tcp_access);
seq_len = net_pkt_remaining_data(pkt);
/* Each of SYN and FIN flags are counted
* as one sequence number.
*/
if (tcp_hdr->flags & NET_TCP_SYN) {
seq_len += 1U;
}
if (tcp_hdr->flags & NET_TCP_FIN) {
seq_len += 1U;
}
/* Last sequence number in this packet. */
last_seq = sys_get_be32(tcp_hdr->seq) + seq_len - 1;
/* The ack number acknowledges all sequence numbers strictly below
 * it. For example, ack no. 10 acknowledges all numbers up to and
 * including 9.
 */
if (!net_tcp_seq_greater(ack, last_seq)) {
break;
}
if (tcp_hdr->flags & NET_TCP_FIN) {
enum net_tcp_state s = net_tcp_get_state(tcp);
if (s == NET_TCP_FIN_WAIT_1) {
net_tcp_change_state(tcp, NET_TCP_FIN_WAIT_2);
} else if (s == NET_TCP_CLOSING) {
net_tcp_change_state(tcp, NET_TCP_TIME_WAIT);
}
}
NET_DBG("[%p] Received ACK pkt %p (len %zd bytes)", ctx->tcp,
pkt, net_pkt_get_len(pkt));
sys_slist_remove(list, NULL, head);
/* If we receive a valid ACK, then we need to undo the ref
* set in net_tcp_queue_pkt() (when using non-6lo technology)
* or the ref set in packet creation (for 6lo packet) in order
* to release the pkt.
*/
net_pkt_set_sent(pkt, false);
net_pkt_unref(pkt);
valid_ack = true;
}
/* Restart the timer (if needed) on a valid inbound ACK. This isn't
* quite the same behavior as per-packet retry timers, but is close in
* practice (it starts retries one timer period after the connection
* "got stuck") and avoids the need to track per-packet timers or
* sent times.
*/
if (valid_ack) {
restart_timer(ctx->tcp);
/* Flush anything pending. This is important as if there
* is FIN waiting in the queue, it gets sent asap.
*/
flush_queue(ctx);
}
return true;
}
void net_tcp_init(void)
{
}
#if CONFIG_NET_TCP_LOG_LEVEL >= LOG_LEVEL_DBG
static void validate_state_transition(enum net_tcp_state current,
enum net_tcp_state new)
{
static const uint16_t valid_transitions[] = {
[NET_TCP_CLOSED] = 1 << NET_TCP_LISTEN |
1 << NET_TCP_SYN_SENT |
/* Initial transition from closed->established when
* socket is accepted.
*/
1 << NET_TCP_ESTABLISHED,
[NET_TCP_LISTEN] = 1 << NET_TCP_SYN_RCVD |
1 << NET_TCP_SYN_SENT |
1 << NET_TCP_CLOSED,
[NET_TCP_SYN_RCVD] = 1 << NET_TCP_FIN_WAIT_1 |
1 << NET_TCP_ESTABLISHED |
1 << NET_TCP_LISTEN |
1 << NET_TCP_CLOSED,
[NET_TCP_SYN_SENT] = 1 << NET_TCP_CLOSED |
1 << NET_TCP_ESTABLISHED |
1 << NET_TCP_SYN_RCVD,
[NET_TCP_ESTABLISHED] = 1 << NET_TCP_CLOSE_WAIT |
1 << NET_TCP_FIN_WAIT_1 |
1 << NET_TCP_CLOSED,
[NET_TCP_CLOSE_WAIT] = 1 << NET_TCP_LAST_ACK |
1 << NET_TCP_CLOSED,
[NET_TCP_LAST_ACK] = 1 << NET_TCP_CLOSED,
[NET_TCP_FIN_WAIT_1] = 1 << NET_TCP_CLOSING |
1 << NET_TCP_FIN_WAIT_2 |
1 << NET_TCP_TIME_WAIT |
1 << NET_TCP_CLOSED,
[NET_TCP_FIN_WAIT_2] = 1 << NET_TCP_TIME_WAIT |
1 << NET_TCP_CLOSED,
[NET_TCP_CLOSING] = 1 << NET_TCP_TIME_WAIT |
1 << NET_TCP_CLOSED,
[NET_TCP_TIME_WAIT] = 1 << NET_TCP_CLOSED
};
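/* A transition is valid when the bit for the new state is set in the
 * current state's mask. For example, ESTABLISHED -> FIN_WAIT_1 passes
 * (the mask contains 1 << NET_TCP_FIN_WAIT_1), while a hypothetical
 * ESTABLISHED -> TIME_WAIT jump would be flagged by the check below.
 */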
if (!(valid_transitions[current] & 1 << new)) {
NET_DBG("Invalid state transition: %s (%d) => %s (%d)",
net_tcp_state_str(current), current,
net_tcp_state_str(new), new);
}
}
#else
static inline void validate_state_transition(enum net_tcp_state current,
enum net_tcp_state new)
{
ARG_UNUSED(current);
ARG_UNUSED(new);
}
#endif
void net_tcp_change_state(struct net_tcp *tcp,
enum net_tcp_state new_state)
{
NET_ASSERT(tcp);
if (net_tcp_get_state(tcp) == new_state) {
return;
}
NET_ASSERT(new_state >= NET_TCP_CLOSED &&
new_state <= NET_TCP_CLOSING);
NET_DBG("[%p] state %s (%d) => %s (%d)",
tcp, net_tcp_state_str(tcp->state), tcp->state,
net_tcp_state_str(new_state), new_state);
validate_state_transition(tcp->state, new_state);
tcp->state = new_state;
if (net_tcp_get_state(tcp) != NET_TCP_CLOSED) {
return;
}
if (!tcp->context) {
return;
}
/* Remove any port handlers if we are closing */
if (tcp->context->conn_handler) {
net_tcp_unregister(tcp->context->conn_handler);
tcp->context->conn_handler = NULL;
}
if (tcp->accept_cb) {
tcp->accept_cb(tcp->context,
&tcp->context->remote,
sizeof(struct sockaddr),
-ENETRESET,
tcp->context->user_data);
}
}
void net_tcp_foreach(net_tcp_cb_t cb, void *user_data)
{
int i, key;
key = irq_lock();
for (i = 0; i < NET_MAX_TCP_CONTEXT; i++) {
if (!net_tcp_is_used(&tcp_context[i])) {
continue;
}
irq_unlock(key);
cb(&tcp_context[i], user_data);
key = irq_lock();
}
irq_unlock(key);
}
bool net_tcp_validate_seq(struct net_tcp *tcp, struct net_tcp_hdr *tcp_hdr)
{
return (net_tcp_seq_cmp(sys_get_be32(tcp_hdr->seq),
tcp->send_ack) >= 0) &&
(net_tcp_seq_cmp(sys_get_be32(tcp_hdr->seq),
tcp->send_ack
+ net_tcp_get_recv_wnd(tcp)) < 0);
}
int net_tcp_finalize(struct net_pkt *pkt)
{
NET_PKT_DATA_ACCESS_DEFINE(tcp_access, struct net_tcp_hdr);
struct net_tcp_hdr *tcp_hdr;
tcp_hdr = (struct net_tcp_hdr *)net_pkt_get_data(pkt, &tcp_access);
if (!tcp_hdr) {
return -ENOBUFS;
}
tcp_hdr->chksum = 0U;
if (net_if_need_calc_tx_checksum(net_pkt_iface(pkt))) {
tcp_hdr->chksum = net_calc_chksum_tcp(pkt);
}
return net_pkt_set_data(pkt, &tcp_access);
}
int net_tcp_parse_opts(struct net_pkt *pkt, int opt_totlen,
struct net_tcp_options *opts)
{
uint8_t opt = 0U, optlen = 0U;
while (opt_totlen) {
if (net_pkt_read_u8(pkt, &opt)) {
optlen = 0U;
goto error;
}
opt_totlen--;
/* https://www.iana.org/assignments/tcp-parameters/tcp-parameters.xhtml#tcp-parameters-1 */
/* "Options 0 and 1 are exactly one octet which is their
* kind field. All other options have their one octet
* kind field, followed by a one octet length field,
* followed by length-2 octets of option data."
*/
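/* For example, the option bytes 01 01 02 04 04 c4 parse as two NOPs
 * followed by an MSS option (kind 2, length 4) carrying 0x04c4, so
 * opts->mss becomes 1220.
 */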
if (opt == NET_TCP_END_OPT) {
break;
} else if (opt == NET_TCP_NOP_OPT) {
continue;
}
if (!opt_totlen) {
optlen = 0U;
goto error;
}
if (net_pkt_read_u8(pkt, &optlen) || optlen < 2) {
goto error;
}
opt_totlen--;
/* Subtract opt/optlen size now to avoid doing this
* repeatedly.
*/
optlen -= 2U;
if (opt_totlen < optlen) {
goto error;
}
switch (opt) {
case NET_TCP_MSS_OPT:
if (optlen != 2U) {
goto error;
}
if (net_pkt_read_be16(pkt, &opts->mss)) {
goto error;
}
break;
default:
if (net_pkt_skip(pkt, optlen)) {
goto error;
}
break;
}
opt_totlen -= optlen;
}
return 0;
error:
NET_ERR("Invalid TCP opt: %d len: %d", opt, optlen);
return -EINVAL;
}
int net_tcp_recv(struct net_context *context, net_context_recv_cb_t cb,
void *user_data)
{
NET_ASSERT(context->tcp);
if (context->tcp->flags & NET_TCP_IS_SHUTDOWN) {
return -ESHUTDOWN;
} else if (net_context_get_state(context) != NET_CONTEXT_CONNECTED) {
return -ENOTCONN;
}
context->recv_cb = cb;
context->tcp->recv_user_data = user_data;
return 0;
}
static void queue_fin(struct net_context *ctx)
{
struct net_pkt *pkt = NULL;
bool flush = false;
int ret;
ret = net_tcp_prepare_segment(ctx->tcp, NET_TCP_FIN, NULL, 0,
NULL, &ctx->remote, &pkt);
if (ret || !pkt) {
return;
}
if (sys_slist_is_empty(&ctx->tcp->sent_list)) {
flush = true;
}
net_tcp_queue_pkt(ctx, pkt);
if (flush) {
flush_queue(ctx);
}
}
int net_tcp_put(struct net_context *context)
{
if (net_context_get_ip_proto(context) == IPPROTO_TCP) {
if (net_context_get_state(context) == NET_CONTEXT_CONNECTED
&& context->tcp
&& !context->tcp->fin_rcvd) {
NET_DBG("TCP connection in active close, not "
"disposing yet (waiting %dms)", FIN_TIMEOUT_MS);
k_delayed_work_submit(&context->tcp->fin_timer,
FIN_TIMEOUT);
queue_fin(context);
return 0;
}
/* A listening context is only used to establish connections.
 * Once a connection is established it is not handled by the
 * listening context but by the child it spawned, so there is no
 * need to send a FIN when closing such contexts.
 */
if (context->tcp &&
net_context_get_state(context) == NET_CONTEXT_LISTENING) {
net_context_unref(context);
return 0;
}
if (context->tcp &&
net_tcp_get_state(context->tcp) == NET_TCP_SYN_SENT) {
net_context_unref(context);
}
return -ENOTCONN;
}
return -EOPNOTSUPP;
}
int net_tcp_listen(struct net_context *context)
{
if (net_context_get_ip_proto(context) == IPPROTO_TCP) {
net_tcp_change_state(context->tcp, NET_TCP_LISTEN);
net_context_set_state(context, NET_CONTEXT_LISTENING);
return 0;
}
return -EOPNOTSUPP;
}
int net_tcp_update_recv_wnd(struct net_context *context, int32_t delta)
{
int32_t new_win;
if (!context->tcp) {
NET_ERR("context->tcp == NULL");
return -EPROTOTYPE;
}
new_win = context->tcp->recv_wnd + delta;
if (new_win < 0 || new_win > UINT16_MAX) {
return -EINVAL;
}
context->tcp->recv_wnd = new_win;
return 0;
}
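/* Illustrative usage (hypothetical caller): a socket layer that has
 * consumed len bytes from its receive queue can re-open the advertised
 * window with net_tcp_update_recv_wnd(ctx, len), and shrink it with a
 * negative delta; any delta pushing the window below 0 or above
 * UINT16_MAX is rejected with -EINVAL.
 */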
static int send_reset(struct net_context *context, struct sockaddr *local,
struct sockaddr *remote);
static void backlog_ack_timeout(struct k_work *work)
{
struct tcp_backlog_entry *backlog =
CONTAINER_OF(work, struct tcp_backlog_entry, ack_timer);
NET_DBG("Did not receive ACK in %dms", ACK_TIMEOUT_MS);
/* TODO: If net_context is bound to unspecified IPv6 address
* and some port number, local address is not available.
* RST packet might be invalid. Cache local address
* and use it in RST message preparation.
*/
send_reset(backlog->tcp->context, NULL, &backlog->remote);
(void)memset(backlog, 0, sizeof(struct tcp_backlog_entry));
}
static void tcp_copy_ip_addr_from_hdr(sa_family_t family,
union net_ip_header *ip_hdr,
struct net_tcp_hdr *tcp_hdr,
struct sockaddr *addr,
bool is_src_addr)
{
uint16_t port;
if (is_src_addr) {
port = tcp_hdr->src_port;
} else {
port = tcp_hdr->dst_port;
}
if (IS_ENABLED(CONFIG_NET_IPV4) && family == AF_INET) {
struct sockaddr_in *addr4 = net_sin(addr);
if (is_src_addr) {
net_ipaddr_copy(&addr4->sin_addr, &ip_hdr->ipv4->src);
} else {
net_ipaddr_copy(&addr4->sin_addr, &ip_hdr->ipv4->dst);
}
addr4->sin_port = port;
addr->sa_family = AF_INET;
}
if (IS_ENABLED(CONFIG_NET_IPV6) && family == AF_INET6) {
struct sockaddr_in6 *addr6 = net_sin6(addr);
if (is_src_addr) {
net_ipaddr_copy(&addr6->sin6_addr, &ip_hdr->ipv6->src);
} else {
net_ipaddr_copy(&addr6->sin6_addr, &ip_hdr->ipv6->dst);
}
addr6->sin6_port = port;
addr->sa_family = AF_INET6;
}
}
static int tcp_backlog_find(struct net_pkt *pkt,
union net_ip_header *ip_hdr,
struct net_tcp_hdr *tcp_hdr,
int *empty_slot)
{
int i, empty = -1;
for (i = 0; i < CONFIG_NET_TCP_BACKLOG_SIZE; i++) {
if (tcp_backlog[i].tcp == NULL && empty < 0) {
empty = i;
continue;
}
if (net_pkt_family(pkt) != tcp_backlog[i].remote.sa_family) {
continue;
}
if (IS_ENABLED(CONFIG_NET_IPV4) &&
net_pkt_family(pkt) == AF_INET) {
if (net_sin(&tcp_backlog[i].remote)->sin_port !=
tcp_hdr->src_port) {
continue;
}
if (memcmp(&net_sin(&tcp_backlog[i].remote)->sin_addr,
&ip_hdr->ipv4->src,
sizeof(struct in_addr))) {
continue;
}
} else if (IS_ENABLED(CONFIG_NET_IPV6) &&
net_pkt_family(pkt) == AF_INET6) {
if (net_sin6(&tcp_backlog[i].remote)->sin6_port !=
tcp_hdr->src_port) {
continue;
}
if (memcmp(&net_sin6(&tcp_backlog[i].remote)->sin6_addr,
&ip_hdr->ipv6->src,
sizeof(struct in6_addr))) {
continue;
}
}
return i;
}
if (empty_slot) {
*empty_slot = empty;
}
return -EADDRNOTAVAIL;
}
static int tcp_backlog_syn(struct net_pkt *pkt,
union net_ip_header *ip_hdr,
struct net_tcp_hdr *tcp_hdr,
struct net_context *context,
uint16_t send_mss)
{
int empty_slot = -1;
if (tcp_backlog_find(pkt, ip_hdr, tcp_hdr, &empty_slot) >= 0) {
return -EADDRINUSE;
}
if (empty_slot < 0) {
return -ENOSPC;
}
tcp_backlog[empty_slot].tcp = context->tcp;
tcp_copy_ip_addr_from_hdr(net_pkt_family(pkt), ip_hdr, tcp_hdr,
&tcp_backlog[empty_slot].remote, true);
tcp_backlog[empty_slot].send_seq = context->tcp->send_seq;
tcp_backlog[empty_slot].send_ack = context->tcp->send_ack;
tcp_backlog[empty_slot].send_mss = send_mss;
k_delayed_work_init(&tcp_backlog[empty_slot].ack_timer,
backlog_ack_timeout);
k_delayed_work_submit(&tcp_backlog[empty_slot].ack_timer, ACK_TIMEOUT);
return 0;
}
static int tcp_backlog_ack(struct net_pkt *pkt,
union net_ip_header *ip_hdr,
struct net_tcp_hdr *tcp_hdr,
struct net_context *context)
{
int r;
r = tcp_backlog_find(pkt, ip_hdr, tcp_hdr, NULL);
if (r < 0) {
return r;
}
/* Sent SEQ + 1 needs to be the same as the received ACK */
if (tcp_backlog[r].send_seq + 1 != sys_get_be32(tcp_hdr->ack)) {
return -EINVAL;
}
memcpy(&context->remote, &tcp_backlog[r].remote,
sizeof(struct sockaddr));
context->tcp->send_seq = tcp_backlog[r].send_seq + 1;
context->tcp->send_ack = tcp_backlog[r].send_ack;
context->tcp->send_mss = tcp_backlog[r].send_mss;
k_delayed_work_cancel(&tcp_backlog[r].ack_timer);
(void)memset(&tcp_backlog[r], 0, sizeof(struct tcp_backlog_entry));
return 0;
}
static int tcp_backlog_rst(struct net_pkt *pkt,
union net_ip_header *ip_hdr,
struct net_tcp_hdr *tcp_hdr)
{
int r;
r = tcp_backlog_find(pkt, ip_hdr, tcp_hdr, NULL);
if (r < 0) {
return r;
}
/* The ACK sent needs to be the same as the received SEQ */
if (tcp_backlog[r].send_ack != sys_get_be32(tcp_hdr->seq)) {
return -EINVAL;
}
k_delayed_work_cancel(&tcp_backlog[r].ack_timer);
(void)memset(&tcp_backlog[r], 0, sizeof(struct tcp_backlog_entry));
return 0;
}
static void handle_fin_timeout(struct k_work *work)
{
struct net_tcp *tcp =
CONTAINER_OF(work, struct net_tcp, fin_timer);
NET_DBG("Did not receive FIN in %dms", FIN_TIMEOUT_MS);
net_context_unref(tcp->context);
}
static void handle_ack_timeout(struct k_work *work)
{
/* This means that we did not receive ACK response in time. */
struct net_tcp *tcp = CONTAINER_OF(work, struct net_tcp, ack_timer);
NET_DBG("Did not receive ACK in %dms while in %s", ACK_TIMEOUT_MS,
net_tcp_state_str(net_tcp_get_state(tcp)));
if (net_tcp_get_state(tcp) == NET_TCP_LAST_ACK) {
/* We did not receive the last ACK on time. We can only
 * close the connection at this point. We will not send
 * anything to the peer in this last state, but will go
 * directly to the CLOSED state.
 */
net_tcp_change_state(tcp, NET_TCP_CLOSED);
if (tcp->context->recv_cb) {
tcp->context->recv_cb(tcp->context, NULL, NULL, NULL,
0, tcp->recv_user_data);
}
net_context_unref(tcp->context);
}
}
static void handle_timewait_timeout(struct k_work *work)
{
struct net_tcp *tcp = CONTAINER_OF(work, struct net_tcp,
timewait_timer);
NET_DBG("Timewait expired in %dms", CONFIG_NET_TCP_TIME_WAIT_DELAY);
if (net_tcp_get_state(tcp) == NET_TCP_TIME_WAIT) {
net_tcp_change_state(tcp, NET_TCP_CLOSED);
if (tcp->context->recv_cb) {
tcp->context->recv_cb(tcp->context, NULL, NULL, NULL,
0, tcp->recv_user_data);
}
net_context_unref(tcp->context);
}
}
int net_tcp_get(struct net_context *context)
{
context->tcp = net_tcp_alloc(context);
if (!context->tcp) {
NET_ASSERT(context->tcp, "Cannot allocate TCP context");
return -ENOBUFS;
}
k_delayed_work_init(&context->tcp->ack_timer, handle_ack_timeout);
k_delayed_work_init(&context->tcp->fin_timer, handle_fin_timeout);
k_delayed_work_init(&context->tcp->timewait_timer,
handle_timewait_timeout);
return 0;
}
int net_tcp_unref(struct net_context *context)
{
int i;
if (!context->tcp) {
return 0;
}
/* Clear the backlog for this TCP context. */
for (i = 0; i < CONFIG_NET_TCP_BACKLOG_SIZE; i++) {
if (tcp_backlog[i].tcp != context->tcp) {
continue;
}
k_delayed_work_cancel(&tcp_backlog[i].ack_timer);
(void)memset(&tcp_backlog[i], 0, sizeof(tcp_backlog[i]));
}
net_tcp_release(context->tcp);
context->tcp = NULL;
return 0;
}
#define net_tcp_print_recv_info(str, pkt, port) \
if (IS_ENABLED(CONFIG_NET_TCP_LOG_LEVEL_DBG)) { \
if (net_pkt_family(pkt) == AF_INET6) { \
NET_DBG("%s received from %s port %d", str, \
log_strdup(net_sprint_ipv6_addr( \
&NET_IPV6_HDR(pkt)->src)), \
ntohs(port)); \
} else if (net_pkt_family(pkt) == AF_INET) {\
NET_DBG("%s received from %s port %d", str, \
log_strdup(net_sprint_ipv4_addr( \
&NET_IPV4_HDR(pkt)->src)), \
ntohs(port)); \
} \
}
#define net_tcp_print_send_info(str, pkt, port) \
if (IS_ENABLED(CONFIG_NET_TCP_LOG_LEVEL_DBG)) { \
if (net_pkt_family(pkt) == AF_INET6) { \
NET_DBG("%s sent to %s port %d", str, \
log_strdup(net_sprint_ipv6_addr( \
&NET_IPV6_HDR(pkt)->dst)), \
ntohs(port)); \
} else if (net_pkt_family(pkt) == AF_INET) { \
NET_DBG("%s sent to %s port %d", str, \
log_strdup(net_sprint_ipv4_addr( \
&NET_IPV4_HDR(pkt)->dst)), \
ntohs(port)); \
} \
}
static void print_send_info(struct net_pkt *pkt,
const char *msg, const struct sockaddr *remote)
{
if (CONFIG_NET_TCP_LOG_LEVEL >= LOG_LEVEL_DBG) {
uint16_t port = 0U;
if (IS_ENABLED(CONFIG_NET_IPV4) &&
net_pkt_family(pkt) == AF_INET) {
struct sockaddr_in *addr4 = net_sin(remote);
port = addr4->sin_port;
}
if (IS_ENABLED(CONFIG_NET_IPV6) &&
net_pkt_family(pkt) == AF_INET6) {
struct sockaddr_in6 *addr6 = net_sin6(remote);
port = addr6->sin6_port;
}
net_tcp_print_send_info(msg, pkt, port);
}
}
/* Send SYN or SYN/ACK. */
static inline int send_syn_segment(struct net_context *context,
const struct sockaddr_ptr *local,
const struct sockaddr *remote,
int flags, const char *msg)
{
struct net_pkt *pkt = NULL;
int ret;
uint8_t options[NET_TCP_MAX_OPT_SIZE];
uint8_t optionlen = 0U;
if (flags == NET_TCP_SYN) {
net_tcp_set_syn_opt(context->tcp, options, &optionlen);
}
ret = net_tcp_prepare_segment(context->tcp, flags, options, optionlen,
local, remote, &pkt);
if (ret) {
return ret;
}
print_send_info(pkt, msg, remote);
ret = net_send_data(pkt);
if (ret < 0) {
net_pkt_unref(pkt);
return ret;
}
net_pkt_set_sent(pkt, true);
context->tcp->send_seq++;
return ret;
}
static inline int send_syn(struct net_context *context,
const struct sockaddr *remote)
{
net_tcp_change_state(context->tcp, NET_TCP_SYN_SENT);
return send_syn_segment(context, NULL, remote, NET_TCP_SYN, "SYN");
}
static inline int send_syn_ack(struct net_context *context,
struct sockaddr_ptr *local,
struct sockaddr *remote)
{
return send_syn_segment(context, local, remote,
NET_TCP_SYN | NET_TCP_ACK,
"SYN_ACK");
}
static int send_ack(struct net_context *context,
struct sockaddr *remote, bool force)
{
struct net_pkt *pkt = NULL;
int ret;
/* Something (e.g. a data transmission under the user
* callback) already sent the ACK, no need
*/
if (!force && context->tcp->send_ack == context->tcp->sent_ack) {
return 0;
}
ret = net_tcp_prepare_ack(context->tcp, remote, &pkt);
if (ret) {
return ret;
}
print_send_info(pkt, "ACK", remote);
ret = net_tcp_send_pkt(pkt);
if (ret < 0) {
net_pkt_unref(pkt);
}
return ret;
}
static int send_reset(struct net_context *context,
struct sockaddr *local,
struct sockaddr *remote)
{
struct net_pkt *pkt = NULL;
int ret;
ret = net_tcp_prepare_reset(context->tcp, local, remote, &pkt);
if (ret || !pkt) {
return ret;
}
print_send_info(pkt, "RST", remote);
ret = net_send_data(pkt);
if (ret < 0) {
net_pkt_unref(pkt);
}
net_pkt_set_sent(pkt, true);
return ret;
}
static uint16_t adjust_data_len(struct net_pkt *pkt, struct net_tcp_hdr *tcp_hdr,
uint16_t data_len)
{
uint8_t offset = tcp_hdr->offset >> 4;
/* We need to adjust the length of the data part if there
* are TCP options.
*/
if ((offset << 2) > sizeof(struct net_tcp_hdr)) {
net_pkt_skip(pkt, (offset << 2) -
sizeof(struct net_tcp_hdr));
data_len -= (offset << 2) - sizeof(struct net_tcp_hdr);
}
return data_len;
}
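/* Example: an offset field of 0x80 encodes 8 32-bit words, i.e. a
 * 32-byte header; with sizeof(struct net_tcp_hdr) == 20 this skips
 * 12 bytes of options and trims data_len by the same amount.
 */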
/* This is called when we receive data after the connection has been
* established. The core TCP logic is located here.
*
* Prototype:
* enum net_verdict tcp_established(struct net_conn *conn,
* union net_ip_header *ip_hdr,
* union net_proto_header *proto_hdr,
* struct net_pkt *pkt,
* void *user_data)
*/
NET_CONN_CB(tcp_established)
{
struct net_context *context = (struct net_context *)user_data;
struct net_tcp_hdr *tcp_hdr = proto_hdr->tcp;
enum net_verdict ret = NET_OK;
bool do_not_send_ack = false;
uint8_t tcp_flags;
uint16_t data_len;
k_mutex_lock(&context->lock, K_FOREVER);
NET_ASSERT(context && context->tcp);
if (net_tcp_get_state(context->tcp) < NET_TCP_ESTABLISHED) {
NET_ERR("Context %p in wrong state %d",
context, net_tcp_get_state(context->tcp));
ret = NET_DROP;
goto unlock;
}
net_tcp_print_recv_info("DATA", pkt, tcp_hdr->src_port);
tcp_flags = NET_TCP_FLAGS(tcp_hdr);
if (net_tcp_seq_cmp(sys_get_be32(tcp_hdr->seq),
context->tcp->send_ack) < 0) {
/* Peer sent us a packet we've already seen. Apparently,
 * our ack was lost.
 */
/* RFC 793 specifies that the "highest" (i.e. current from our
 * PoV) ack # value can/should be sent, so we just force a resend.
 */
resend_ack:
send_ack(context, &conn->remote_addr, true);
ret = NET_DROP;
goto unlock;
}
if (net_tcp_seq_cmp(sys_get_be32(tcp_hdr->seq),
context->tcp->send_ack) > 0) {
/* Don't try to reorder packets. If it doesn't
* match the next segment exactly, drop and wait for
* retransmit
*/
ret = NET_DROP;
goto unlock;
}
/*
* If we receive RST here, we close the socket. See RFC 793 chapter
* called "Reset Processing" for details.
*/
if (tcp_flags & NET_TCP_RST) {
/* We only accept RST packet that has valid seq field. */
if (!net_tcp_validate_seq(context->tcp, tcp_hdr)) {
net_stats_update_tcp_seg_rsterr(net_pkt_iface(pkt));
ret = NET_DROP;
goto unlock;
}
net_stats_update_tcp_seg_rst(net_pkt_iface(pkt));
net_tcp_print_recv_info("RST", pkt, tcp_hdr->src_port);
if (context->recv_cb) {
context->recv_cb(context, NULL, NULL, NULL, -ECONNRESET,
context->tcp->recv_user_data);
}
net_context_unref(context);
ret = NET_DROP;
goto unlock;
}
/* Handle TCP state transition */
if (tcp_flags & NET_TCP_ACK) {
if (!net_tcp_ack_received(context,
sys_get_be32(tcp_hdr->ack))) {
ret = NET_DROP;
goto unlock;
}
/* TCP state might be changed after maintaining the sent pkt
* list, e.g., an ack of FIN is received.
*/
if (net_tcp_get_state(context->tcp)
== NET_TCP_FIN_WAIT_1) {
/* Active close: step to FIN_WAIT_2 */
net_tcp_change_state(context->tcp, NET_TCP_FIN_WAIT_2);
} else if (net_tcp_get_state(context->tcp)
== NET_TCP_LAST_ACK) {
/* Passive close: step to CLOSED */
net_tcp_change_state(context->tcp, NET_TCP_CLOSED);
/* Release the pkt before clean up */
net_pkt_unref(pkt);
goto clean_up;
}
}
if (tcp_flags & NET_TCP_FIN) {
if (net_tcp_get_state(context->tcp) == NET_TCP_ESTABLISHED) {
/* Passive close: step to CLOSE_WAIT */
net_tcp_change_state(context->tcp, NET_TCP_CLOSE_WAIT);
/* We should receive the ACK next in order to get out of the
 * LAST_ACK state that we are entering in a short while.
 * But we need to be prepared NOT to receive it, as otherwise
 * the connection would be stuck forever.
 */
k_delayed_work_submit(&context->tcp->ack_timer,
ACK_TIMEOUT);
net_context_set_closing(context, true);
} else if (net_tcp_get_state(context->tcp)
== NET_TCP_FIN_WAIT_2) {
/* Received FIN on FIN_WAIT_2, so cancel the timer */
k_delayed_work_cancel(&context->tcp->fin_timer);
/* Active close: step to TIME_WAIT */
net_tcp_change_state(context->tcp, NET_TCP_TIME_WAIT);
}
context->tcp->fin_rcvd = 1U;
}
if (!IS_ENABLED(CONFIG_NET_TCP_AUTO_ACCEPT) &&
net_context_is_accepting(context)) {
data_len = 0;
do_not_send_ack = true;
} else {
data_len = net_pkt_remaining_data(pkt);
}
if (data_len > net_tcp_get_recv_wnd(context->tcp)) {
/* In case we have zero window, we should still accept
* Zero Window Probes from peer, which per convention
* come with len=1. Note that normally we need to check
* for net_tcp_get_recv_wnd(context->tcp) == 0, but
* given the if above, we know that if data_len == 1,
* then net_tcp_get_recv_wnd(context->tcp) can be only 0
* here.
*/
if (data_len == 1U) {
goto resend_ack;
}
NET_ERR("Context %p: overflow of recv window (%d vs %d), "
"pkt dropped",
context, net_tcp_get_recv_wnd(context->tcp), data_len);
ret = NET_DROP;
goto unlock;
}
/* If the pkt has data, notify the recv callback which should
* release the pkt. Otherwise, release the pkt immediately.
*/
if (data_len > 0) {
data_len = adjust_data_len(pkt, tcp_hdr, data_len);
ret = net_context_packet_received(conn, pkt, ip_hdr, proto_hdr,
context->tcp->recv_user_data);
} else if (data_len == 0U) {
net_pkt_unref(pkt);
}
if (do_not_send_ack == false) {
/* Increment the ack */
context->tcp->send_ack += data_len;
if (tcp_flags & NET_TCP_FIN) {
context->tcp->send_ack += 1U;
}
send_ack(context, &conn->remote_addr, false);
}
clean_up:
if (net_tcp_get_state(context->tcp) == NET_TCP_TIME_WAIT) {
k_delayed_work_submit(&context->tcp->timewait_timer,
K_MSEC(CONFIG_NET_TCP_TIME_WAIT_DELAY));
}
if (net_tcp_get_state(context->tcp) == NET_TCP_CLOSED) {
if (context->recv_cb) {
context->recv_cb(context, NULL, NULL, NULL, 0,
context->tcp->recv_user_data);
}
net_context_unref(context);
}
unlock:
k_mutex_unlock(&context->lock);
return ret;
}
/*
* Prototype:
* enum net_verdict tcp_synack_received(struct net_conn *conn,
* struct net_pkt *pkt,
* union net_ip_header *ip_hdr,
* union net_proto_header *proto_hdr,
* void *user_data)
*/
NET_CONN_CB(tcp_synack_received)
{
struct net_context *context = (struct net_context *)user_data;
struct net_tcp_hdr *tcp_hdr = proto_hdr->tcp;
int ret;
NET_ASSERT(context && context->tcp);
switch (net_tcp_get_state(context->tcp)) {
case NET_TCP_SYN_SENT:
net_context_set_iface(context, net_pkt_iface(pkt));
break;
default:
NET_DBG("Context %p in wrong state %d",
context, net_tcp_get_state(context->tcp));
return NET_DROP;
}
net_pkt_set_context(pkt, context);
NET_ASSERT(net_pkt_iface(pkt));
if (NET_TCP_FLAGS(tcp_hdr) & NET_TCP_RST) {
/* We only accept RST packet that has valid seq field. */
if (!net_tcp_validate_seq(context->tcp, tcp_hdr)) {
net_stats_update_tcp_seg_rsterr(net_pkt_iface(pkt));
return NET_DROP;
}
net_stats_update_tcp_seg_rst(net_pkt_iface(pkt));
k_sem_give(&context->tcp->connect_wait);
if (context->connect_cb) {
context->connect_cb(context, -ECONNREFUSED,
context->user_data);
}
return NET_DROP;
}
if (NET_TCP_FLAGS(tcp_hdr) & NET_TCP_SYN) {
context->tcp->send_ack =
sys_get_be32(tcp_hdr->seq) + 1;
}
/*
 * If we receive SYN|ACK, the handshake completes: register the real
 * connection handler, move to ESTABLISHED and ACK the peer.
 */
if (NET_TCP_FLAGS(tcp_hdr) == (NET_TCP_SYN | NET_TCP_ACK)) {
/* Remove the temporary connection handler and register
* a proper now as we have an established connection.
*/
struct sockaddr local_addr;
struct sockaddr remote_addr;
tcp_copy_ip_addr_from_hdr(net_pkt_family(pkt), ip_hdr, tcp_hdr,
&remote_addr, true);
tcp_copy_ip_addr_from_hdr(net_pkt_family(pkt), ip_hdr, tcp_hdr,
&local_addr, false);
net_tcp_unregister(context->conn_handler);
ret = net_tcp_register(net_pkt_family(pkt),
&remote_addr,
&local_addr,
ntohs(tcp_hdr->src_port),
ntohs(tcp_hdr->dst_port),
tcp_established,
context,
&context->conn_handler);
if (ret < 0) {
NET_DBG("Cannot register TCP handler (%d)", ret);
send_reset(context, &local_addr, &remote_addr);
return NET_DROP;
}
net_tcp_change_state(context->tcp, NET_TCP_ESTABLISHED);
net_context_set_state(context, NET_CONTEXT_CONNECTED);
send_ack(context, &remote_addr, false);
k_sem_give(&context->tcp->connect_wait);
if (context->connect_cb) {
context->connect_cb(context, 0, context->user_data);
}
}
return NET_DROP;
}
static void get_sockaddr_ptr(union net_ip_header *ip_hdr,
struct net_tcp_hdr *tcp_hdr,
sa_family_t family,
struct sockaddr_ptr *addr)
{
(void)memset(addr, 0, sizeof(*addr));
if (IS_ENABLED(CONFIG_NET_IPV4) && family == AF_INET) {
struct sockaddr_in_ptr *addr4 = net_sin_ptr(addr);
addr4->sin_family = AF_INET;
addr4->sin_port = tcp_hdr->dst_port;
addr4->sin_addr = &ip_hdr->ipv4->dst;
}
if (IS_ENABLED(CONFIG_NET_IPV6) && family == AF_INET6) {
struct sockaddr_in6_ptr *addr6 = net_sin6_ptr(addr);
addr6->sin6_family = AF_INET6;
addr6->sin6_port = tcp_hdr->dst_port;
addr6->sin6_addr = &ip_hdr->ipv6->dst;
}
}
#if defined(CONFIG_NET_CONTEXT_NET_PKT_POOL)
static inline void copy_pool_vars(struct net_context *new_context,
struct net_context *listen_context)
{
new_context->tx_slab = listen_context->tx_slab;
new_context->data_pool = listen_context->data_pool;
}
#else
#define copy_pool_vars(...)
#endif /* CONFIG_NET_CONTEXT_NET_PKT_POOL */
/* This callback is called while we are waiting for connections and
 * receive a packet. We need to check that we are receiving the proper
 * msg (SYN) here. An ACK could also be received, in which case we have
 * an established connection.
*
* Prototype:
* enum net_verdict tcp_syn_rcvd(struct net_conn *conn,
* struct net_pkt *pkt,
* union net_ip_header *ip_hdr,
* union net_proto_header *proto_hdr,
* void *user_data)
*/
NET_CONN_CB(tcp_syn_rcvd)
{
struct net_context *context = (struct net_context *)user_data;
struct net_tcp_hdr *tcp_hdr = proto_hdr->tcp;
struct net_tcp *tcp;
struct sockaddr_ptr pkt_src_addr;
struct sockaddr local_addr;
struct sockaddr remote_addr;
NET_ASSERT(context && context->tcp);
tcp = context->tcp;
switch (net_tcp_get_state(tcp)) {
case NET_TCP_LISTEN:
net_context_set_iface(context, net_pkt_iface(pkt));
break;
case NET_TCP_SYN_RCVD:
if (net_pkt_iface(pkt) != net_context_get_iface(context)) {
return NET_DROP;
}
break;
default:
NET_DBG("Context %p in wrong state %d",
context, tcp->state);
return NET_DROP;
}
net_pkt_set_context(pkt, context);
NET_ASSERT(net_pkt_iface(pkt));
tcp_copy_ip_addr_from_hdr(net_pkt_family(pkt), ip_hdr, tcp_hdr,
&remote_addr, true);
tcp_copy_ip_addr_from_hdr(net_pkt_family(pkt), ip_hdr, tcp_hdr,
&local_addr, false);
/*
* If we receive SYN, we send SYN-ACK and go to SYN_RCVD state.
*/
if (NET_TCP_FLAGS(tcp_hdr) == NET_TCP_SYN) {
struct net_tcp_options tcp_opts = {
.mss = NET_TCP_DEFAULT_MSS,
};
int opt_totlen;
int r;
net_tcp_print_recv_info("SYN", pkt, tcp_hdr->src_port);
opt_totlen = NET_TCP_HDR_LEN(tcp_hdr)
- sizeof(struct net_tcp_hdr);
		/* We expect the MSS option to be present (opt_totlen > 0),
		 * so call this unconditionally.
		 */
if (net_tcp_parse_opts(pkt, opt_totlen, &tcp_opts) < 0) {
return NET_DROP;
}
net_tcp_change_state(tcp, NET_TCP_SYN_RCVD);
/* Set TCP seq and ack which are then stored in the backlog */
context->tcp->send_seq = tcp_init_isn();
context->tcp->send_ack =
sys_get_be32(tcp_hdr->seq) + 1;
		/* Pass the MSS parsed from the TCP options to the backlog */
r = tcp_backlog_syn(pkt, ip_hdr, tcp_hdr,
context, tcp_opts.mss);
if (r < 0) {
if (r == -EADDRINUSE) {
NET_DBG("TCP connection already exists");
} else {
NET_DBG("No free TCP backlog entries");
}
return NET_DROP;
}
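		/* The SYN|ACK must be sent from the exact address and
		 * port the SYN was addressed to, so point the source
		 * address into the received packet's own headers.
		 */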
get_sockaddr_ptr(ip_hdr, tcp_hdr,
net_context_get_family(context),
&pkt_src_addr);
send_syn_ack(context, &pkt_src_addr, &remote_addr);
net_pkt_unref(pkt);
return NET_OK;
}
	/*
	 * See RFC 793 section 3.4 "Reset Processing", page 65, for more
	 * details.
	 */
if (NET_TCP_FLAGS(tcp_hdr) & NET_TCP_RST) {
if (tcp_backlog_rst(pkt, ip_hdr, tcp_hdr) < 0) {
net_stats_update_tcp_seg_rsterr(net_pkt_iface(pkt));
return NET_DROP;
}
net_stats_update_tcp_seg_rst(net_pkt_iface(pkt));
net_tcp_print_recv_info("RST", pkt, tcp_hdr->src_port);
return NET_DROP;
}
/*
* If we receive ACK, we go to ESTABLISHED state.
*/
if (NET_TCP_FLAGS(tcp_hdr) & NET_TCP_ACK) {
struct net_context *new_context;
socklen_t addrlen;
int ret;
net_tcp_print_recv_info("ACK", pkt, tcp_hdr->src_port);
if (!context->tcp->accept_cb) {
NET_DBG("No accept callback, connection reset.");
goto reset;
}
		/* Create a new context that starts waiting for data. */
ret = net_context_get(net_pkt_family(pkt),
SOCK_STREAM, IPPROTO_TCP,
&new_context);
if (ret < 0) {
NET_DBG("Cannot get accepted context, "
"connection reset");
goto conndrop;
}
ret = tcp_backlog_ack(pkt, ip_hdr, tcp_hdr, new_context);
if (ret < 0) {
NET_DBG("Cannot find context from TCP backlog");
net_context_unref(new_context);
goto conndrop;
}
ret = net_context_bind(new_context, &local_addr,
sizeof(local_addr));
if (ret < 0) {
NET_DBG("Cannot bind accepted context, "
"connection reset");
net_context_unref(new_context);
goto conndrop;
}
new_context->flags |= NET_CONTEXT_REMOTE_ADDR_SET;
memcpy(&new_context->remote, &remote_addr,
sizeof(remote_addr));
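		/* Register the full 4-tuple so that subsequent segments
		 * of this connection are delivered to tcp_established()
		 * on the new context instead of this listener.
		 */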
ret = net_tcp_register(net_pkt_family(pkt),
&new_context->remote,
&local_addr,
ntohs(net_sin(&new_context->remote)->sin_port),
ntohs(net_sin(&local_addr)->sin_port),
tcp_established,
new_context,
&new_context->conn_handler);
if (ret < 0) {
NET_DBG("Cannot register accepted TCP handler (%d)",
ret);
net_context_unref(new_context);
goto conndrop;
}
		/* Hand the established connection over to the newly
		 * created context, and move the original one back to
		 * LISTEN so it keeps accepting connections.
		 */
copy_pool_vars(new_context, context);
net_tcp_change_state(tcp, NET_TCP_LISTEN);
net_tcp_change_state(new_context->tcp, NET_TCP_ESTABLISHED);
		/* Mark the new context as still accepting so that we can
		 * do proper cleanup if the connection is closed before
		 * accept() has been called.
		 */
net_context_set_accepting(new_context, true);
net_context_set_state(new_context, NET_CONTEXT_CONNECTED);
if (new_context->remote.sa_family == AF_INET) {
addrlen = sizeof(struct sockaddr_in);
} else if (new_context->remote.sa_family == AF_INET6) {
addrlen = sizeof(struct sockaddr_in6);
} else {
NET_ASSERT(false, "Invalid protocol family %d",
new_context->remote.sa_family);
net_context_unref(new_context);
return NET_DROP;
}
context->tcp->accept_cb(new_context,
&new_context->remote,
addrlen,
0,
context->user_data);
net_pkt_unref(pkt);
return NET_OK;
}
return NET_DROP;
conndrop:
net_stats_update_tcp_seg_conndrop(net_pkt_iface(pkt));
reset:
send_reset(tcp->context, &local_addr, &remote_addr);
return NET_DROP;
}
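
/* Illustrative usage sketch (not part of the original sources): a
 * listening application installs an accept callback. The names
 * my_accept_cb, listen_ctx and app_data are hypothetical; the callback
 * signature matches the invocation in tcp_syn_rcvd() above.
 *
 *	static void my_accept_cb(struct net_context *new_ctx,
 *				 struct sockaddr *remote, socklen_t addrlen,
 *				 int status, void *user_data)
 *	{
 *		// new_ctx is CONNECTED and ready to receive data
 *	}
 *
 *	net_tcp_accept(listen_ctx, my_accept_cb, app_data);
 */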
int net_tcp_accept(struct net_context *context,
net_tcp_accept_cb_t cb,
void *user_data)
{
struct sockaddr local_addr;
struct sockaddr *laddr = NULL;
uint16_t lport = 0U;
int ret;
NET_ASSERT(context->tcp);
if (net_tcp_get_state(context->tcp) != NET_TCP_LISTEN) {
NET_DBG("Context %p in wrong state %d, should be %d",
context, context->tcp->state, NET_TCP_LISTEN);
return -EINVAL;
}
if (cb == NULL) {
/* The context is being shut down */
if (net_context_get_ip_proto(context) == IPPROTO_TCP) {
context->tcp->accept_cb = NULL;
return 0;
}
}
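	/* Build the local address/port filter for the handler
	 * registration below from the address this context was bound
	 * to, if any.
	 */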
local_addr.sa_family = net_context_get_family(context);
#if defined(CONFIG_NET_IPV6)
if (net_context_get_family(context) == AF_INET6) {
if (net_sin6_ptr(&context->local)->sin6_addr) {
net_ipaddr_copy(&net_sin6(&local_addr)->sin6_addr,
net_sin6_ptr(&context->local)->sin6_addr);
laddr = &local_addr;
}
net_sin6(&local_addr)->sin6_port = lport =
net_sin6((struct sockaddr *)&context->local)->sin6_port;
}
#endif /* CONFIG_NET_IPV6 */
#if defined(CONFIG_NET_IPV4)
if (net_context_get_family(context) == AF_INET) {
if (net_sin_ptr(&context->local)->sin_addr) {
net_ipaddr_copy(&net_sin(&local_addr)->sin_addr,
net_sin_ptr(&context->local)->sin_addr);
laddr = &local_addr;
}
net_sin(&local_addr)->sin_port = lport =
net_sin((struct sockaddr *)&context->local)->sin_port;
}
#endif /* CONFIG_NET_IPV4 */
ret = net_tcp_register(net_context_get_family(context),
context->flags & NET_CONTEXT_REMOTE_ADDR_SET ?
&context->remote : NULL,
laddr,
ntohs(net_sin(&context->remote)->sin_port),
ntohs(lport),
tcp_syn_rcvd,
context,
&context->conn_handler);
if (ret < 0) {
return ret;
}
context->user_data = user_data;
/* accept callback is only valid for TCP contexts */
if (net_context_get_ip_proto(context) == IPPROTO_TCP) {
context->tcp->accept_cb = cb;
}
return 0;
}
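
/* Illustrative usage sketch (not part of the original sources): an
 * active open towards a peer; peer_addr, connected_cb and app_data are
 * hypothetical application-supplied names, and ports are passed in
 * network byte order (the function applies ntohs() itself).
 *
 *	net_tcp_connect(ctx, (struct sockaddr *)&peer_addr, NULL,
 *			peer_addr.sin_port, 0, K_SECONDS(10),
 *			connected_cb, app_data);
 *
 * With a non-zero timeout the call blocks on connect_wait until
 * tcp_synack_received() gives the semaphore back or the timeout
 * expires.
 */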
int net_tcp_connect(struct net_context *context,
const struct sockaddr *addr,
struct sockaddr *laddr,
uint16_t rport,
uint16_t lport,
k_timeout_t timeout,
net_context_connect_cb_t cb,
void *user_data)
{
int ret;
NET_ASSERT(context->tcp);
if (net_context_get_type(context) != SOCK_STREAM) {
return -ENOTSUP;
}
/* We need to register a handler, otherwise the SYN-ACK
* packet would not be received.
*/
ret = net_tcp_register(net_context_get_family(context),
addr,
laddr,
ntohs(rport),
ntohs(lport),
tcp_synack_received,
context,
&context->conn_handler);
if (ret < 0) {
return ret;
}
context->connect_cb = cb;
context->user_data = user_data;
net_context_set_state(context, NET_CONTEXT_CONNECTING);
send_syn(context, addr);
	/* In tcp_synack_received() this semaphore is given back */
if (!K_TIMEOUT_EQ(timeout, K_NO_WAIT) &&
k_sem_take(&context->tcp->connect_wait, timeout)) {
return -ETIMEDOUT;
}
return 0;
}
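
/* Verify the TCP checksum (unless the interface already validates it
 * in hardware) and return a pointer to the TCP header, or NULL if the
 * segment is corrupt or truncated.
 */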
struct net_tcp_hdr *net_tcp_input(struct net_pkt *pkt,
struct net_pkt_data_access *tcp_access)
{
struct net_tcp_hdr *tcp_hdr;
if (IS_ENABLED(CONFIG_NET_TCP_CHECKSUM) &&
net_if_need_calc_rx_checksum(net_pkt_iface(pkt)) &&
net_calc_chksum_tcp(pkt) != 0U) {
NET_DBG("DROP: checksum mismatch");
goto drop;
}
tcp_hdr = (struct net_tcp_hdr *)net_pkt_get_data(pkt, tcp_access);
if (tcp_hdr && !net_pkt_set_data(pkt, tcp_access)) {
return tcp_hdr;
}
drop:
net_stats_update_tcp_seg_chkerr(net_pkt_iface(pkt));
return NULL;
}