net: ipv4: Add PMTU support

Catch "Destination Unreachable" ICMPv4 messages and update PMTU for
a given destination IPv4 address.
Use that PMTU when sending data to the destination.

Signed-off-by: Jukka Rissanen <jukka.rissanen@nordicsemi.no>
This commit is contained in:
Jukka Rissanen 2024-11-05 17:50:11 +02:00 committed by Anas Nashif
commit 9dba02f8f4
9 changed files with 247 additions and 37 deletions

View file

@ -342,6 +342,11 @@ struct net_pkt {
uint8_t cooked_mode_pkt : 1;
#endif /* CONFIG_NET_CAPTURE_COOKED_MODE */
#if defined(CONFIG_NET_IPV4_PMTU)
/* Path MTU needed for this destination address */
uint8_t ipv4_pmtu : 1;
#endif /* CONFIG_NET_IPV4_PMTU */
/* @endcond */
};
@ -783,6 +788,31 @@ static inline uint16_t net_pkt_ip_opts_len(struct net_pkt *pkt)
#endif
}
#if defined(CONFIG_NET_IPV4_PMTU)
/* Report whether the ipv4_pmtu flag is set on this packet. */
static inline bool net_pkt_ipv4_pmtu(struct net_pkt *pkt)
{
	return pkt->ipv4_pmtu != 0U;
}

/* Set (true) or clear (false) the ipv4_pmtu flag on this packet. */
static inline void net_pkt_set_ipv4_pmtu(struct net_pkt *pkt, bool value)
{
	pkt->ipv4_pmtu = value ? 1U : 0U;
}
#else /* !CONFIG_NET_IPV4_PMTU */
/* Without IPv4 PMTU support the flag does not exist: always false. */
static inline bool net_pkt_ipv4_pmtu(struct net_pkt *pkt)
{
	ARG_UNUSED(pkt);

	return false;
}

/* Without IPv4 PMTU support there is no flag to store: no-op. */
static inline void net_pkt_set_ipv4_pmtu(struct net_pkt *pkt, bool value)
{
	ARG_UNUSED(value);
	ARG_UNUSED(pkt);
}
#endif /* CONFIG_NET_IPV4_PMTU */
#if defined(CONFIG_NET_IPV4_FRAGMENT)
static inline uint16_t net_pkt_ipv4_fragment_offset(struct net_pkt *pkt)
{

View file

@ -21,6 +21,7 @@ LOG_MODULE_REGISTER(net_icmpv4, CONFIG_NET_ICMPV4_LOG_LEVEL);
#include "ipv4.h"
#include "icmpv4.h"
#include "net_stats.h"
#include "pmtu.h"
#define PKT_WAIT_TIME K_SECONDS(1)
@ -654,6 +655,108 @@ drop:
return NET_DROP;
}
#if defined(CONFIG_NET_IPV4_PMTU)
/* RFC 1191 chapter 3 says the minimum MTU size is 68 octets.
 * That is far too small for the modern world, so use 576 octets
 * as the minimum instead.
 */
#define MIN_IPV4_MTU NET_IPV4_MTU
/* Handle a received ICMPv4 Destination Unreachable message and use the
 * MTU value it carries to update the PMTU cache.
 *
 * @param ctx       ICMP handler context (not used).
 * @param pkt       Received packet; its data cursor is expected to point at
 *                  the Destination Unreachable payload.
 * @param hdr       IP header information of the received packet.
 * @param icmp_hdr  ICMP header of the received packet (not used).
 * @param user_data Not used.
 *
 * @return 0 when the cache was updated or the message was ignored on
 *         purpose (no cache entry for the sender, or the advertised MTU is
 *         larger than the one already known), -EIO when the message is
 *         malformed or advertises an MTU below MIN_IPV4_MTU.
 */
static int icmpv4_handle_dst_unreach(struct net_icmp_ctx *ctx,
				     struct net_pkt *pkt,
				     struct net_icmp_ip_hdr *hdr,
				     struct net_icmp_hdr *icmp_hdr,
				     void *user_data)
{
	NET_PKT_DATA_ACCESS_CONTIGUOUS_DEFINE(dst_unreach_access,
					      struct net_icmpv4_dest_unreach);
	/* Smallest packet that can hold all the headers we need to read. */
	const size_t min_length = sizeof(struct net_ipv4_hdr) +
				  sizeof(struct net_icmp_hdr) +
				  sizeof(struct net_icmpv4_dest_unreach);
	struct net_icmpv4_dest_unreach *dest_unreach_hdr;
	struct net_ipv4_hdr *ip_hdr = hdr->ipv4;
	uint16_t length = net_pkt_get_len(pkt);
	struct net_pmtu_entry *entry;
	struct sockaddr_in sockaddr_src = {
		.sin_family = AF_INET,
	};
	uint16_t mtu;
	int ret;

	ARG_UNUSED(ctx);
	ARG_UNUSED(icmp_hdr);
	ARG_UNUSED(user_data);

	dest_unreach_hdr = (struct net_icmpv4_dest_unreach *)
				net_pkt_get_data(pkt, &dst_unreach_access);
	if (dest_unreach_hdr == NULL) {
		NET_DBG("DROP: NULL ICMPv4 Destination Unreachable header");
		goto drop;
	}

	net_stats_update_ipv4_pmtu_recv(net_pkt_iface(pkt));

	NET_DBG("Received Destination Unreachable from %s to %s",
		net_sprint_ipv4_addr(&ip_hdr->src),
		net_sprint_ipv4_addr(&ip_hdr->dst));

	/* The packet is rejected when it is too SHORT to contain the
	 * headers, so the diagnostic must say "too small".
	 */
	if (length < min_length) {
		NET_DBG("DROP: length %u too small, need at least %zu",
			length, min_length);
		goto drop;
	}

	net_pkt_acknowledge_data(pkt, &dst_unreach_access);

	/* NOTE(review): the ICMP code field is not inspected here, so every
	 * Destination Unreachable message reaches the MTU check below, not
	 * only "fragmentation needed" ones - confirm this is intended.
	 */
	mtu = ntohs(dest_unreach_hdr->mtu);

	if (mtu < MIN_IPV4_MTU) {
		NET_DBG("DROP: Unsupported MTU %u, min is %u",
			mtu, MIN_IPV4_MTU);
		goto drop;
	}

	/* NOTE(review): the cache is looked up with the source address of
	 * the received IP header - confirm this matches the key used when
	 * the entry was created on the send path.
	 */
	net_ipaddr_copy(&sockaddr_src.sin_addr, (struct in_addr *)&ip_hdr->src);

	entry = net_pmtu_get_entry((struct sockaddr *)&sockaddr_src);
	if (entry == NULL) {
		NET_DBG("DROP: Cannot find PMTU entry for %s",
			net_sprint_ipv4_addr(&ip_hdr->src));
		goto silent_drop;
	}

	/* We must not accept larger PMTU value than what we already know.
	 * RFC 1191 chapter 3 page 5.
	 */
	if (entry->mtu > 0 && entry->mtu < mtu) {
		NET_DBG("DROP: PMTU for %s %u larger than %u",
			net_sprint_ipv4_addr(&ip_hdr->src), mtu,
			entry->mtu);
		goto silent_drop;
	}

	ret = net_pmtu_update_entry(entry, mtu);
	if (ret > 0) {
		NET_DBG("PMTU for %s changed from %u to %u",
			net_sprint_ipv4_addr(&ip_hdr->src), ret, mtu);
	}

	return 0;

drop:
	net_stats_update_ipv4_pmtu_drop(net_pkt_iface(pkt));

	return -EIO;

silent_drop:
	/* If the event is not really an error then just ignore it and
	 * return 0 so that icmpv4 module will not complain about it.
	 */
	net_stats_update_ipv4_pmtu_drop(net_pkt_iface(pkt));

	return 0;
}
static struct net_icmp_ctx dst_unreach_ctx;
#endif /* CONFIG_NET_IPV4_PMTU */
void net_icmpv4_init(void)
{
static struct net_icmp_ctx ctx;
@ -664,4 +767,13 @@ void net_icmpv4_init(void)
NET_ERR("Cannot register %s handler (%d)", STRINGIFY(NET_ICMPV4_ECHO_REQUEST),
ret);
}
#if defined(CONFIG_NET_IPV4_PMTU)
ret = net_icmp_init_ctx(&dst_unreach_ctx, NET_ICMPV4_DST_UNREACH, 0,
icmpv4_handle_dst_unreach);
if (ret < 0) {
NET_ERR("Cannot register %s handler (%d)", STRINGIFY(NET_ICMPV4_DST_UNREACH),
ret);
}
#endif
}

View file

@ -34,6 +34,11 @@ struct net_icmpv4_echo_req {
uint16_t sequence;
} __packed;
/* Fixed-size fields of an ICMPv4 Destination Unreachable message as read by
 * the PMTU handler. Presumably these follow the common ICMPv4 header on the
 * wire - TODO confirm against the handler's packet cursor position.
 */
struct net_icmpv4_dest_unreach {
/* Not read by the PMTU handler. */
uint16_t unused;
/* Advertised MTU; the handler converts it with ntohs() before use. */
uint16_t mtu;
} __packed;
/**
* @brief Send ICMPv4 error message.
* @param pkt Network packet that this error is related to.

View file

@ -25,6 +25,7 @@ LOG_MODULE_REGISTER(net_ipv4, CONFIG_NET_IPV4_LOG_LEVEL);
#include "tcp_internal.h"
#include "dhcpv4/dhcpv4_internal.h"
#include "ipv4.h"
#include "pmtu.h"
BUILD_ASSERT(sizeof(struct in_addr) == NET_IPV4_ADDR_SIZE);
@ -90,13 +91,18 @@ int net_ipv4_create(struct net_pkt *pkt,
const struct in_addr *dst)
{
uint8_t tos = 0;
uint8_t flags = 0U;
if (IS_ENABLED(CONFIG_NET_IP_DSCP_ECN)) {
net_ipv4_set_dscp(&tos, net_pkt_ip_dscp(pkt));
net_ipv4_set_ecn(&tos, net_pkt_ip_ecn(pkt));
}
return net_ipv4_create_full(pkt, src, dst, tos, 0U, 0U, 0U);
if (IS_ENABLED(CONFIG_NET_IPV4_PMTU) && net_pkt_ipv4_pmtu(pkt)) {
flags = NET_IPV4_DF;
}
return net_ipv4_create_full(pkt, src, dst, tos, 0U, flags, 0U);
}
int net_ipv4_finalize(struct net_pkt *pkt, uint8_t next_header_proto)
@ -444,6 +450,36 @@ drop:
return NET_DROP;
}
enum net_verdict net_ipv4_prepare_for_send(struct net_pkt *pkt)
{
if (IS_ENABLED(CONFIG_NET_IPV4_PMTU)) {
struct net_pmtu_entry *entry;
struct sockaddr_in dst = {
.sin_family = AF_INET,
};
int ret;
net_ipv4_addr_copy_raw((uint8_t *)&dst.sin_addr,
NET_IPV4_HDR(pkt)->dst);
entry = net_pmtu_get_entry((struct sockaddr *)&dst);
if (entry == NULL) {
ret = net_pmtu_update_mtu((struct sockaddr *)&dst,
net_if_get_mtu(net_pkt_iface(pkt)));
if (ret < 0) {
NET_DBG("Cannot update PMTU for %s (%d)",
net_sprint_ipv4_addr(&dst.sin_addr),
ret);
}
}
}
#if defined(CONFIG_NET_IPV4_FRAGMENT)
return net_ipv4_prepare_for_send_fragment(pkt);
#else
return NET_OK;
#endif
}
void net_ipv4_init(void)
{
if (IS_ENABLED(CONFIG_NET_IPV4_FRAGMENT)) {

View file

@ -357,6 +357,17 @@ typedef void (*net_ipv4_frag_cb_t)(struct net_ipv4_reassembly *reass, void *user
*/
void net_ipv4_frag_foreach(net_ipv4_frag_cb_t cb, void *user_data);
/**
 * @brief Prepare an IPv4 packet for sending.
 *
 * If the PMTU destination cache is enabled and has no entry for the packet's
 * destination address, an entry is created using the MTU of the packet's
 * interface. If IPv4 fragmentation is enabled, a packet that is too large to
 * send is then split up into multiple fragments so that it can be sent.
 *
 * @param pkt Network packet
 *
 * @return Verdict about the packet. NET_OK is returned when IPv4
 *         fragmentation support is disabled.
 */
enum net_verdict net_ipv4_prepare_for_send(struct net_pkt *pkt);
#if defined(CONFIG_NET_NATIVE_IPV4)
/**
* @brief Initialises IPv4
@ -384,22 +395,9 @@ static inline enum net_verdict net_ipv4_handle_fragment_hdr(struct net_pkt *pkt,
}
#endif /* CONFIG_NET_IPV4_FRAGMENT */
/**
* @brief Prepare packet for sending, this will split up a packet that is too large to send into
* multiple fragments so that it can be sent.
*
* @param pkt Network packet
*
* @return Return verdict about the packet.
*/
#if defined(CONFIG_NET_IPV4_FRAGMENT)
enum net_verdict net_ipv4_prepare_for_send(struct net_pkt *pkt);
#else
static inline enum net_verdict net_ipv4_prepare_for_send(struct net_pkt *pkt)
{
return NET_OK;
}
#endif /* CONFIG_NET_IPV4_FRAGMENT */
enum net_verdict net_ipv4_prepare_for_send_fragment(struct net_pkt *pkt);
#endif
/**
* @brief Sets up fragment buffers for usage, should only be called by the SYS_INIT() handler in

View file

@ -607,7 +607,7 @@ int net_ipv4_send_fragmented_pkt(struct net_if *iface, struct net_pkt *pkt,
return 0;
}
enum net_verdict net_ipv4_prepare_for_send(struct net_pkt *pkt)
enum net_verdict net_ipv4_prepare_for_send_fragment(struct net_pkt *pkt)
{
NET_PKT_DATA_ACCESS_CONTIGUOUS_DEFINE(ipv4_access, struct net_ipv4_hdr);
struct net_ipv4_hdr *ip_hdr;

View file

@ -37,6 +37,7 @@ LOG_MODULE_REGISTER(net_ctx, CONFIG_NET_CONTEXT_LOG_LEVEL);
#include "udp_internal.h"
#include "tcp_internal.h"
#include "net_stats.h"
#include "pmtu.h"
#if defined(CONFIG_NET_TCP)
#include "tcp.h"
@ -1139,6 +1140,22 @@ int net_context_create_ipv4_new(struct net_context *context,
}
#endif
if (IS_ENABLED(CONFIG_NET_IPV4_PMTU)) {
		struct sockaddr_in peer = {
			.sin_family = AF_INET,
			.sin_addr = *dst,
		};
		bool path_unknown =
			net_pmtu_get_entry((struct sockaddr *)&peer) == NULL;

		/* No cached PMTU for this destination yet: flag the packet
		 * so that the MTU of the path gets figured out. Otherwise
		 * make sure the flag is cleared.
		 */
		net_pkt_set_ipv4_pmtu(pkt, path_unknown);
	}
return net_ipv4_create(pkt, src, dst);
}
#endif /* CONFIG_NET_IPV4 */

View file

@ -520,11 +520,9 @@ enum net_verdict net_if_send_data(struct net_if *iface, struct net_pkt *pkt)
verdict = net_ipv6_prepare_for_send(pkt);
}
#if defined(CONFIG_NET_IPV4_FRAGMENT)
if (net_pkt_family(pkt) == AF_INET) {
if (IS_ENABLED(CONFIG_NET_IPV4) && net_pkt_family(pkt) == AF_INET) {
verdict = net_ipv4_prepare_for_send(pkt);
}
#endif
done:
/* NET_OK in which case packet has checked successfully. In this case

View file

@ -4419,30 +4419,44 @@ static uint16_t get_ipv6_destination_mtu(struct net_if *iface,
#endif /* CONFIG_NET_IPV6_PMTU */
}
/* Pick the MTU to use towards an IPv4 destination.
 *
 * With CONFIG_NET_IPV4_PMTU enabled the PMTU cache is asked first. When it
 * has no answer (negative return), or when PMTU support is disabled, the
 * interface MTU is used, falling back to NET_IPV4_MTU if there is no
 * interface.
 */
static uint16_t get_ipv4_destination_mtu(struct net_if *iface,
					 const struct in_addr *dest)
{
#if defined(CONFIG_NET_IPV4_PMTU)
	struct sockaddr_in peer = {
		.sin_family = AF_INET,
		.sin_addr = *dest,
	};
	int path_mtu = net_pmtu_get_mtu((struct sockaddr *)&peer);

	if (path_mtu >= 0) {
		return (uint16_t)path_mtu;
	}
#endif /* CONFIG_NET_IPV4_PMTU */

	if (iface == NULL) {
		return NET_IPV4_MTU;
	}

	return net_if_get_mtu(iface);
}
uint16_t net_tcp_get_supported_mss(const struct tcp *conn)
{
sa_family_t family = net_context_get_family(conn->context);
if (family == AF_INET) {
#if defined(CONFIG_NET_IPV4)
if (IS_ENABLED(CONFIG_NET_IPV4) && family == AF_INET) {
struct net_if *iface = net_context_get_iface(conn->context);
int mss = 0;
uint16_t dest_mtu;
if (iface && net_if_get_mtu(iface) >= NET_IPV4TCPH_LEN) {
/* Detect MSS based on interface MTU minus "TCP,IP
* header size"
*/
mss = net_if_get_mtu(iface) - NET_IPV4TCPH_LEN;
}
dest_mtu = get_ipv4_destination_mtu(iface, &conn->dst.sin.sin_addr);
if (mss == 0) {
mss = NET_IPV4_MTU - NET_IPV4TCPH_LEN;
}
return mss;
#else
return 0;
#endif /* CONFIG_NET_IPV4 */
/* Detect MSS based on interface MTU minus "TCP,IP header size" */
return dest_mtu - NET_IPV4TCPH_LEN;
} else if (IS_ENABLED(CONFIG_NET_IPV6) && family == AF_INET6) {
struct net_if *iface = net_context_get_iface(conn->context);