mirror of https://github.com/ipxe/ipxe.git
Use total free memory as advertised window. This seems to be sufficient
to avoid drops even on slow NICs.
pull/1/head
parent
6d4e37cf42
commit
c014f607a8
|
@ -211,41 +211,30 @@ struct tcp_mss_option {
|
||||||
#define MIN_PKB_LEN MAX_HDR_LEN + 100 /* To account for padding by LL */
|
#define MIN_PKB_LEN MAX_HDR_LEN + 100 /* To account for padding by LL */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Advertised TCP window size
|
* Maximum advertised TCP window size
|
||||||
*
|
*
|
||||||
*
|
* We estimate the TCP window size as the amount of free memory we
|
||||||
* Our TCP window is actually limited by the amount of space available
|
* have. This is not strictly accurate (since it ignores any space
|
||||||
* for RX packets in the NIC's RX ring; we tend to populate the rings
|
* already allocated as RX buffers), but it will do for now.
|
||||||
* with far fewer descriptors than a typical driver. This would
|
|
||||||
* result in a desperately small window size, which kills WAN download
|
|
||||||
* performance; the maximum bandwidth on any link is limited to
|
|
||||||
*
|
|
||||||
* max_bandwidth = ( tcp_window / round_trip_time )
|
|
||||||
*
|
|
||||||
* With a 4kB window, which probably accurately reflects our amount of
|
|
||||||
* buffer space, and a WAN RTT of say 200ms, this gives a maximum
|
|
||||||
* achievable bandwidth of 20kB/s, which is not acceptable.
|
|
||||||
*
|
|
||||||
* We therefore aim to process packets as fast as they arrive, and
|
|
||||||
* advertise an "infinite" window. If we don't process packets as
|
|
||||||
* fast as they arrive, then we will drop packets and have to incur
|
|
||||||
* the retransmission penalty.
|
|
||||||
*
|
*
|
||||||
* Since we don't store out-of-order received packets, the
|
* Since we don't store out-of-order received packets, the
|
||||||
* retransmission penalty is that the whole window contents must be
|
* retransmission penalty is that the whole window contents must be
|
||||||
* resent.
|
* resent. This suggests keeping the window size small, but bear in
|
||||||
|
* mind that the maximum bandwidth on any link is limited to
|
||||||
*
|
*
|
||||||
* We choose to compromise on a window size of 64kB (which is the
|
* max_bandwidth = ( tcp_window / round_trip_time )
|
||||||
* maximum that can be represented without using TCP options). This
|
*
|
||||||
* gives a maximum bandwidth of 320kB/s at 200ms RTT, which is
|
* With a 48kB window, which probably accurately reflects our amount
|
||||||
* probably faster than the actual link bandwidth. It also limits
|
* of free memory, and a WAN RTT of say 200ms, this gives a maximum
|
||||||
* retransmissions to 64kB, which is reasonable.
|
* bandwidth of 240kB/s. This is sufficiently close to realistic that
|
||||||
|
* we will need to be careful that our advertised window doesn't end
|
||||||
|
* up limiting WAN download speeds.
|
||||||
*
|
*
|
||||||
* Finally, since the window goes into a 16-bit field and we cannot
|
* Finally, since the window goes into a 16-bit field and we cannot
|
||||||
* actually use 65536, we use a window size of (65536-4) to ensure
|
* actually use 65536, we use a window size of (65536-4) to ensure
|
||||||
* that payloads remain dword-aligned.
|
* that payloads remain dword-aligned.
|
||||||
*/
|
*/
|
||||||
#define TCP_WINDOW_SIZE ( 65536 - 4 )
|
#define TCP_MAX_WINDOW_SIZE ( 65536 - 4 )
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Advertised TCP MSS
|
* Advertised TCP MSS
|
||||||
|
|
|
@ -398,8 +398,13 @@ static void net_step ( struct process *process ) {
|
||||||
/* Poll for new packets */
|
/* Poll for new packets */
|
||||||
netdev_poll ( netdev, -1U );
|
netdev_poll ( netdev, -1U );
|
||||||
|
|
||||||
/* Process received packets */
|
/* Process at most one received packet. Give priority
|
||||||
while ( ( pkb = netdev_rx_dequeue ( netdev ) ) ) {
|
* to getting packets out of the NIC over processing
|
||||||
|
* the received packets, because we advertise a window
|
||||||
|
* that assumes that we can receive packets from the
|
||||||
|
* NIC faster than they arrive.
|
||||||
|
*/
|
||||||
|
if ( ( pkb = netdev_rx_dequeue ( netdev ) ) ) {
|
||||||
DBGC ( netdev, "NETDEV %p processing %p\n",
|
DBGC ( netdev, "NETDEV %p processing %p\n",
|
||||||
netdev, pkb );
|
netdev, pkb );
|
||||||
netdev->ll_protocol->rx ( pkb, netdev );
|
netdev->ll_protocol->rx ( pkb, netdev );
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include <timer.h>
|
#include <timer.h>
|
||||||
#include <vsprintf.h>
|
#include <vsprintf.h>
|
||||||
#include <gpxe/pkbuff.h>
|
#include <gpxe/pkbuff.h>
|
||||||
|
#include <gpxe/malloc.h>
|
||||||
#include <gpxe/retry.h>
|
#include <gpxe/retry.h>
|
||||||
#include <gpxe/tcpip.h>
|
#include <gpxe/tcpip.h>
|
||||||
#include <gpxe/tcp.h>
|
#include <gpxe/tcp.h>
|
||||||
|
@ -265,6 +266,7 @@ static int tcp_senddata_conn ( struct tcp_connection *conn, int force_send ) {
|
||||||
unsigned int flags;
|
unsigned int flags;
|
||||||
size_t len;
|
size_t len;
|
||||||
size_t seq_len;
|
size_t seq_len;
|
||||||
|
size_t window;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
/* Allocate space to the TX buffer */
|
/* Allocate space to the TX buffer */
|
||||||
|
@ -322,6 +324,12 @@ static int tcp_senddata_conn ( struct tcp_connection *conn, int force_send ) {
|
||||||
if ( seq_len )
|
if ( seq_len )
|
||||||
start_timer ( &conn->timer );
|
start_timer ( &conn->timer );
|
||||||
|
|
||||||
|
/* Estimate window size */
|
||||||
|
window = freemem;
|
||||||
|
if ( window > TCP_MAX_WINDOW_SIZE )
|
||||||
|
window = TCP_MAX_WINDOW_SIZE;
|
||||||
|
window &= ~0x03; /* Keep everything dword-aligned */
|
||||||
|
|
||||||
/* Fill up the TCP header */
|
/* Fill up the TCP header */
|
||||||
payload = pkb->data;
|
payload = pkb->data;
|
||||||
if ( flags & TCP_SYN ) {
|
if ( flags & TCP_SYN ) {
|
||||||
|
@ -338,7 +346,7 @@ static int tcp_senddata_conn ( struct tcp_connection *conn, int force_send ) {
|
||||||
tcphdr->ack = htonl ( conn->rcv_ack );
|
tcphdr->ack = htonl ( conn->rcv_ack );
|
||||||
tcphdr->hlen = ( ( payload - pkb->data ) << 2 );
|
tcphdr->hlen = ( ( payload - pkb->data ) << 2 );
|
||||||
tcphdr->flags = flags;
|
tcphdr->flags = flags;
|
||||||
tcphdr->win = htons ( TCP_WINDOW_SIZE );
|
tcphdr->win = htons ( window );
|
||||||
tcphdr->csum = tcpip_chksum ( pkb->data, pkb_len ( pkb ) );
|
tcphdr->csum = tcpip_chksum ( pkb->data, pkb_len ( pkb ) );
|
||||||
|
|
||||||
/* Dump header */
|
/* Dump header */
|
||||||
|
@ -492,7 +500,7 @@ static int tcp_send_reset ( struct tcp_connection *conn,
|
||||||
tcphdr->ack = in_tcphdr->seq;
|
tcphdr->ack = in_tcphdr->seq;
|
||||||
tcphdr->hlen = ( ( sizeof ( *tcphdr ) / 4 ) << 4 );
|
tcphdr->hlen = ( ( sizeof ( *tcphdr ) / 4 ) << 4 );
|
||||||
tcphdr->flags = ( TCP_RST | TCP_ACK );
|
tcphdr->flags = ( TCP_RST | TCP_ACK );
|
||||||
tcphdr->win = htons ( TCP_WINDOW_SIZE );
|
tcphdr->win = htons ( TCP_MAX_WINDOW_SIZE );
|
||||||
tcphdr->csum = tcpip_chksum ( pkb->data, pkb_len ( pkb ) );
|
tcphdr->csum = tcpip_chksum ( pkb->data, pkb_len ( pkb ) );
|
||||||
|
|
||||||
/* Dump header */
|
/* Dump header */
|
||||||
|
|
Loading…
Reference in New Issue