From: Alexander Couzens Date: Mon, 8 Jan 2018 08:54:58 +0000 (+0100) Subject: ltq-atm: rewrite tx path to use IRQs X-Git-Url: http://git.cdn.openwrt.org/?a=commitdiff_plain;h=17eb826a703d996d12004b68df12003a12d71421;p=openwrt%2Fstaging%2Fldir.git ltq-atm: rewrite tx path to use IRQs The ATM subsystem is different from the generic ethernet NICs. The ATM subsystem requires a callback when a packet has been sent. It means a tx skb_buff need to be used after it has sent. While the generic NIC can fill up the TX ring and free skb_buffs if it encounter a ring buffer slot with an already sent skbuff. The ATM drivers need call the pop() function after it has send a single ATM package. The ATM subsystem controls via this ways the queuing. The ppe engine use DMA channels for read and write. Every atm_vcc has it's own TX DMA channel and each TX DMA channel has it's own ring buffer. The old driver had multiple issues: - Call the subsystem callback at the beginning of tx function (ppe_send). Didn't allowed the ATM subsystem to control the enqueued package amount. - Filled up the TX ring until full and fail futher - copy or decouple the skb from all other subsystem before giving it over to TX ring The new tx path uses interupts. - call the subsystem callback _after_ it was sent by hardware - no need to copy our decouple the skb any more - gives back control to the atm subsystem over the enqueued packages - use an interupt for every sent atm package Using interupts shouldn't be a problem because of the slow uplink bandwidth of ADSL. The speed _through_ the DSL router was always as high as it should be, only traffic generated on the router itself were affected. After changing to new tx path, the speed of iperf's run on the router itself reached the same speed. The master/trunk wasn't as much affected because of TCP optimisations (reboot-5022-gb2ea46fe236a). The following results are taken on the remote server, which receives the stream over the internet and the DSL line. The sync moves between every sync a litte bit, but is so far stable Latency / Interleave Delay: Down: Fast (0.25 ms) / Up: Fast (0.50 ms) Data Rate: Down: 13.287 Mb/s / Up: 1.151 Mb/s reboot-5521-g9f8d28285d without patch [ ID] Interval Transfer Bandwidth Retr [ 5] 0.00-10.04 sec 947 KBytes 773 Kbits/sec 0 sender [ 5] 0.00-10.04 sec 928 KBytes 757 Kbits/sec receiver reboot-5521-g9f8d28285d with patch [ 5] 0.00-10.06 sec 1.16 MBytes 970 Kbits/sec 0 sender [ 5] 0.00-10.06 sec 1.15 MBytes 959 Kbits/sec receiver v17.01.4-239-g55c23e44f4 without patch [ ID] Interval Transfer Bandwidth Retr [ 5] 0.00-10.04 sec 87.4 KBytes 71.3 Kbits/sec 0 sender [ 5] 0.00-10.04 sec 59.6 KBytes 48.7 Kbits/sec receiver v17.01.4-239-g55c23e44f4 with patch [ ID] Interval Transfer Bandwidth Retr [ 5] 0.00-10.05 sec 1.18 MBytes 983 Kbits/sec 1 sender [ 5] 0.00-10.05 sec 1.15 MBytes 959 Kbits/sec receiver Signed-off-by: Alexander Couzens --- diff --git a/package/kernel/lantiq/ltq-atm/src/ifxmips_atm_core.h b/package/kernel/lantiq/ltq-atm/src/ifxmips_atm_core.h index 2f754c982b..398be7d828 100644 --- a/package/kernel/lantiq/ltq-atm/src/ifxmips_atm_core.h +++ b/package/kernel/lantiq/ltq-atm/src/ifxmips_atm_core.h @@ -43,6 +43,7 @@ struct ltq_atm_ops { void (*fw_ver)(unsigned int *major, unsigned int *minor); }; +#include #include /* @@ -195,6 +196,7 @@ struct connection { volatile struct tx_descriptor *tx_desc; unsigned int tx_desc_pos; struct sk_buff **tx_skb; + spinlock_t lock; unsigned int aal5_vcc_crc_err; /* number of packets with CRC error */ unsigned int aal5_vcc_oversize_sdu; /* number of packets with oversize error */ diff --git a/package/kernel/lantiq/ltq-atm/src/ltq_atm.c b/package/kernel/lantiq/ltq-atm/src/ltq_atm.c index a08fa22ce2..f306d19364 100644 --- a/package/kernel/lantiq/ltq-atm/src/ltq_atm.c +++ b/package/kernel/lantiq/ltq-atm/src/ltq_atm.c @@ -19,6 +19,8 @@ ** HISTORY ** $Date $Author $Comment ** 07 JUL 2009 Xu Liang Init Version +** +** Copyright 2017 Alexander Couzens *******************************************************************************/ #define IFX_ATM_VER_MAJOR 1 @@ -444,6 +446,9 @@ static int ppe_open(struct atm_vcc *vcc) /* set htu entry */ set_htu_entry(vpi, vci, conn, vcc->qos.aal == ATM_AAL5 ? 1 : 0, 0); + *MBOX_IGU1_ISRC |= (1 << (conn + FIRST_QSB_QID + 16)); + *MBOX_IGU1_IER |= (1 << (conn + FIRST_QSB_QID + 16)); + ret = 0; PPE_OPEN_EXIT: @@ -511,14 +516,18 @@ static int ppe_send(struct atm_vcc *vcc, struct sk_buff *skb) int ret; int conn; int desc_base; + int byteoff; + int required; + /* the len of the data without offset and header */ + int datalen; + unsigned long flags; struct tx_descriptor reg_desc = {0}; + struct tx_inband_header *header; struct sk_buff *new_skb; if ( vcc == NULL || skb == NULL ) return -EINVAL; - skb_get(skb); - atm_free_tx_skb_vcc(skb, vcc); conn = find_vcc(vcc); if ( conn < 0 ) { @@ -532,31 +541,28 @@ static int ppe_send(struct atm_vcc *vcc, struct sk_buff *skb) goto PPE_SEND_FAIL; } - if ( vcc->qos.aal == ATM_AAL5 ) { - int byteoff; - int datalen; - struct tx_inband_header *header; + byteoff = (unsigned int)skb->data & (DATA_BUFFER_ALIGNMENT - 1); + required = sizeof(*header) + byteoff; + if (!skb_clone_writable(skb, required)) { + int expand_by = 0; + int ret; - byteoff = (unsigned int)skb->data & (DATA_BUFFER_ALIGNMENT - 1); - if ( skb_headroom(skb) < byteoff + TX_INBAND_HEADER_LENGTH ) - new_skb = skb_duplicate(skb); - else - new_skb = skb_break_away_from_protocol(skb); - if ( new_skb == NULL ) { - pr_err("either skb_duplicate or skb_break_away_from_protocol fail\n"); - ret = -ENOMEM; - goto PPE_SEND_FAIL; - } - dev_kfree_skb_any(skb); - skb = new_skb; + if (skb_headroom(skb) < required) + expand_by = required - skb_headroom(skb); - datalen = skb->len; - byteoff = (unsigned int)skb->data & (DATA_BUFFER_ALIGNMENT - 1); + ret = pskb_expand_head(skb, expand_by, 0, GFP_ATOMIC); + if (ret) { + printk("pskb_expand_head failed.\n"); + atm_free_tx_skb_vcc(skb, vcc); + return ret; + } + } - skb_push(skb, byteoff + TX_INBAND_HEADER_LENGTH); + datalen = skb->len; + header = (void *)skb_push(skb, byteoff + TX_INBAND_HEADER_LENGTH); - header = (struct tx_inband_header *)skb->data; + if ( vcc->qos.aal == ATM_AAL5 ) { /* setup inband trailer */ header->uu = 0; header->cpi = 0; @@ -576,23 +582,9 @@ static int ppe_send(struct atm_vcc *vcc, struct sk_buff *skb) reg_desc.byteoff = byteoff; reg_desc.iscell = 0; } else { - /* if data pointer is not aligned, allocate new sk_buff */ - if ( ((unsigned int)skb->data & (DATA_BUFFER_ALIGNMENT - 1)) != 0 ) { - pr_err("skb->data not aligned\n"); - new_skb = skb_duplicate(skb); - } else - new_skb = skb_break_away_from_protocol(skb); - if ( new_skb == NULL ) { - pr_err("either skb_duplicate or skb_break_away_from_protocol fail\n"); - ret = -ENOMEM; - goto PPE_SEND_FAIL; - } - dev_kfree_skb_any(skb); - skb = new_skb; - reg_desc.dataptr = (unsigned int)skb->data >> 2; reg_desc.datalen = skb->len; - reg_desc.byteoff = 0; + reg_desc.byteoff = byteoff; reg_desc.iscell = 1; } @@ -600,23 +592,25 @@ static int ppe_send(struct atm_vcc *vcc, struct sk_buff *skb) reg_desc.c = 1; reg_desc.sop = reg_desc.eop = 1; + spin_lock_irqsave(&g_atm_priv_data.conn[conn].lock, flags); desc_base = get_tx_desc(conn); if ( desc_base < 0 ) { + spin_unlock_irqrestore(&g_atm_priv_data.conn[conn].lock, flags); pr_debug("ALLOC_TX_CONNECTION_FAIL\n"); ret = -EIO; goto PPE_SEND_FAIL; } - - if ( vcc->stats ) - atomic_inc(&vcc->stats->tx); - if ( vcc->qos.aal == ATM_AAL5 ) - g_atm_priv_data.wtx_pdu++; - /* update descriptor send pointer */ if ( g_atm_priv_data.conn[conn].tx_skb[desc_base] != NULL ) dev_kfree_skb_any(g_atm_priv_data.conn[conn].tx_skb[desc_base]); g_atm_priv_data.conn[conn].tx_skb[desc_base] = skb; + spin_unlock_irqrestore(&g_atm_priv_data.conn[conn].lock, flags); + + if ( vcc->stats ) + atomic_inc(&vcc->stats->tx); + if ( vcc->qos.aal == ATM_AAL5 ) + g_atm_priv_data.wtx_pdu++; /* write discriptor to memory and write back cache */ g_atm_priv_data.conn[conn].tx_desc[desc_base] = reg_desc; dma_cache_wback((unsigned long)skb->data, skb->len); @@ -900,6 +894,42 @@ static struct sk_buff* skb_break_away_from_protocol(struct sk_buff *skb) return new_skb; } +static void free_tx_ring(unsigned int queue) +{ + unsigned long flags; + int i; + struct connection *conn = &g_atm_priv_data.conn[queue]; + struct sk_buff *skb; + + if (!conn) + return; + + spin_lock_irqsave(&conn->lock, flags); + + for (i = 0; i < dma_tx_descriptor_length; i++) { + if (conn->tx_desc[i].own == 0 && conn->tx_skb[i] != NULL) { + skb = conn->tx_skb[i]; + conn->tx_skb[i] = NULL; + atm_free_tx_skb_vcc(skb, ATM_SKB(skb)->vcc); + } + } + spin_unlock_irqrestore(&conn->lock, flags); +} + +static void mailbox_tx_handler(unsigned int queue_bitmap) +{ + int i; + int bit; + + /* only get valid queues */ + queue_bitmap &= g_atm_priv_data.conn_table; + + for ( i = 0, bit = 1; i < MAX_PVC_NUMBER; i++, bit <<= 1 ) { + if (queue_bitmap & bit) + free_tx_ring(i); + } +} + static inline void mailbox_oam_rx_handler(void) { unsigned int vlddes = WRX_DMA_CHANNEL_CONFIG(RX_DMA_CH_OAM)->vlddes; @@ -1050,12 +1080,22 @@ static inline void mailbox_aal_rx_handler(void) static void do_ppe_tasklet(unsigned long data) { + unsigned int irqs = *MBOX_IGU1_ISR; *MBOX_IGU1_ISRC = *MBOX_IGU1_ISR; - mailbox_oam_rx_handler(); - mailbox_aal_rx_handler(); + + if (irqs & (1 << RX_DMA_CH_AAL)) + mailbox_aal_rx_handler(); + if (irqs & (1 << RX_DMA_CH_OAM)) + mailbox_oam_rx_handler(); + + /* any valid tx irqs */ + if ((irqs >> (FIRST_QSB_QID + 16)) & g_atm_priv_data.conn_table) + mailbox_tx_handler(irqs >> (FIRST_QSB_QID + 16)); if ((*MBOX_IGU1_ISR & ((1 << RX_DMA_CH_AAL) | (1 << RX_DMA_CH_OAM))) != 0) tasklet_schedule(&g_dma_tasklet); + else if (*MBOX_IGU1_ISR >> (FIRST_QSB_QID + 16)) /* TX queue */ + tasklet_schedule(&g_dma_tasklet); else enable_irq(PPE_MAILBOX_IGU1_INT); } @@ -1512,6 +1552,7 @@ static inline int init_priv_data(void) p_tx_desc = (volatile struct tx_descriptor *)((((unsigned int)g_atm_priv_data.tx_desc_base + DESC_ALIGNMENT - 1) & ~(DESC_ALIGNMENT - 1)) | KSEG1); ppskb = (struct sk_buff **)(((unsigned int)g_atm_priv_data.tx_skb_base + 3) & ~3); for ( i = 0; i < MAX_PVC_NUMBER; i++ ) { + spin_lock_init(&g_atm_priv_data.conn[i].lock); g_atm_priv_data.conn[i].tx_desc = &p_tx_desc[i * dma_tx_descriptor_length]; g_atm_priv_data.conn[i].tx_skb = &ppskb[i * dma_tx_descriptor_length]; } @@ -1799,7 +1840,6 @@ static int ltq_atm_probe(struct platform_device *pdev) int ret; int port_num; struct port_cell_info port_cell = {0}; - int i, j; char ver_str[256]; match = of_match_device(ltq_atm_match, &pdev->dev);