From: David S. Miller Date: Tue, 23 Apr 2013 00:32:51 +0000 (-0400) Subject: Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net X-Git-Url: http://git.cdn.openwrt.org/?a=commitdiff_plain;h=6e0895c2ea326cc4bb11e8fa2f654628d5754c31;p=openwrt%2Fstaging%2Fblogic.git Merge git://git./linux/kernel/git/davem/net Conflicts: drivers/net/ethernet/emulex/benet/be_main.c drivers/net/ethernet/intel/igb/igb_main.c drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c include/net/scm.h net/batman-adv/routing.c net/ipv4/tcp_input.c The e{uid,gid} --> {uid,gid} credentials fix conflicted with the cleanup in net-next to now pass cred structs around. The be2net driver had a bug fix in 'net' that overlapped with the VLAN interface changes by Patrick McHardy in net-next. An IGB conflict existed because in 'net' the build_skb() support was reverted, and in 'net-next' there was a comment style fix within that code. Several batman-adv conflicts were resolved by making sure that all calls to batadv_is_my_mac() are changed to have a new bat_priv first argument. Eric Dumazet's TS ECR fix in TCP in 'net' conflicted with the F-RTO rewrite in 'net-next', mostly overlapping changes. Thanks to Stephen Rothwell and Antonio Quartulli for help with several of these merge resolutions. Signed-off-by: David S. Miller --- 6e0895c2ea326cc4bb11e8fa2f654628d5754c31 diff --cc drivers/net/ethernet/emulex/benet/be_main.c index 21808680b91f,2886c9b63f90..654e7820daa0 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@@ -772,8 -759,9 +772,9 @@@ static struct sk_buff *be_insert_vlan_i if (vlan_tx_tag_present(skb)) { vlan_tag = be_get_tx_vlan_tag(adapter, skb); - __vlan_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); - skb->vlan_tci = 0; - skb = __vlan_put_tag(skb, vlan_tag); ++ skb = __vlan_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); + if (skb) + skb->vlan_tci = 0; } return skb; diff --cc drivers/net/ethernet/freescale/fec_main.c index 20890874ead7,000000000000..2451ab1b5a83 mode 100644,000000..100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@@ -1,1957 -1,0 +1,1958 @@@ +/* + * Fast Ethernet Controller (FEC) driver for Motorola MPC8xx. + * Copyright (c) 1997 Dan Malek (dmalek@jlc.net) + * + * Right now, I am very wasteful with the buffers. I allocate memory + * pages and then divide them into 2K frame buffers. This way I know I + * have buffers large enough to hold one frame within one buffer descriptor. + * Once I get this working, I will use 64 or 128 byte CPM buffers, which + * will be much more memory efficient and will easily handle lots of + * small packets. + * + * Much better multiple PHY support by Magnus Damm. + * Copyright (c) 2000 Ericsson Radio Systems AB. + * + * Support for FEC controller of ColdFire processors. + * Copyright (c) 2001-2005 Greg Ungerer (gerg@snapgear.com) + * + * Bug fixes and cleanup by Philippe De Muyter (phdm@macqel.be) + * Copyright (c) 2004-2006 Macq Electronique SA. + * + * Copyright (C) 2010-2011 Freescale Semiconductor, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "fec.h" + +#if defined(CONFIG_ARM) +#define FEC_ALIGNMENT 0xf +#else +#define FEC_ALIGNMENT 0x3 +#endif + +#define DRIVER_NAME "fec" +#define FEC_NAPI_WEIGHT 64 + +/* Pause frame feild and FIFO threshold */ +#define FEC_ENET_FCE (1 << 5) +#define FEC_ENET_RSEM_V 0x84 +#define FEC_ENET_RSFL_V 16 +#define FEC_ENET_RAEM_V 0x8 +#define FEC_ENET_RAFL_V 0x8 +#define FEC_ENET_OPD_V 0xFFF0 + +/* Controller is ENET-MAC */ +#define FEC_QUIRK_ENET_MAC (1 << 0) +/* Controller needs driver to swap frame */ +#define FEC_QUIRK_SWAP_FRAME (1 << 1) +/* Controller uses gasket */ +#define FEC_QUIRK_USE_GASKET (1 << 2) +/* Controller has GBIT support */ +#define FEC_QUIRK_HAS_GBIT (1 << 3) +/* Controller has extend desc buffer */ +#define FEC_QUIRK_HAS_BUFDESC_EX (1 << 4) + +static struct platform_device_id fec_devtype[] = { + { + /* keep it for coldfire */ + .name = DRIVER_NAME, + .driver_data = 0, + }, { + .name = "imx25-fec", + .driver_data = FEC_QUIRK_USE_GASKET, + }, { + .name = "imx27-fec", + .driver_data = 0, + }, { + .name = "imx28-fec", + .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME, + }, { + .name = "imx6q-fec", + .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | + FEC_QUIRK_HAS_BUFDESC_EX, + }, { + .name = "mvf-fec", + .driver_data = FEC_QUIRK_ENET_MAC, + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(platform, fec_devtype); + +enum imx_fec_type { + IMX25_FEC = 1, /* runs on i.mx25/50/53 */ + IMX27_FEC, /* runs on i.mx27/35/51 */ + IMX28_FEC, + IMX6Q_FEC, + MVF_FEC, +}; + +static const struct of_device_id fec_dt_ids[] = { + { .compatible = "fsl,imx25-fec", .data = &fec_devtype[IMX25_FEC], }, + { .compatible = "fsl,imx27-fec", .data = &fec_devtype[IMX27_FEC], }, + { .compatible = "fsl,imx28-fec", .data = &fec_devtype[IMX28_FEC], }, + { .compatible = "fsl,imx6q-fec", .data = &fec_devtype[IMX6Q_FEC], }, + { .compatible = "fsl,mvf-fec", .data = &fec_devtype[MVF_FEC], }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, fec_dt_ids); + +static unsigned char macaddr[ETH_ALEN]; +module_param_array(macaddr, byte, NULL, 0); +MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); + +#if defined(CONFIG_M5272) +/* + * Some hardware gets it MAC address out of local flash memory. + * if this is non-zero then assume it is the address to get MAC from. + */ +#if defined(CONFIG_NETtel) +#define FEC_FLASHMAC 0xf0006006 +#elif defined(CONFIG_GILBARCONAP) || defined(CONFIG_SCALES) +#define FEC_FLASHMAC 0xf0006000 +#elif defined(CONFIG_CANCam) +#define FEC_FLASHMAC 0xf0020000 +#elif defined (CONFIG_M5272C3) +#define FEC_FLASHMAC (0xffe04000 + 4) +#elif defined(CONFIG_MOD5272) +#define FEC_FLASHMAC 0xffc0406b +#else +#define FEC_FLASHMAC 0 +#endif +#endif /* CONFIG_M5272 */ + +#if (((RX_RING_SIZE + TX_RING_SIZE) * 32) > PAGE_SIZE) +#error "FEC: descriptor ring size constants too large" +#endif + +/* Interrupt events/masks. */ +#define FEC_ENET_HBERR ((uint)0x80000000) /* Heartbeat error */ +#define FEC_ENET_BABR ((uint)0x40000000) /* Babbling receiver */ +#define FEC_ENET_BABT ((uint)0x20000000) /* Babbling transmitter */ +#define FEC_ENET_GRA ((uint)0x10000000) /* Graceful stop complete */ +#define FEC_ENET_TXF ((uint)0x08000000) /* Full frame transmitted */ +#define FEC_ENET_TXB ((uint)0x04000000) /* A buffer was transmitted */ +#define FEC_ENET_RXF ((uint)0x02000000) /* Full frame received */ +#define FEC_ENET_RXB ((uint)0x01000000) /* A buffer was received */ +#define FEC_ENET_MII ((uint)0x00800000) /* MII interrupt */ +#define FEC_ENET_EBERR ((uint)0x00400000) /* SDMA bus error */ + +#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII) +#define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF)) + +/* The FEC stores dest/src/type, data, and checksum for receive packets. + */ +#define PKT_MAXBUF_SIZE 1518 +#define PKT_MINBUF_SIZE 64 +#define PKT_MAXBLR_SIZE 1520 + +/* + * The 5270/5271/5280/5282/532x RX control register also contains maximum frame + * size bits. Other FEC hardware does not, so we need to take that into + * account when setting it. + */ +#if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \ + defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) +#define OPT_FRAME_SIZE (PKT_MAXBUF_SIZE << 16) +#else +#define OPT_FRAME_SIZE 0 +#endif + +/* FEC MII MMFR bits definition */ +#define FEC_MMFR_ST (1 << 30) +#define FEC_MMFR_OP_READ (2 << 28) +#define FEC_MMFR_OP_WRITE (1 << 28) +#define FEC_MMFR_PA(v) ((v & 0x1f) << 23) +#define FEC_MMFR_RA(v) ((v & 0x1f) << 18) +#define FEC_MMFR_TA (2 << 16) +#define FEC_MMFR_DATA(v) (v & 0xffff) + +#define FEC_MII_TIMEOUT 30000 /* us */ + +/* Transmitter timeout */ +#define TX_TIMEOUT (2 * HZ) + +#define FEC_PAUSE_FLAG_AUTONEG 0x1 +#define FEC_PAUSE_FLAG_ENABLE 0x2 + +static int mii_cnt; + +static struct bufdesc *fec_enet_get_nextdesc(struct bufdesc *bdp, int is_ex) +{ + struct bufdesc_ex *ex = (struct bufdesc_ex *)bdp; + if (is_ex) + return (struct bufdesc *)(ex + 1); + else + return bdp + 1; +} + +static struct bufdesc *fec_enet_get_prevdesc(struct bufdesc *bdp, int is_ex) +{ + struct bufdesc_ex *ex = (struct bufdesc_ex *)bdp; + if (is_ex) + return (struct bufdesc *)(ex - 1); + else + return bdp - 1; +} + +static void *swap_buffer(void *bufaddr, int len) +{ + int i; + unsigned int *buf = bufaddr; + + for (i = 0; i < (len + 3) / 4; i++, buf++) + *buf = cpu_to_be32(*buf); + + return bufaddr; +} + +static netdev_tx_t +fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + const struct platform_device_id *id_entry = + platform_get_device_id(fep->pdev); + struct bufdesc *bdp; + void *bufaddr; + unsigned short status; + unsigned int index; + + if (!fep->link) { + /* Link is down or autonegotiation is in progress. */ + return NETDEV_TX_BUSY; + } + + /* Fill in a Tx ring entry */ + bdp = fep->cur_tx; + + status = bdp->cbd_sc; + + if (status & BD_ENET_TX_READY) { + /* Ooops. All transmit buffers are full. Bail out. + * This should not happen, since ndev->tbusy should be set. + */ + netdev_err(ndev, "tx queue full!\n"); + return NETDEV_TX_BUSY; + } + + /* Clear all of the status flags */ + status &= ~BD_ENET_TX_STATS; + + /* Set buffer length and buffer pointer */ + bufaddr = skb->data; + bdp->cbd_datlen = skb->len; + + /* + * On some FEC implementations data must be aligned on + * 4-byte boundaries. Use bounce buffers to copy data + * and get it aligned. Ugh. + */ + if (fep->bufdesc_ex) + index = (struct bufdesc_ex *)bdp - + (struct bufdesc_ex *)fep->tx_bd_base; + else + index = bdp - fep->tx_bd_base; + + if (((unsigned long) bufaddr) & FEC_ALIGNMENT) { + memcpy(fep->tx_bounce[index], skb->data, skb->len); + bufaddr = fep->tx_bounce[index]; + } + + /* + * Some design made an incorrect assumption on endian mode of + * the system that it's running on. As the result, driver has to + * swap every frame going to and coming from the controller. + */ + if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) + swap_buffer(bufaddr, skb->len); + + /* Save skb pointer */ + fep->tx_skbuff[index] = skb; + + /* Push the data cache so the CPM does not get stale memory + * data. + */ + bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, bufaddr, + FEC_ENET_TX_FRSIZE, DMA_TO_DEVICE); + + /* Send it on its way. Tell FEC it's ready, interrupt when done, + * it's the last BD of the frame, and to put the CRC on the end. + */ + status |= (BD_ENET_TX_READY | BD_ENET_TX_INTR + | BD_ENET_TX_LAST | BD_ENET_TX_TC); + bdp->cbd_sc = status; + + if (fep->bufdesc_ex) { + + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; + ebdp->cbd_bdu = 0; + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + fep->hwts_tx_en)) { + ebdp->cbd_esc = (BD_ENET_TX_TS | BD_ENET_TX_INT); + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + } else { + + ebdp->cbd_esc = BD_ENET_TX_INT; + } + } + /* If this was the last BD in the ring, start at the beginning again. */ + if (status & BD_ENET_TX_WRAP) + bdp = fep->tx_bd_base; + else + bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + + fep->cur_tx = bdp; + + if (fep->cur_tx == fep->dirty_tx) + netif_stop_queue(ndev); + + /* Trigger transmission start */ + writel(0, fep->hwp + FEC_X_DES_ACTIVE); + + skb_tx_timestamp(skb); + + return NETDEV_TX_OK; +} + +/* Init RX & TX buffer descriptors + */ +static void fec_enet_bd_init(struct net_device *dev) +{ + struct fec_enet_private *fep = netdev_priv(dev); + struct bufdesc *bdp; + unsigned int i; + + /* Initialize the receive buffer descriptors. */ + bdp = fep->rx_bd_base; + for (i = 0; i < RX_RING_SIZE; i++) { + + /* Initialize the BD for every fragment in the page. */ + if (bdp->cbd_bufaddr) + bdp->cbd_sc = BD_ENET_RX_EMPTY; + else + bdp->cbd_sc = 0; + bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + } + + /* Set the last buffer to wrap */ + bdp = fec_enet_get_prevdesc(bdp, fep->bufdesc_ex); + bdp->cbd_sc |= BD_SC_WRAP; + + fep->cur_rx = fep->rx_bd_base; + + /* ...and the same for transmit */ + bdp = fep->tx_bd_base; + fep->cur_tx = bdp; + for (i = 0; i < TX_RING_SIZE; i++) { + + /* Initialize the BD for every fragment in the page. */ + bdp->cbd_sc = 0; + if (bdp->cbd_bufaddr && fep->tx_skbuff[i]) { + dev_kfree_skb_any(fep->tx_skbuff[i]); + fep->tx_skbuff[i] = NULL; + } + bdp->cbd_bufaddr = 0; + bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + } + + /* Set the last buffer to wrap */ + bdp = fec_enet_get_prevdesc(bdp, fep->bufdesc_ex); + bdp->cbd_sc |= BD_SC_WRAP; + fep->dirty_tx = bdp; +} + +/* This function is called to start or restart the FEC during a link + * change. This only happens when switching between half and full + * duplex. + */ +static void +fec_restart(struct net_device *ndev, int duplex) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + const struct platform_device_id *id_entry = + platform_get_device_id(fep->pdev); + int i; + u32 temp_mac[2]; + u32 rcntl = OPT_FRAME_SIZE | 0x04; + u32 ecntl = 0x2; /* ETHEREN */ + + /* Whack a reset. We should wait for this. */ + writel(1, fep->hwp + FEC_ECNTRL); + udelay(10); + + /* + * enet-mac reset will reset mac address registers too, + * so need to reconfigure it. + */ + if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) { + memcpy(&temp_mac, ndev->dev_addr, ETH_ALEN); + writel(cpu_to_be32(temp_mac[0]), fep->hwp + FEC_ADDR_LOW); + writel(cpu_to_be32(temp_mac[1]), fep->hwp + FEC_ADDR_HIGH); + } + + /* Clear any outstanding interrupt. */ + writel(0xffc00000, fep->hwp + FEC_IEVENT); + + /* Reset all multicast. */ + writel(0, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); + writel(0, fep->hwp + FEC_GRP_HASH_TABLE_LOW); +#ifndef CONFIG_M5272 + writel(0, fep->hwp + FEC_HASH_TABLE_HIGH); + writel(0, fep->hwp + FEC_HASH_TABLE_LOW); +#endif + + /* Set maximum receive buffer size. */ + writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE); + + fec_enet_bd_init(ndev); + + /* Set receive and transmit descriptor base. */ + writel(fep->bd_dma, fep->hwp + FEC_R_DES_START); + if (fep->bufdesc_ex) + writel((unsigned long)fep->bd_dma + sizeof(struct bufdesc_ex) + * RX_RING_SIZE, fep->hwp + FEC_X_DES_START); + else + writel((unsigned long)fep->bd_dma + sizeof(struct bufdesc) + * RX_RING_SIZE, fep->hwp + FEC_X_DES_START); + + + for (i = 0; i <= TX_RING_MOD_MASK; i++) { + if (fep->tx_skbuff[i]) { + dev_kfree_skb_any(fep->tx_skbuff[i]); + fep->tx_skbuff[i] = NULL; + } + } + + /* Enable MII mode */ + if (duplex) { + /* FD enable */ + writel(0x04, fep->hwp + FEC_X_CNTRL); + } else { + /* No Rcv on Xmit */ + rcntl |= 0x02; + writel(0x0, fep->hwp + FEC_X_CNTRL); + } + + fep->full_duplex = duplex; + + /* Set MII speed */ + writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED); + + /* + * The phy interface and speed need to get configured + * differently on enet-mac. + */ + if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) { + /* Enable flow control and length check */ + rcntl |= 0x40000000 | 0x00000020; + + /* RGMII, RMII or MII */ + if (fep->phy_interface == PHY_INTERFACE_MODE_RGMII) + rcntl |= (1 << 6); + else if (fep->phy_interface == PHY_INTERFACE_MODE_RMII) + rcntl |= (1 << 8); + else + rcntl &= ~(1 << 8); + + /* 1G, 100M or 10M */ + if (fep->phy_dev) { + if (fep->phy_dev->speed == SPEED_1000) + ecntl |= (1 << 5); + else if (fep->phy_dev->speed == SPEED_100) + rcntl &= ~(1 << 9); + else + rcntl |= (1 << 9); + } + } else { +#ifdef FEC_MIIGSK_ENR + if (id_entry->driver_data & FEC_QUIRK_USE_GASKET) { + u32 cfgr; + /* disable the gasket and wait */ + writel(0, fep->hwp + FEC_MIIGSK_ENR); + while (readl(fep->hwp + FEC_MIIGSK_ENR) & 4) + udelay(1); + + /* + * configure the gasket: + * RMII, 50 MHz, no loopback, no echo + * MII, 25 MHz, no loopback, no echo + */ + cfgr = (fep->phy_interface == PHY_INTERFACE_MODE_RMII) + ? BM_MIIGSK_CFGR_RMII : BM_MIIGSK_CFGR_MII; + if (fep->phy_dev && fep->phy_dev->speed == SPEED_10) + cfgr |= BM_MIIGSK_CFGR_FRCONT_10M; + writel(cfgr, fep->hwp + FEC_MIIGSK_CFGR); + + /* re-enable the gasket */ + writel(2, fep->hwp + FEC_MIIGSK_ENR); + } +#endif + } + + /* enable pause frame*/ + if ((fep->pause_flag & FEC_PAUSE_FLAG_ENABLE) || + ((fep->pause_flag & FEC_PAUSE_FLAG_AUTONEG) && + fep->phy_dev && fep->phy_dev->pause)) { + rcntl |= FEC_ENET_FCE; + + /* set FIFO thresh hold parameter to reduce overrun */ + writel(FEC_ENET_RSEM_V, fep->hwp + FEC_R_FIFO_RSEM); + writel(FEC_ENET_RSFL_V, fep->hwp + FEC_R_FIFO_RSFL); + writel(FEC_ENET_RAEM_V, fep->hwp + FEC_R_FIFO_RAEM); + writel(FEC_ENET_RAFL_V, fep->hwp + FEC_R_FIFO_RAFL); + + /* OPD */ + writel(FEC_ENET_OPD_V, fep->hwp + FEC_OPD); + } else { + rcntl &= ~FEC_ENET_FCE; + } + + writel(rcntl, fep->hwp + FEC_R_CNTRL); + + if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) { + /* enable ENET endian swap */ + ecntl |= (1 << 8); + /* enable ENET store and forward mode */ + writel(1 << 8, fep->hwp + FEC_X_WMRK); + } + + if (fep->bufdesc_ex) + ecntl |= (1 << 4); + + /* And last, enable the transmit and receive processing */ + writel(ecntl, fep->hwp + FEC_ECNTRL); + writel(0, fep->hwp + FEC_R_DES_ACTIVE); + + if (fep->bufdesc_ex) + fec_ptp_start_cyclecounter(ndev); + + /* Enable interrupts we wish to service */ + writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); +} + +static void +fec_stop(struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + const struct platform_device_id *id_entry = + platform_get_device_id(fep->pdev); + u32 rmii_mode = readl(fep->hwp + FEC_R_CNTRL) & (1 << 8); + + /* We cannot expect a graceful transmit stop without link !!! */ + if (fep->link) { + writel(1, fep->hwp + FEC_X_CNTRL); /* Graceful transmit stop */ + udelay(10); + if (!(readl(fep->hwp + FEC_IEVENT) & FEC_ENET_GRA)) + netdev_err(ndev, "Graceful transmit stop did not complete!\n"); + } + + /* Whack a reset. We should wait for this. */ + writel(1, fep->hwp + FEC_ECNTRL); + udelay(10); + writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED); + writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); + + /* We have to keep ENET enabled to have MII interrupt stay working */ + if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) { + writel(2, fep->hwp + FEC_ECNTRL); + writel(rmii_mode, fep->hwp + FEC_R_CNTRL); + } +} + + +static void +fec_timeout(struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + + ndev->stats.tx_errors++; + + fec_restart(ndev, fep->full_duplex); + netif_wake_queue(ndev); +} + +static void +fec_enet_tx(struct net_device *ndev) +{ + struct fec_enet_private *fep; + struct bufdesc *bdp; + unsigned short status; + struct sk_buff *skb; + int index = 0; + + fep = netdev_priv(ndev); + bdp = fep->dirty_tx; + + /* get next bdp of dirty_tx */ + if (bdp->cbd_sc & BD_ENET_TX_WRAP) + bdp = fep->tx_bd_base; + else + bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + + while (((status = bdp->cbd_sc) & BD_ENET_TX_READY) == 0) { + + /* current queue is empty */ + if (bdp == fep->cur_tx) + break; + + if (fep->bufdesc_ex) + index = (struct bufdesc_ex *)bdp - + (struct bufdesc_ex *)fep->tx_bd_base; + else + index = bdp - fep->tx_bd_base; + + dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, + FEC_ENET_TX_FRSIZE, DMA_TO_DEVICE); + bdp->cbd_bufaddr = 0; + + skb = fep->tx_skbuff[index]; + + /* Check for errors. */ + if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC | + BD_ENET_TX_RL | BD_ENET_TX_UN | + BD_ENET_TX_CSL)) { + ndev->stats.tx_errors++; + if (status & BD_ENET_TX_HB) /* No heartbeat */ + ndev->stats.tx_heartbeat_errors++; + if (status & BD_ENET_TX_LC) /* Late collision */ + ndev->stats.tx_window_errors++; + if (status & BD_ENET_TX_RL) /* Retrans limit */ + ndev->stats.tx_aborted_errors++; + if (status & BD_ENET_TX_UN) /* Underrun */ + ndev->stats.tx_fifo_errors++; + if (status & BD_ENET_TX_CSL) /* Carrier lost */ + ndev->stats.tx_carrier_errors++; + } else { + ndev->stats.tx_packets++; + } + + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) && + fep->bufdesc_ex) { + struct skb_shared_hwtstamps shhwtstamps; + unsigned long flags; + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; + + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); + spin_lock_irqsave(&fep->tmreg_lock, flags); + shhwtstamps.hwtstamp = ns_to_ktime( + timecounter_cyc2time(&fep->tc, ebdp->ts)); + spin_unlock_irqrestore(&fep->tmreg_lock, flags); + skb_tstamp_tx(skb, &shhwtstamps); + } + + if (status & BD_ENET_TX_READY) + netdev_err(ndev, "HEY! Enet xmit interrupt and TX_READY\n"); + + /* Deferred means some collisions occurred during transmit, + * but we eventually sent the packet OK. + */ + if (status & BD_ENET_TX_DEF) + ndev->stats.collisions++; + + /* Free the sk buffer associated with this last transmit */ + dev_kfree_skb_any(skb); + fep->tx_skbuff[index] = NULL; + + fep->dirty_tx = bdp; + + /* Update pointer to next buffer descriptor to be transmitted */ + if (status & BD_ENET_TX_WRAP) + bdp = fep->tx_bd_base; + else + bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + + /* Since we have freed up a buffer, the ring is no longer full + */ + if (fep->dirty_tx != fep->cur_tx) { + if (netif_queue_stopped(ndev)) + netif_wake_queue(ndev); + } + } + return; +} + + +/* During a receive, the cur_rx points to the current incoming buffer. + * When we update through the ring, if the next incoming buffer has + * not been given to the system, we just set the empty indicator, + * effectively tossing the packet. + */ +static int +fec_enet_rx(struct net_device *ndev, int budget) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + const struct platform_device_id *id_entry = + platform_get_device_id(fep->pdev); + struct bufdesc *bdp; + unsigned short status; + struct sk_buff *skb; + ushort pkt_len; + __u8 *data; + int pkt_received = 0; + +#ifdef CONFIG_M532x + flush_cache_all(); +#endif + + /* First, grab all of the stats for the incoming packet. + * These get messed up if we get called due to a busy condition. + */ + bdp = fep->cur_rx; + + while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) { + + if (pkt_received >= budget) + break; + pkt_received++; + + /* Since we have allocated space to hold a complete frame, + * the last indicator should be set. + */ + if ((status & BD_ENET_RX_LAST) == 0) + netdev_err(ndev, "rcv is not +last\n"); + + if (!fep->opened) + goto rx_processing_done; + + /* Check for errors. */ + if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO | + BD_ENET_RX_CR | BD_ENET_RX_OV)) { + ndev->stats.rx_errors++; + if (status & (BD_ENET_RX_LG | BD_ENET_RX_SH)) { + /* Frame too long or too short. */ + ndev->stats.rx_length_errors++; + } + if (status & BD_ENET_RX_NO) /* Frame alignment */ + ndev->stats.rx_frame_errors++; + if (status & BD_ENET_RX_CR) /* CRC Error */ + ndev->stats.rx_crc_errors++; + if (status & BD_ENET_RX_OV) /* FIFO overrun */ + ndev->stats.rx_fifo_errors++; + } + + /* Report late collisions as a frame error. + * On this error, the BD is closed, but we don't know what we + * have in the buffer. So, just drop this frame on the floor. + */ + if (status & BD_ENET_RX_CL) { + ndev->stats.rx_errors++; + ndev->stats.rx_frame_errors++; + goto rx_processing_done; + } + + /* Process the incoming frame. */ + ndev->stats.rx_packets++; + pkt_len = bdp->cbd_datlen; + ndev->stats.rx_bytes += pkt_len; + data = (__u8*)__va(bdp->cbd_bufaddr); + + dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, + FEC_ENET_TX_FRSIZE, DMA_FROM_DEVICE); + + if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) + swap_buffer(data, pkt_len); + + /* This does 16 byte alignment, exactly what we need. + * The packet length includes FCS, but we don't want to + * include that when passing upstream as it messes up + * bridging applications. + */ + skb = netdev_alloc_skb(ndev, pkt_len - 4 + NET_IP_ALIGN); + + if (unlikely(!skb)) { + ndev->stats.rx_dropped++; + } else { + skb_reserve(skb, NET_IP_ALIGN); + skb_put(skb, pkt_len - 4); /* Make room */ + skb_copy_to_linear_data(skb, data, pkt_len - 4); + skb->protocol = eth_type_trans(skb, ndev); + + /* Get receive timestamp from the skb */ + if (fep->hwts_rx_en && fep->bufdesc_ex) { + struct skb_shared_hwtstamps *shhwtstamps = + skb_hwtstamps(skb); + unsigned long flags; + struct bufdesc_ex *ebdp = + (struct bufdesc_ex *)bdp; + + memset(shhwtstamps, 0, sizeof(*shhwtstamps)); + + spin_lock_irqsave(&fep->tmreg_lock, flags); + shhwtstamps->hwtstamp = ns_to_ktime( + timecounter_cyc2time(&fep->tc, ebdp->ts)); + spin_unlock_irqrestore(&fep->tmreg_lock, flags); + } + + if (!skb_defer_rx_timestamp(skb)) + napi_gro_receive(&fep->napi, skb); + } + + bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, data, + FEC_ENET_TX_FRSIZE, DMA_FROM_DEVICE); +rx_processing_done: + /* Clear the status flags for this buffer */ + status &= ~BD_ENET_RX_STATS; + + /* Mark the buffer empty */ + status |= BD_ENET_RX_EMPTY; + bdp->cbd_sc = status; + + if (fep->bufdesc_ex) { + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; + + ebdp->cbd_esc = BD_ENET_RX_INT; + ebdp->cbd_prot = 0; + ebdp->cbd_bdu = 0; + } + + /* Update BD pointer to next entry */ + if (status & BD_ENET_RX_WRAP) + bdp = fep->rx_bd_base; + else + bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + /* Doing this here will keep the FEC running while we process + * incoming frames. On a heavily loaded network, we should be + * able to keep up at the expense of system resources. + */ + writel(0, fep->hwp + FEC_R_DES_ACTIVE); + } + fep->cur_rx = bdp; + + return pkt_received; +} + +static irqreturn_t +fec_enet_interrupt(int irq, void *dev_id) +{ + struct net_device *ndev = dev_id; + struct fec_enet_private *fep = netdev_priv(ndev); + uint int_events; + irqreturn_t ret = IRQ_NONE; + + do { + int_events = readl(fep->hwp + FEC_IEVENT); + writel(int_events, fep->hwp + FEC_IEVENT); + + if (int_events & (FEC_ENET_RXF | FEC_ENET_TXF)) { + ret = IRQ_HANDLED; + + /* Disable the RX interrupt */ + if (napi_schedule_prep(&fep->napi)) { + writel(FEC_RX_DISABLED_IMASK, + fep->hwp + FEC_IMASK); + __napi_schedule(&fep->napi); + } + } + + if (int_events & FEC_ENET_MII) { + ret = IRQ_HANDLED; + complete(&fep->mdio_done); + } + } while (int_events); + + return ret; +} + +static int fec_enet_rx_napi(struct napi_struct *napi, int budget) +{ + struct net_device *ndev = napi->dev; + int pkts = fec_enet_rx(ndev, budget); + struct fec_enet_private *fep = netdev_priv(ndev); + + fec_enet_tx(ndev); + + if (pkts < budget) { + napi_complete(napi); + writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); + } + return pkts; +} + +/* ------------------------------------------------------------------------- */ +static void fec_get_mac(struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + struct fec_platform_data *pdata = fep->pdev->dev.platform_data; + unsigned char *iap, tmpaddr[ETH_ALEN]; + + /* + * try to get mac address in following order: + * + * 1) module parameter via kernel command line in form + * fec.macaddr=0x00,0x04,0x9f,0x01,0x30,0xe0 + */ + iap = macaddr; + + /* + * 2) from device tree data + */ + if (!is_valid_ether_addr(iap)) { + struct device_node *np = fep->pdev->dev.of_node; + if (np) { + const char *mac = of_get_mac_address(np); + if (mac) + iap = (unsigned char *) mac; + } + } + + /* + * 3) from flash or fuse (via platform data) + */ + if (!is_valid_ether_addr(iap)) { +#ifdef CONFIG_M5272 + if (FEC_FLASHMAC) + iap = (unsigned char *)FEC_FLASHMAC; +#else + if (pdata) + iap = (unsigned char *)&pdata->mac; +#endif + } + + /* + * 4) FEC mac registers set by bootloader + */ + if (!is_valid_ether_addr(iap)) { + *((unsigned long *) &tmpaddr[0]) = + be32_to_cpu(readl(fep->hwp + FEC_ADDR_LOW)); + *((unsigned short *) &tmpaddr[4]) = + be16_to_cpu(readl(fep->hwp + FEC_ADDR_HIGH) >> 16); + iap = &tmpaddr[0]; + } + + memcpy(ndev->dev_addr, iap, ETH_ALEN); + + /* Adjust MAC if using macaddr */ + if (iap == macaddr) + ndev->dev_addr[ETH_ALEN-1] = macaddr[ETH_ALEN-1] + fep->dev_id; +} + +/* ------------------------------------------------------------------------- */ + +/* + * Phy section + */ +static void fec_enet_adjust_link(struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + struct phy_device *phy_dev = fep->phy_dev; + unsigned long flags; + + int status_change = 0; + + spin_lock_irqsave(&fep->hw_lock, flags); + + /* Prevent a state halted on mii error */ + if (fep->mii_timeout && phy_dev->state == PHY_HALTED) { + phy_dev->state = PHY_RESUMING; + goto spin_unlock; + } + + if (phy_dev->link) { + if (!fep->link) { + fep->link = phy_dev->link; + status_change = 1; + } + + if (fep->full_duplex != phy_dev->duplex) + status_change = 1; + + if (phy_dev->speed != fep->speed) { + fep->speed = phy_dev->speed; + status_change = 1; + } + + /* if any of the above changed restart the FEC */ + if (status_change) + fec_restart(ndev, phy_dev->duplex); + } else { + if (fep->link) { + fec_stop(ndev); ++ fep->link = phy_dev->link; + status_change = 1; + } + } + +spin_unlock: + spin_unlock_irqrestore(&fep->hw_lock, flags); + + if (status_change) + phy_print_status(phy_dev); +} + +static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) +{ + struct fec_enet_private *fep = bus->priv; + unsigned long time_left; + + fep->mii_timeout = 0; + init_completion(&fep->mdio_done); + + /* start a read op */ + writel(FEC_MMFR_ST | FEC_MMFR_OP_READ | + FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(regnum) | + FEC_MMFR_TA, fep->hwp + FEC_MII_DATA); + + /* wait for end of transfer */ + time_left = wait_for_completion_timeout(&fep->mdio_done, + usecs_to_jiffies(FEC_MII_TIMEOUT)); + if (time_left == 0) { + fep->mii_timeout = 1; + netdev_err(fep->netdev, "MDIO read timeout\n"); + return -ETIMEDOUT; + } + + /* return value */ + return FEC_MMFR_DATA(readl(fep->hwp + FEC_MII_DATA)); +} + +static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum, + u16 value) +{ + struct fec_enet_private *fep = bus->priv; + unsigned long time_left; + + fep->mii_timeout = 0; + init_completion(&fep->mdio_done); + + /* start a write op */ + writel(FEC_MMFR_ST | FEC_MMFR_OP_WRITE | + FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(regnum) | + FEC_MMFR_TA | FEC_MMFR_DATA(value), + fep->hwp + FEC_MII_DATA); + + /* wait for end of transfer */ + time_left = wait_for_completion_timeout(&fep->mdio_done, + usecs_to_jiffies(FEC_MII_TIMEOUT)); + if (time_left == 0) { + fep->mii_timeout = 1; + netdev_err(fep->netdev, "MDIO write timeout\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static int fec_enet_mdio_reset(struct mii_bus *bus) +{ + return 0; +} + +static int fec_enet_mii_probe(struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + const struct platform_device_id *id_entry = + platform_get_device_id(fep->pdev); + struct phy_device *phy_dev = NULL; + char mdio_bus_id[MII_BUS_ID_SIZE]; + char phy_name[MII_BUS_ID_SIZE + 3]; + int phy_id; + int dev_id = fep->dev_id; + + fep->phy_dev = NULL; + + /* check for attached phy */ + for (phy_id = 0; (phy_id < PHY_MAX_ADDR); phy_id++) { + if ((fep->mii_bus->phy_mask & (1 << phy_id))) + continue; + if (fep->mii_bus->phy_map[phy_id] == NULL) + continue; + if (fep->mii_bus->phy_map[phy_id]->phy_id == 0) + continue; + if (dev_id--) + continue; + strncpy(mdio_bus_id, fep->mii_bus->id, MII_BUS_ID_SIZE); + break; + } + + if (phy_id >= PHY_MAX_ADDR) { + netdev_info(ndev, "no PHY, assuming direct connection to switch\n"); + strncpy(mdio_bus_id, "fixed-0", MII_BUS_ID_SIZE); + phy_id = 0; + } + + snprintf(phy_name, sizeof(phy_name), PHY_ID_FMT, mdio_bus_id, phy_id); + phy_dev = phy_connect(ndev, phy_name, &fec_enet_adjust_link, + fep->phy_interface); + if (IS_ERR(phy_dev)) { + netdev_err(ndev, "could not attach to PHY\n"); + return PTR_ERR(phy_dev); + } + + /* mask with MAC supported features */ + if (id_entry->driver_data & FEC_QUIRK_HAS_GBIT) { + phy_dev->supported &= PHY_GBIT_FEATURES; + phy_dev->supported |= SUPPORTED_Pause; + } + else + phy_dev->supported &= PHY_BASIC_FEATURES; + + phy_dev->advertising = phy_dev->supported; + + fep->phy_dev = phy_dev; + fep->link = 0; + fep->full_duplex = 0; + + netdev_info(ndev, "Freescale FEC PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", + fep->phy_dev->drv->name, dev_name(&fep->phy_dev->dev), + fep->phy_dev->irq); + + return 0; +} + +static int fec_enet_mii_init(struct platform_device *pdev) +{ + static struct mii_bus *fec0_mii_bus; + struct net_device *ndev = platform_get_drvdata(pdev); + struct fec_enet_private *fep = netdev_priv(ndev); + const struct platform_device_id *id_entry = + platform_get_device_id(fep->pdev); + int err = -ENXIO, i; + + /* + * The dual fec interfaces are not equivalent with enet-mac. + * Here are the differences: + * + * - fec0 supports MII & RMII modes while fec1 only supports RMII + * - fec0 acts as the 1588 time master while fec1 is slave + * - external phys can only be configured by fec0 + * + * That is to say fec1 can not work independently. It only works + * when fec0 is working. The reason behind this design is that the + * second interface is added primarily for Switch mode. + * + * Because of the last point above, both phys are attached on fec0 + * mdio interface in board design, and need to be configured by + * fec0 mii_bus. + */ + if ((id_entry->driver_data & FEC_QUIRK_ENET_MAC) && fep->dev_id > 0) { + /* fec1 uses fec0 mii_bus */ + if (mii_cnt && fec0_mii_bus) { + fep->mii_bus = fec0_mii_bus; + mii_cnt++; + return 0; + } + return -ENOENT; + } + + fep->mii_timeout = 0; + + /* + * Set MII speed to 2.5 MHz (= clk_get_rate() / 2 * phy_speed) + * + * The formula for FEC MDC is 'ref_freq / (MII_SPEED x 2)' while + * for ENET-MAC is 'ref_freq / ((MII_SPEED + 1) x 2)'. The i.MX28 + * Reference Manual has an error on this, and gets fixed on i.MX6Q + * document. + */ + fep->phy_speed = DIV_ROUND_UP(clk_get_rate(fep->clk_ahb), 5000000); + if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) + fep->phy_speed--; + fep->phy_speed <<= 1; + writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED); + + fep->mii_bus = mdiobus_alloc(); + if (fep->mii_bus == NULL) { + err = -ENOMEM; + goto err_out; + } + + fep->mii_bus->name = "fec_enet_mii_bus"; + fep->mii_bus->read = fec_enet_mdio_read; + fep->mii_bus->write = fec_enet_mdio_write; + fep->mii_bus->reset = fec_enet_mdio_reset; + snprintf(fep->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", + pdev->name, fep->dev_id + 1); + fep->mii_bus->priv = fep; + fep->mii_bus->parent = &pdev->dev; + + fep->mii_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); + if (!fep->mii_bus->irq) { + err = -ENOMEM; + goto err_out_free_mdiobus; + } + + for (i = 0; i < PHY_MAX_ADDR; i++) + fep->mii_bus->irq[i] = PHY_POLL; + + if (mdiobus_register(fep->mii_bus)) + goto err_out_free_mdio_irq; + + mii_cnt++; + + /* save fec0 mii_bus */ + if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) + fec0_mii_bus = fep->mii_bus; + + return 0; + +err_out_free_mdio_irq: + kfree(fep->mii_bus->irq); +err_out_free_mdiobus: + mdiobus_free(fep->mii_bus); +err_out: + return err; +} + +static void fec_enet_mii_remove(struct fec_enet_private *fep) +{ + if (--mii_cnt == 0) { + mdiobus_unregister(fep->mii_bus); + kfree(fep->mii_bus->irq); + mdiobus_free(fep->mii_bus); + } +} + +static int fec_enet_get_settings(struct net_device *ndev, + struct ethtool_cmd *cmd) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + struct phy_device *phydev = fep->phy_dev; + + if (!phydev) + return -ENODEV; + + return phy_ethtool_gset(phydev, cmd); +} + +static int fec_enet_set_settings(struct net_device *ndev, + struct ethtool_cmd *cmd) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + struct phy_device *phydev = fep->phy_dev; + + if (!phydev) + return -ENODEV; + + return phy_ethtool_sset(phydev, cmd); +} + +static void fec_enet_get_drvinfo(struct net_device *ndev, + struct ethtool_drvinfo *info) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + + strlcpy(info->driver, fep->pdev->dev.driver->name, + sizeof(info->driver)); + strlcpy(info->version, "Revision: 1.0", sizeof(info->version)); + strlcpy(info->bus_info, dev_name(&ndev->dev), sizeof(info->bus_info)); +} + +static int fec_enet_get_ts_info(struct net_device *ndev, + struct ethtool_ts_info *info) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + + if (fep->bufdesc_ex) { + + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + if (fep->ptp_clock) + info->phc_index = ptp_clock_index(fep->ptp_clock); + else + info->phc_index = -1; + + info->tx_types = (1 << HWTSTAMP_TX_OFF) | + (1 << HWTSTAMP_TX_ON); + + info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | + (1 << HWTSTAMP_FILTER_ALL); + return 0; + } else { + return ethtool_op_get_ts_info(ndev, info); + } +} + +static void fec_enet_get_pauseparam(struct net_device *ndev, + struct ethtool_pauseparam *pause) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + + pause->autoneg = (fep->pause_flag & FEC_PAUSE_FLAG_AUTONEG) != 0; + pause->tx_pause = (fep->pause_flag & FEC_PAUSE_FLAG_ENABLE) != 0; + pause->rx_pause = pause->tx_pause; +} + +static int fec_enet_set_pauseparam(struct net_device *ndev, + struct ethtool_pauseparam *pause) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + + if (pause->tx_pause != pause->rx_pause) { + netdev_info(ndev, + "hardware only support enable/disable both tx and rx"); + return -EINVAL; + } + + fep->pause_flag = 0; + + /* tx pause must be same as rx pause */ + fep->pause_flag |= pause->rx_pause ? FEC_PAUSE_FLAG_ENABLE : 0; + fep->pause_flag |= pause->autoneg ? FEC_PAUSE_FLAG_AUTONEG : 0; + + if (pause->rx_pause || pause->autoneg) { + fep->phy_dev->supported |= ADVERTISED_Pause; + fep->phy_dev->advertising |= ADVERTISED_Pause; + } else { + fep->phy_dev->supported &= ~ADVERTISED_Pause; + fep->phy_dev->advertising &= ~ADVERTISED_Pause; + } + + if (pause->autoneg) { + if (netif_running(ndev)) + fec_stop(ndev); + phy_start_aneg(fep->phy_dev); + } + if (netif_running(ndev)) + fec_restart(ndev, 0); + + return 0; +} + +static const struct ethtool_ops fec_enet_ethtool_ops = { + .get_pauseparam = fec_enet_get_pauseparam, + .set_pauseparam = fec_enet_set_pauseparam, + .get_settings = fec_enet_get_settings, + .set_settings = fec_enet_set_settings, + .get_drvinfo = fec_enet_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_ts_info = fec_enet_get_ts_info, +}; + +static int fec_enet_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + struct phy_device *phydev = fep->phy_dev; + + if (!netif_running(ndev)) + return -EINVAL; + + if (!phydev) + return -ENODEV; + + if (cmd == SIOCSHWTSTAMP && fep->bufdesc_ex) + return fec_ptp_ioctl(ndev, rq, cmd); + + return phy_mii_ioctl(phydev, rq, cmd); +} + +static void fec_enet_free_buffers(struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + unsigned int i; + struct sk_buff *skb; + struct bufdesc *bdp; + + bdp = fep->rx_bd_base; + for (i = 0; i < RX_RING_SIZE; i++) { + skb = fep->rx_skbuff[i]; + + if (bdp->cbd_bufaddr) + dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, + FEC_ENET_RX_FRSIZE, DMA_FROM_DEVICE); + if (skb) + dev_kfree_skb(skb); + bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + } + + bdp = fep->tx_bd_base; + for (i = 0; i < TX_RING_SIZE; i++) + kfree(fep->tx_bounce[i]); +} + +static int fec_enet_alloc_buffers(struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + unsigned int i; + struct sk_buff *skb; + struct bufdesc *bdp; + + bdp = fep->rx_bd_base; + for (i = 0; i < RX_RING_SIZE; i++) { + skb = netdev_alloc_skb(ndev, FEC_ENET_RX_FRSIZE); + if (!skb) { + fec_enet_free_buffers(ndev); + return -ENOMEM; + } + fep->rx_skbuff[i] = skb; + + bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, skb->data, + FEC_ENET_RX_FRSIZE, DMA_FROM_DEVICE); + bdp->cbd_sc = BD_ENET_RX_EMPTY; + + if (fep->bufdesc_ex) { + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; + ebdp->cbd_esc = BD_ENET_RX_INT; + } + + bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + } + + /* Set the last buffer to wrap. */ + bdp = fec_enet_get_prevdesc(bdp, fep->bufdesc_ex); + bdp->cbd_sc |= BD_SC_WRAP; + + bdp = fep->tx_bd_base; + for (i = 0; i < TX_RING_SIZE; i++) { + fep->tx_bounce[i] = kmalloc(FEC_ENET_TX_FRSIZE, GFP_KERNEL); + + bdp->cbd_sc = 0; + bdp->cbd_bufaddr = 0; + + if (fep->bufdesc_ex) { + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; + ebdp->cbd_esc = BD_ENET_TX_INT; + } + + bdp = fec_enet_get_nextdesc(bdp, fep->bufdesc_ex); + } + + /* Set the last buffer to wrap. */ + bdp = fec_enet_get_prevdesc(bdp, fep->bufdesc_ex); + bdp->cbd_sc |= BD_SC_WRAP; + + return 0; +} + +static int +fec_enet_open(struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + int ret; + + napi_enable(&fep->napi); + + /* I should reset the ring buffers here, but I don't yet know + * a simple way to do that. + */ + + ret = fec_enet_alloc_buffers(ndev); + if (ret) + return ret; + + /* Probe and connect to PHY when open the interface */ + ret = fec_enet_mii_probe(ndev); + if (ret) { + fec_enet_free_buffers(ndev); + return ret; + } + phy_start(fep->phy_dev); + netif_start_queue(ndev); + fep->opened = 1; + return 0; +} + +static int +fec_enet_close(struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + + /* Don't know what to do yet. */ + napi_disable(&fep->napi); + fep->opened = 0; + netif_stop_queue(ndev); + fec_stop(ndev); + + if (fep->phy_dev) { + phy_stop(fep->phy_dev); + phy_disconnect(fep->phy_dev); + } + + fec_enet_free_buffers(ndev); + + return 0; +} + +/* Set or clear the multicast filter for this adaptor. + * Skeleton taken from sunlance driver. + * The CPM Ethernet implementation allows Multicast as well as individual + * MAC address filtering. Some of the drivers check to make sure it is + * a group multicast address, and discard those that are not. I guess I + * will do the same for now, but just remove the test if you want + * individual filtering as well (do the upper net layers want or support + * this kind of feature?). + */ + +#define HASH_BITS 6 /* #bits in hash */ +#define CRC32_POLY 0xEDB88320 + +static void set_multicast_list(struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + struct netdev_hw_addr *ha; + unsigned int i, bit, data, crc, tmp; + unsigned char hash; + + if (ndev->flags & IFF_PROMISC) { + tmp = readl(fep->hwp + FEC_R_CNTRL); + tmp |= 0x8; + writel(tmp, fep->hwp + FEC_R_CNTRL); + return; + } + + tmp = readl(fep->hwp + FEC_R_CNTRL); + tmp &= ~0x8; + writel(tmp, fep->hwp + FEC_R_CNTRL); + + if (ndev->flags & IFF_ALLMULTI) { + /* Catch all multicast addresses, so set the + * filter to all 1's + */ + writel(0xffffffff, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); + writel(0xffffffff, fep->hwp + FEC_GRP_HASH_TABLE_LOW); + + return; + } + + /* Clear filter and add the addresses in hash register + */ + writel(0, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); + writel(0, fep->hwp + FEC_GRP_HASH_TABLE_LOW); + + netdev_for_each_mc_addr(ha, ndev) { + /* calculate crc32 value of mac address */ + crc = 0xffffffff; + + for (i = 0; i < ndev->addr_len; i++) { + data = ha->addr[i]; + for (bit = 0; bit < 8; bit++, data >>= 1) { + crc = (crc >> 1) ^ + (((crc ^ data) & 1) ? CRC32_POLY : 0); + } + } + + /* only upper 6 bits (HASH_BITS) are used + * which point to specific bit in he hash registers + */ + hash = (crc >> (32 - HASH_BITS)) & 0x3f; + + if (hash > 31) { + tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_HIGH); + tmp |= 1 << (hash - 32); + writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); + } else { + tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_LOW); + tmp |= 1 << hash; + writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_LOW); + } + } +} + +/* Set a MAC change in hardware. */ +static int +fec_set_mac_address(struct net_device *ndev, void *p) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + struct sockaddr *addr = p; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + memcpy(ndev->dev_addr, addr->sa_data, ndev->addr_len); + + writel(ndev->dev_addr[3] | (ndev->dev_addr[2] << 8) | + (ndev->dev_addr[1] << 16) | (ndev->dev_addr[0] << 24), + fep->hwp + FEC_ADDR_LOW); + writel((ndev->dev_addr[5] << 16) | (ndev->dev_addr[4] << 24), + fep->hwp + FEC_ADDR_HIGH); + return 0; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +/** + * fec_poll_controller - FEC Poll controller function + * @dev: The FEC network adapter + * + * Polled functionality used by netconsole and others in non interrupt mode + * + */ +static void fec_poll_controller(struct net_device *dev) +{ + int i; + struct fec_enet_private *fep = netdev_priv(dev); + + for (i = 0; i < FEC_IRQ_NUM; i++) { + if (fep->irq[i] > 0) { + disable_irq(fep->irq[i]); + fec_enet_interrupt(fep->irq[i], dev); + enable_irq(fep->irq[i]); + } + } +} +#endif + +static const struct net_device_ops fec_netdev_ops = { + .ndo_open = fec_enet_open, + .ndo_stop = fec_enet_close, + .ndo_start_xmit = fec_enet_start_xmit, + .ndo_set_rx_mode = set_multicast_list, + .ndo_change_mtu = eth_change_mtu, + .ndo_validate_addr = eth_validate_addr, + .ndo_tx_timeout = fec_timeout, + .ndo_set_mac_address = fec_set_mac_address, + .ndo_do_ioctl = fec_enet_ioctl, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = fec_poll_controller, +#endif +}; + + /* + * XXX: We need to clean up on failure exits here. + * + */ +static int fec_enet_init(struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + struct bufdesc *cbd_base; + + /* Allocate memory for buffer descriptors. */ + cbd_base = dma_alloc_coherent(NULL, PAGE_SIZE, &fep->bd_dma, + GFP_KERNEL); + if (!cbd_base) + return -ENOMEM; + + memset(cbd_base, 0, PAGE_SIZE); + spin_lock_init(&fep->hw_lock); + + fep->netdev = ndev; + + /* Get the Ethernet address */ + fec_get_mac(ndev); + + /* Set receive and transmit descriptor base. */ + fep->rx_bd_base = cbd_base; + if (fep->bufdesc_ex) + fep->tx_bd_base = (struct bufdesc *) + (((struct bufdesc_ex *)cbd_base) + RX_RING_SIZE); + else + fep->tx_bd_base = cbd_base + RX_RING_SIZE; + + /* The FEC Ethernet specific entries in the device structure */ + ndev->watchdog_timeo = TX_TIMEOUT; + ndev->netdev_ops = &fec_netdev_ops; + ndev->ethtool_ops = &fec_enet_ethtool_ops; + + writel(FEC_RX_DISABLED_IMASK, fep->hwp + FEC_IMASK); + netif_napi_add(ndev, &fep->napi, fec_enet_rx_napi, FEC_NAPI_WEIGHT); + + fec_restart(ndev, 0); + + return 0; +} + +#ifdef CONFIG_OF +static void fec_reset_phy(struct platform_device *pdev) +{ + int err, phy_reset; + int msec = 1; + struct device_node *np = pdev->dev.of_node; + + if (!np) + return; + + of_property_read_u32(np, "phy-reset-duration", &msec); + /* A sane reset duration should not be longer than 1s */ + if (msec > 1000) + msec = 1; + + phy_reset = of_get_named_gpio(np, "phy-reset-gpios", 0); + if (!gpio_is_valid(phy_reset)) + return; + + err = devm_gpio_request_one(&pdev->dev, phy_reset, + GPIOF_OUT_INIT_LOW, "phy-reset"); + if (err) { + dev_err(&pdev->dev, "failed to get phy-reset-gpios: %d\n", err); + return; + } + msleep(msec); + gpio_set_value(phy_reset, 1); +} +#else /* CONFIG_OF */ +static void fec_reset_phy(struct platform_device *pdev) +{ + /* + * In case of platform probe, the reset has been done + * by machine code. + */ +} +#endif /* CONFIG_OF */ + +static int +fec_probe(struct platform_device *pdev) +{ + struct fec_enet_private *fep; + struct fec_platform_data *pdata; + struct net_device *ndev; + int i, irq, ret = 0; + struct resource *r; + const struct of_device_id *of_id; + static int dev_id; + struct pinctrl *pinctrl; + struct regulator *reg_phy; + + of_id = of_match_device(fec_dt_ids, &pdev->dev); + if (of_id) + pdev->id_entry = of_id->data; + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) + return -ENXIO; + + /* Init network device */ + ndev = alloc_etherdev(sizeof(struct fec_enet_private)); + if (!ndev) + return -ENOMEM; + + SET_NETDEV_DEV(ndev, &pdev->dev); + + /* setup board info structure */ + fep = netdev_priv(ndev); + + /* default enable pause frame auto negotiation */ + if (pdev->id_entry && + (pdev->id_entry->driver_data & FEC_QUIRK_HAS_GBIT)) + fep->pause_flag |= FEC_PAUSE_FLAG_AUTONEG; + + fep->hwp = devm_request_and_ioremap(&pdev->dev, r); + fep->pdev = pdev; + fep->dev_id = dev_id++; + + fep->bufdesc_ex = 0; + + if (!fep->hwp) { + ret = -ENOMEM; + goto failed_ioremap; + } + + platform_set_drvdata(pdev, ndev); + + ret = of_get_phy_mode(pdev->dev.of_node); + if (ret < 0) { + pdata = pdev->dev.platform_data; + if (pdata) + fep->phy_interface = pdata->phy; + else + fep->phy_interface = PHY_INTERFACE_MODE_MII; + } else { + fep->phy_interface = ret; + } + + pinctrl = devm_pinctrl_get_select_default(&pdev->dev); + if (IS_ERR(pinctrl)) { + ret = PTR_ERR(pinctrl); + goto failed_pin; + } + + fep->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); + if (IS_ERR(fep->clk_ipg)) { + ret = PTR_ERR(fep->clk_ipg); + goto failed_clk; + } + + fep->clk_ahb = devm_clk_get(&pdev->dev, "ahb"); + if (IS_ERR(fep->clk_ahb)) { + ret = PTR_ERR(fep->clk_ahb); + goto failed_clk; + } + + fep->clk_ptp = devm_clk_get(&pdev->dev, "ptp"); + fep->bufdesc_ex = + pdev->id_entry->driver_data & FEC_QUIRK_HAS_BUFDESC_EX; + if (IS_ERR(fep->clk_ptp)) { + ret = PTR_ERR(fep->clk_ptp); + fep->bufdesc_ex = 0; + } + + clk_prepare_enable(fep->clk_ahb); + clk_prepare_enable(fep->clk_ipg); + if (!IS_ERR(fep->clk_ptp)) + clk_prepare_enable(fep->clk_ptp); + + reg_phy = devm_regulator_get(&pdev->dev, "phy"); + if (!IS_ERR(reg_phy)) { + ret = regulator_enable(reg_phy); + if (ret) { + dev_err(&pdev->dev, + "Failed to enable phy regulator: %d\n", ret); + goto failed_regulator; + } + } + + fec_reset_phy(pdev); + + if (fep->bufdesc_ex) + fec_ptp_init(ndev, pdev); + + ret = fec_enet_init(ndev); + if (ret) + goto failed_init; + + for (i = 0; i < FEC_IRQ_NUM; i++) { + irq = platform_get_irq(pdev, i); + if (irq < 0) { + if (i) + break; + ret = irq; + goto failed_irq; + } + ret = request_irq(irq, fec_enet_interrupt, IRQF_DISABLED, pdev->name, ndev); + if (ret) { + while (--i >= 0) { + irq = platform_get_irq(pdev, i); + free_irq(irq, ndev); + } + goto failed_irq; + } + } + + ret = fec_enet_mii_init(pdev); + if (ret) + goto failed_mii_init; + + /* Carrier starts down, phylib will bring it up */ + netif_carrier_off(ndev); + + ret = register_netdev(ndev); + if (ret) + goto failed_register; + + if (fep->bufdesc_ex && fep->ptp_clock) + netdev_info(ndev, "registered PHC device %d\n", fep->dev_id); + + return 0; + +failed_register: + fec_enet_mii_remove(fep); +failed_mii_init: +failed_init: + for (i = 0; i < FEC_IRQ_NUM; i++) { + irq = platform_get_irq(pdev, i); + if (irq > 0) + free_irq(irq, ndev); + } +failed_irq: +failed_regulator: + clk_disable_unprepare(fep->clk_ahb); + clk_disable_unprepare(fep->clk_ipg); + if (!IS_ERR(fep->clk_ptp)) + clk_disable_unprepare(fep->clk_ptp); +failed_pin: +failed_clk: +failed_ioremap: + free_netdev(ndev); + + return ret; +} + +static int +fec_drv_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct fec_enet_private *fep = netdev_priv(ndev); + int i; + + unregister_netdev(ndev); + fec_enet_mii_remove(fep); + del_timer_sync(&fep->time_keep); + clk_disable_unprepare(fep->clk_ptp); + if (fep->ptp_clock) + ptp_clock_unregister(fep->ptp_clock); + clk_disable_unprepare(fep->clk_ahb); + clk_disable_unprepare(fep->clk_ipg); + for (i = 0; i < FEC_IRQ_NUM; i++) { + int irq = platform_get_irq(pdev, i); + if (irq > 0) + free_irq(irq, ndev); + } + free_netdev(ndev); + + platform_set_drvdata(pdev, NULL); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int +fec_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct fec_enet_private *fep = netdev_priv(ndev); + + if (netif_running(ndev)) { + fec_stop(ndev); + netif_device_detach(ndev); + } + clk_disable_unprepare(fep->clk_ahb); + clk_disable_unprepare(fep->clk_ipg); + + return 0; +} + +static int +fec_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct fec_enet_private *fep = netdev_priv(ndev); + + clk_prepare_enable(fep->clk_ahb); + clk_prepare_enable(fep->clk_ipg); + if (netif_running(ndev)) { + fec_restart(ndev, fep->full_duplex); + netif_device_attach(ndev); + } + + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(fec_pm_ops, fec_suspend, fec_resume); + +static struct platform_driver fec_driver = { + .driver = { + .name = DRIVER_NAME, + .owner = THIS_MODULE, + .pm = &fec_pm_ops, + .of_match_table = fec_dt_ids, + }, + .id_table = fec_devtype, + .probe = fec_probe, + .remove = fec_drv_remove, +}; + +module_platform_driver(fec_driver); + +MODULE_LICENSE("GPL"); diff --cc drivers/net/ethernet/intel/igb/igb.h index c92115e71ebe,ab577a763a20..9d6c075e232d --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@@ -298,20 -288,13 +297,13 @@@ enum e1000_ring_flags_t IGB_RING_FLAG_TX_DETECT_HANG }; - #define ring_uses_build_skb(ring) \ - test_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags) - #define set_ring_build_skb_enabled(ring) \ - set_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags) - #define clear_ring_build_skb_enabled(ring) \ - clear_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags) - #define IGB_TXD_DCMD (E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS) -#define IGB_RX_DESC(R, i) \ +#define IGB_RX_DESC(R, i) \ (&(((union e1000_adv_rx_desc *)((R)->desc))[i])) -#define IGB_TX_DESC(R, i) \ +#define IGB_TX_DESC(R, i) \ (&(((union e1000_adv_tx_desc *)((R)->desc))[i])) -#define IGB_TX_CTXTDESC(R, i) \ +#define IGB_TX_CTXTDESC(R, i) \ (&(((struct e1000_adv_tx_context_desc *)((R)->desc))[i])) /* igb_test_staterr - tests bits within Rx descriptor status and error fields */ diff --cc drivers/net/ethernet/intel/igb/igb_main.c index 9bf08b977daa,64f75291e3a5..dcaa35481dd7 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@@ -3387,25 -3350,11 +3387,11 @@@ void igb_configure_rx_ring(struct igb_a wr32(E1000_RXDCTL(reg_idx), rxdctl); } - static void igb_set_rx_buffer_len(struct igb_adapter *adapter, - struct igb_ring *rx_ring) - { - #define IGB_MAX_BUILD_SKB_SIZE \ - (SKB_WITH_OVERHEAD(IGB_RX_BUFSZ) - \ - (NET_SKB_PAD + NET_IP_ALIGN + IGB_TS_HDR_LEN)) - - /* set build_skb flag */ - if (adapter->max_frame_size <= IGB_MAX_BUILD_SKB_SIZE) - set_ring_build_skb_enabled(rx_ring); - else - clear_ring_build_skb_enabled(rx_ring); - } - /** - * igb_configure_rx - Configure receive Unit after Reset - * @adapter: board private structure + * igb_configure_rx - Configure receive Unit after Reset + * @adapter: board private structure * - * Configure the Rx unit of the MAC after a reset. + * Configure the Rx unit of the MAC after a reset. **/ static void igb_configure_rx(struct igb_adapter *adapter) { @@@ -3416,16 -3365,12 +3402,13 @@@ /* set the correct pool for the PF default MAC address in entry 0 */ igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0, - adapter->vfs_allocated_count); + adapter->vfs_allocated_count); /* Setup the HW Rx Head and Tail Descriptor Pointers and - * the Base and Length of the Rx Descriptor Ring */ + * the Base and Length of the Rx Descriptor Ring + */ - for (i = 0; i < adapter->num_rx_queues; i++) { - struct igb_ring *rx_ring = adapter->rx_ring[i]; - igb_set_rx_buffer_len(adapter, rx_ring); - igb_configure_rx_ring(adapter, rx_ring); - } + for (i = 0; i < adapter->num_rx_queues; i++) + igb_configure_rx_ring(adapter, adapter->rx_ring[i]); } /** @@@ -6808,17 -6688,9 +6716,9 @@@ static bool igb_alloc_mapped_page(struc return true; } - static inline unsigned int igb_rx_offset(struct igb_ring *rx_ring) - { - if (ring_uses_build_skb(rx_ring)) - return NET_SKB_PAD + NET_IP_ALIGN; - else - return 0; - } - /** - * igb_alloc_rx_buffers - Replace used receive buffers; packet split - * @adapter: address of board private structure + * igb_alloc_rx_buffers - Replace used receive buffers; packet split + * @adapter: address of board private structure **/ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) { @@@ -6838,12 -6710,11 +6738,10 @@@ if (!igb_alloc_mapped_page(rx_ring, bi)) break; - /* - * Refresh the desc even if buffer_addrs didn't change + /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + - bi->page_offset + - igb_rx_offset(rx_ring)); + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); rx_desc++; bi++; diff --cc include/net/scm.h index 5a4c6a9eb122,b11708105681..8de2d37d2077 --- a/include/net/scm.h +++ b/include/net/scm.h @@@ -75,10 -81,8 +75,10 @@@ static __inline__ int scm_send(struct s struct scm_cookie *scm, bool forcecreds) { memset(scm, 0, sizeof(*scm)); + scm->creds.uid = INVALID_UID; + scm->creds.gid = INVALID_GID; if (forcecreds) - scm_set_cred(scm, task_tgid(current), current_euid(), current_egid()); - scm_set_cred(scm, task_tgid(current), current_cred()); ++ scm_set_cred(scm, task_tgid(current), current_uid(), current_gid()); unix_get_peersec_dgram(sock, scm); if (msg->msg_controllen <= 0) return 0; diff --cc net/batman-adv/main.c index 6277735cd89e,fa563e497c48..3e30a0f1b908 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@@ -177,7 -169,7 +177,13 @@@ void batadv_mesh_free(struct net_devic atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); } - int batadv_is_my_mac(const uint8_t *addr) ++/** ++ * batadv_is_my_mac - check if the given mac address belongs to any of the real ++ * interfaces in the current mesh ++ * @bat_priv: the bat priv with all the soft interface information ++ * @addr: the address to check ++ */ + int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr) { const struct batadv_hard_iface *hard_iface; diff --cc net/batman-adv/network-coding.c index 6b9a54485314,000000000000..f7c54305a918 mode 100644,000000..100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@@ -1,1821 -1,0 +1,1822 @@@ +/* Copyright (C) 2012-2013 B.A.T.M.A.N. contributors: + * + * Martin Hundebøll, Jeppe Ledet-Pedersen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#include + +#include "main.h" +#include "hash.h" +#include "network-coding.h" +#include "send.h" +#include "originator.h" +#include "hard-interface.h" +#include "routing.h" + +static struct lock_class_key batadv_nc_coding_hash_lock_class_key; +static struct lock_class_key batadv_nc_decoding_hash_lock_class_key; + +static void batadv_nc_worker(struct work_struct *work); +static int batadv_nc_recv_coded_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if); + +/** + * batadv_nc_start_timer - initialise the nc periodic worker + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_nc_start_timer(struct batadv_priv *bat_priv) +{ + queue_delayed_work(batadv_event_workqueue, &bat_priv->nc.work, + msecs_to_jiffies(10)); +} + +/** + * batadv_nc_init - initialise coding hash table and start house keeping + * @bat_priv: the bat priv with all the soft interface information + */ +int batadv_nc_init(struct batadv_priv *bat_priv) +{ + bat_priv->nc.timestamp_fwd_flush = jiffies; + bat_priv->nc.timestamp_sniffed_purge = jiffies; + + if (bat_priv->nc.coding_hash || bat_priv->nc.decoding_hash) + return 0; + + bat_priv->nc.coding_hash = batadv_hash_new(128); + if (!bat_priv->nc.coding_hash) + goto err; + + batadv_hash_set_lock_class(bat_priv->nc.coding_hash, + &batadv_nc_coding_hash_lock_class_key); + + bat_priv->nc.decoding_hash = batadv_hash_new(128); + if (!bat_priv->nc.decoding_hash) + goto err; + + batadv_hash_set_lock_class(bat_priv->nc.coding_hash, + &batadv_nc_decoding_hash_lock_class_key); + + /* Register our packet type */ + if (batadv_recv_handler_register(BATADV_CODED, + batadv_nc_recv_coded_packet) < 0) + goto err; + + INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker); + batadv_nc_start_timer(bat_priv); + + return 0; + +err: + return -ENOMEM; +} + +/** + * batadv_nc_init_bat_priv - initialise the nc specific bat_priv variables + * @bat_priv: the bat priv with all the soft interface information + */ +void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv) +{ + atomic_set(&bat_priv->network_coding, 1); + bat_priv->nc.min_tq = 200; + bat_priv->nc.max_fwd_delay = 10; + bat_priv->nc.max_buffer_time = 200; +} + +/** + * batadv_nc_init_orig - initialise the nc fields of an orig_node + * @orig_node: the orig_node which is going to be initialised + */ +void batadv_nc_init_orig(struct batadv_orig_node *orig_node) +{ + INIT_LIST_HEAD(&orig_node->in_coding_list); + INIT_LIST_HEAD(&orig_node->out_coding_list); + spin_lock_init(&orig_node->in_coding_list_lock); + spin_lock_init(&orig_node->out_coding_list_lock); +} + +/** + * batadv_nc_node_free_rcu - rcu callback to free an nc node and remove + * its refcount on the orig_node + * @rcu: rcu pointer of the nc node + */ +static void batadv_nc_node_free_rcu(struct rcu_head *rcu) +{ + struct batadv_nc_node *nc_node; + + nc_node = container_of(rcu, struct batadv_nc_node, rcu); + batadv_orig_node_free_ref(nc_node->orig_node); + kfree(nc_node); +} + +/** + * batadv_nc_node_free_ref - decrements the nc node refcounter and possibly + * frees it + * @nc_node: the nc node to free + */ +static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node) +{ + if (atomic_dec_and_test(&nc_node->refcount)) + call_rcu(&nc_node->rcu, batadv_nc_node_free_rcu); +} + +/** + * batadv_nc_path_free_ref - decrements the nc path refcounter and possibly + * frees it + * @nc_path: the nc node to free + */ +static void batadv_nc_path_free_ref(struct batadv_nc_path *nc_path) +{ + if (atomic_dec_and_test(&nc_path->refcount)) + kfree_rcu(nc_path, rcu); +} + +/** + * batadv_nc_packet_free - frees nc packet + * @nc_packet: the nc packet to free + */ +static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet) +{ + if (nc_packet->skb) + kfree_skb(nc_packet->skb); + + batadv_nc_path_free_ref(nc_packet->nc_path); + kfree(nc_packet); +} + +/** + * batadv_nc_to_purge_nc_node - checks whether an nc node has to be purged + * @bat_priv: the bat priv with all the soft interface information + * @nc_node: the nc node to check + * + * Returns true if the entry has to be purged now, false otherwise + */ +static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv, + struct batadv_nc_node *nc_node) +{ + if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) + return true; + + return batadv_has_timed_out(nc_node->last_seen, BATADV_NC_NODE_TIMEOUT); +} + +/** + * batadv_nc_to_purge_nc_path_coding - checks whether an nc path has timed out + * @bat_priv: the bat priv with all the soft interface information + * @nc_path: the nc path to check + * + * Returns true if the entry has to be purged now, false otherwise + */ +static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv, + struct batadv_nc_path *nc_path) +{ + if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) + return true; + + /* purge the path when no packets has been added for 10 times the + * max_fwd_delay time + */ + return batadv_has_timed_out(nc_path->last_valid, + bat_priv->nc.max_fwd_delay * 10); +} + +/** + * batadv_nc_to_purge_nc_path_decoding - checks whether an nc path has timed out + * @bat_priv: the bat priv with all the soft interface information + * @nc_path: the nc path to check + * + * Returns true if the entry has to be purged now, false otherwise + */ +static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv, + struct batadv_nc_path *nc_path) +{ + if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) + return true; + + /* purge the path when no packets has been added for 10 times the + * max_buffer time + */ + return batadv_has_timed_out(nc_path->last_valid, + bat_priv->nc.max_buffer_time*10); +} + +/** + * batadv_nc_purge_orig_nc_nodes - go through list of nc nodes and purge stale + * entries + * @bat_priv: the bat priv with all the soft interface information + * @list: list of nc nodes + * @lock: nc node list lock + * @to_purge: function in charge to decide whether an entry has to be purged or + * not. This function takes the nc node as argument and has to return + * a boolean value: true if the entry has to be deleted, false + * otherwise + */ +static void +batadv_nc_purge_orig_nc_nodes(struct batadv_priv *bat_priv, + struct list_head *list, + spinlock_t *lock, + bool (*to_purge)(struct batadv_priv *, + struct batadv_nc_node *)) +{ + struct batadv_nc_node *nc_node, *nc_node_tmp; + + /* For each nc_node in list */ + spin_lock_bh(lock); + list_for_each_entry_safe(nc_node, nc_node_tmp, list, list) { + /* if an helper function has been passed as parameter, + * ask it if the entry has to be purged or not + */ + if (to_purge && !to_purge(bat_priv, nc_node)) + continue; + + batadv_dbg(BATADV_DBG_NC, bat_priv, + "Removing nc_node %pM -> %pM\n", + nc_node->addr, nc_node->orig_node->orig); + list_del_rcu(&nc_node->list); + batadv_nc_node_free_ref(nc_node); + } + spin_unlock_bh(lock); +} + +/** + * batadv_nc_purge_orig - purges all nc node data attached of the given + * originator + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig_node with the nc node entries to be purged + * @to_purge: function in charge to decide whether an entry has to be purged or + * not. This function takes the nc node as argument and has to return + * a boolean value: true is the entry has to be deleted, false + * otherwise + */ +void batadv_nc_purge_orig(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + bool (*to_purge)(struct batadv_priv *, + struct batadv_nc_node *)) +{ + /* Check ingoing nc_node's of this orig_node */ + batadv_nc_purge_orig_nc_nodes(bat_priv, &orig_node->in_coding_list, + &orig_node->in_coding_list_lock, + to_purge); + + /* Check outgoing nc_node's of this orig_node */ + batadv_nc_purge_orig_nc_nodes(bat_priv, &orig_node->out_coding_list, + &orig_node->out_coding_list_lock, + to_purge); +} + +/** + * batadv_nc_purge_orig_hash - traverse entire originator hash to check if they + * have timed out nc nodes + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv) +{ + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct hlist_head *head; + struct batadv_orig_node *orig_node; + uint32_t i; + + if (!hash) + return; + + /* For each orig_node */ + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) + batadv_nc_purge_orig(bat_priv, orig_node, + batadv_nc_to_purge_nc_node); + rcu_read_unlock(); + } +} + +/** + * batadv_nc_purge_paths - traverse all nc paths part of the hash and remove + * unused ones + * @bat_priv: the bat priv with all the soft interface information + * @hash: hash table containing the nc paths to check + * @to_purge: function in charge to decide whether an entry has to be purged or + * not. This function takes the nc node as argument and has to return + * a boolean value: true is the entry has to be deleted, false + * otherwise + */ +static void batadv_nc_purge_paths(struct batadv_priv *bat_priv, + struct batadv_hashtable *hash, + bool (*to_purge)(struct batadv_priv *, + struct batadv_nc_path *)) +{ + struct hlist_head *head; + struct hlist_node *node_tmp; + struct batadv_nc_path *nc_path; + spinlock_t *lock; /* Protects lists in hash */ + uint32_t i; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + lock = &hash->list_locks[i]; + + /* For each nc_path in this bin */ + spin_lock_bh(lock); + hlist_for_each_entry_safe(nc_path, node_tmp, head, hash_entry) { + /* if an helper function has been passed as parameter, + * ask it if the entry has to be purged or not + */ + if (to_purge && !to_purge(bat_priv, nc_path)) + continue; + + /* purging an non-empty nc_path should never happen, but + * is observed under high CPU load. Delay the purging + * until next iteration to allow the packet_list to be + * emptied first. + */ + if (!unlikely(list_empty(&nc_path->packet_list))) { + net_ratelimited_function(printk, + KERN_WARNING + "Skipping free of non-empty nc_path (%pM -> %pM)!\n", + nc_path->prev_hop, + nc_path->next_hop); + continue; + } + + /* nc_path is unused, so remove it */ + batadv_dbg(BATADV_DBG_NC, bat_priv, + "Remove nc_path %pM -> %pM\n", + nc_path->prev_hop, nc_path->next_hop); + hlist_del_rcu(&nc_path->hash_entry); + batadv_nc_path_free_ref(nc_path); + } + spin_unlock_bh(lock); + } +} + +/** + * batadv_nc_hash_key_gen - computes the nc_path hash key + * @key: buffer to hold the final hash key + * @src: source ethernet mac address going into the hash key + * @dst: destination ethernet mac address going into the hash key + */ +static void batadv_nc_hash_key_gen(struct batadv_nc_path *key, const char *src, + const char *dst) +{ + memcpy(key->prev_hop, src, sizeof(key->prev_hop)); + memcpy(key->next_hop, dst, sizeof(key->next_hop)); +} + +/** + * batadv_nc_hash_choose - compute the hash value for an nc path + * @data: data to hash + * @size: size of the hash table + * + * Returns the selected index in the hash table for the given data. + */ +static uint32_t batadv_nc_hash_choose(const void *data, uint32_t size) +{ + const struct batadv_nc_path *nc_path = data; + uint32_t hash = 0; + + hash = batadv_hash_bytes(hash, &nc_path->prev_hop, + sizeof(nc_path->prev_hop)); + hash = batadv_hash_bytes(hash, &nc_path->next_hop, + sizeof(nc_path->next_hop)); + + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + + return hash % size; +} + +/** + * batadv_nc_hash_compare - comparing function used in the network coding hash + * tables + * @node: node in the local table + * @data2: second object to compare the node to + * + * Returns 1 if the two entry are the same, 0 otherwise + */ +static int batadv_nc_hash_compare(const struct hlist_node *node, + const void *data2) +{ + const struct batadv_nc_path *nc_path1, *nc_path2; + + nc_path1 = container_of(node, struct batadv_nc_path, hash_entry); + nc_path2 = data2; + + /* Return 1 if the two keys are identical */ + if (memcmp(nc_path1->prev_hop, nc_path2->prev_hop, + sizeof(nc_path1->prev_hop)) != 0) + return 0; + + if (memcmp(nc_path1->next_hop, nc_path2->next_hop, + sizeof(nc_path1->next_hop)) != 0) + return 0; + + return 1; +} + +/** + * batadv_nc_hash_find - search for an existing nc path and return it + * @hash: hash table containing the nc path + * @data: search key + * + * Returns the nc_path if found, NULL otherwise. + */ +static struct batadv_nc_path * +batadv_nc_hash_find(struct batadv_hashtable *hash, + void *data) +{ + struct hlist_head *head; + struct batadv_nc_path *nc_path, *nc_path_tmp = NULL; + int index; + + if (!hash) + return NULL; + + index = batadv_nc_hash_choose(data, hash->size); + head = &hash->table[index]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(nc_path, head, hash_entry) { + if (!batadv_nc_hash_compare(&nc_path->hash_entry, data)) + continue; + + if (!atomic_inc_not_zero(&nc_path->refcount)) + continue; + + nc_path_tmp = nc_path; + break; + } + rcu_read_unlock(); + + return nc_path_tmp; +} + +/** + * batadv_nc_send_packet - send non-coded packet and free nc_packet struct + * @nc_packet: the nc packet to send + */ +static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet) +{ + batadv_send_skb_packet(nc_packet->skb, + nc_packet->neigh_node->if_incoming, + nc_packet->nc_path->next_hop); + nc_packet->skb = NULL; + batadv_nc_packet_free(nc_packet); +} + +/** + * batadv_nc_sniffed_purge - Checks timestamp of given sniffed nc_packet. + * @bat_priv: the bat priv with all the soft interface information + * @nc_path: the nc path the packet belongs to + * @nc_packet: the nc packet to be checked + * + * Checks whether the given sniffed (overheard) nc_packet has hit its buffering + * timeout. If so, the packet is no longer kept and the entry deleted from the + * queue. Has to be called with the appropriate locks. + * + * Returns false as soon as the entry in the fifo queue has not been timed out + * yet and true otherwise. + */ +static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv, + struct batadv_nc_path *nc_path, + struct batadv_nc_packet *nc_packet) +{ + unsigned long timeout = bat_priv->nc.max_buffer_time; + bool res = false; + + /* Packets are added to tail, so the remaining packets did not time + * out and we can stop processing the current queue + */ + if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE && + !batadv_has_timed_out(nc_packet->timestamp, timeout)) + goto out; + + /* purge nc packet */ + list_del(&nc_packet->list); + batadv_nc_packet_free(nc_packet); + + res = true; + +out: + return res; +} + +/** + * batadv_nc_fwd_flush - Checks the timestamp of the given nc packet. + * @bat_priv: the bat priv with all the soft interface information + * @nc_path: the nc path the packet belongs to + * @nc_packet: the nc packet to be checked + * + * Checks whether the given nc packet has hit its forward timeout. If so, the + * packet is no longer delayed, immediately sent and the entry deleted from the + * queue. Has to be called with the appropriate locks. + * + * Returns false as soon as the entry in the fifo queue has not been timed out + * yet and true otherwise. + */ +static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv, + struct batadv_nc_path *nc_path, + struct batadv_nc_packet *nc_packet) +{ + unsigned long timeout = bat_priv->nc.max_fwd_delay; + + /* Packets are added to tail, so the remaining packets did not time + * out and we can stop processing the current queue + */ + if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE && + !batadv_has_timed_out(nc_packet->timestamp, timeout)) + return false; + + /* Send packet */ + batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); + batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, + nc_packet->skb->len + ETH_HLEN); + list_del(&nc_packet->list); + batadv_nc_send_packet(nc_packet); + + return true; +} + +/** + * batadv_nc_process_nc_paths - traverse given nc packet pool and free timed out + * nc packets + * @bat_priv: the bat priv with all the soft interface information + * @hash: to be processed hash table + * @process_fn: Function called to process given nc packet. Should return true + * to encourage this function to proceed with the next packet. + * Otherwise the rest of the current queue is skipped. + */ +static void +batadv_nc_process_nc_paths(struct batadv_priv *bat_priv, + struct batadv_hashtable *hash, + bool (*process_fn)(struct batadv_priv *, + struct batadv_nc_path *, + struct batadv_nc_packet *)) +{ + struct hlist_head *head; + struct batadv_nc_packet *nc_packet, *nc_packet_tmp; + struct batadv_nc_path *nc_path; + bool ret; + int i; + + if (!hash) + return; + + /* Loop hash table bins */ + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + /* Loop coding paths */ + rcu_read_lock(); + hlist_for_each_entry_rcu(nc_path, head, hash_entry) { + /* Loop packets */ + spin_lock_bh(&nc_path->packet_list_lock); + list_for_each_entry_safe(nc_packet, nc_packet_tmp, + &nc_path->packet_list, list) { + ret = process_fn(bat_priv, nc_path, nc_packet); + if (!ret) + break; + } + spin_unlock_bh(&nc_path->packet_list_lock); + } + rcu_read_unlock(); + } +} + +/** + * batadv_nc_worker - periodic task for house keeping related to network coding + * @work: kernel work struct + */ +static void batadv_nc_worker(struct work_struct *work) +{ + struct delayed_work *delayed_work; + struct batadv_priv_nc *priv_nc; + struct batadv_priv *bat_priv; + unsigned long timeout; + + delayed_work = container_of(work, struct delayed_work, work); + priv_nc = container_of(delayed_work, struct batadv_priv_nc, work); + bat_priv = container_of(priv_nc, struct batadv_priv, nc); + + batadv_nc_purge_orig_hash(bat_priv); + batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, + batadv_nc_to_purge_nc_path_coding); + batadv_nc_purge_paths(bat_priv, bat_priv->nc.decoding_hash, + batadv_nc_to_purge_nc_path_decoding); + + timeout = bat_priv->nc.max_fwd_delay; + + if (batadv_has_timed_out(bat_priv->nc.timestamp_fwd_flush, timeout)) { + batadv_nc_process_nc_paths(bat_priv, bat_priv->nc.coding_hash, + batadv_nc_fwd_flush); + bat_priv->nc.timestamp_fwd_flush = jiffies; + } + + if (batadv_has_timed_out(bat_priv->nc.timestamp_sniffed_purge, + bat_priv->nc.max_buffer_time)) { + batadv_nc_process_nc_paths(bat_priv, bat_priv->nc.decoding_hash, + batadv_nc_sniffed_purge); + bat_priv->nc.timestamp_sniffed_purge = jiffies; + } + + /* Schedule a new check */ + batadv_nc_start_timer(bat_priv); +} + +/** + * batadv_can_nc_with_orig - checks whether the given orig node is suitable for + * coding or not + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: neighboring orig node which may be used as nc candidate + * @ogm_packet: incoming ogm packet also used for the checks + * + * Returns true if: + * 1) The OGM must have the most recent sequence number. + * 2) The TTL must be decremented by one and only one. + * 3) The OGM must be received from the first hop from orig_node. + * 4) The TQ value of the OGM must be above bat_priv->nc.min_tq. + */ +static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_ogm_packet *ogm_packet) +{ + if (orig_node->last_real_seqno != ntohl(ogm_packet->seqno)) + return false; + if (orig_node->last_ttl != ogm_packet->header.ttl + 1) + return false; + if (!batadv_compare_eth(ogm_packet->orig, ogm_packet->prev_sender)) + return false; + if (ogm_packet->tq < bat_priv->nc.min_tq) + return false; + + return true; +} + +/** + * batadv_nc_find_nc_node - search for an existing nc node and return it + * @orig_node: orig node originating the ogm packet + * @orig_neigh_node: neighboring orig node from which we received the ogm packet + * (can be equal to orig_node) + * @in_coding: traverse incoming or outgoing network coding list + * + * Returns the nc_node if found, NULL otherwise. + */ +static struct batadv_nc_node +*batadv_nc_find_nc_node(struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + bool in_coding) +{ + struct batadv_nc_node *nc_node, *nc_node_out = NULL; + struct list_head *list; + + if (in_coding) + list = &orig_neigh_node->in_coding_list; + else + list = &orig_neigh_node->out_coding_list; + + /* Traverse list of nc_nodes to orig_node */ + rcu_read_lock(); + list_for_each_entry_rcu(nc_node, list, list) { + if (!batadv_compare_eth(nc_node->addr, orig_node->orig)) + continue; + + if (!atomic_inc_not_zero(&nc_node->refcount)) + continue; + + /* Found a match */ + nc_node_out = nc_node; + break; + } + rcu_read_unlock(); + + return nc_node_out; +} + +/** + * batadv_nc_get_nc_node - retrieves an nc node or creates the entry if it was + * not found + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node originating the ogm packet + * @orig_neigh_node: neighboring orig node from which we received the ogm packet + * (can be equal to orig_node) + * @in_coding: traverse incoming or outgoing network coding list + * + * Returns the nc_node if found or created, NULL in case of an error. + */ +static struct batadv_nc_node +*batadv_nc_get_nc_node(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + bool in_coding) +{ + struct batadv_nc_node *nc_node; + spinlock_t *lock; /* Used to lock list selected by "int in_coding" */ + struct list_head *list; + + /* Check if nc_node is already added */ + nc_node = batadv_nc_find_nc_node(orig_node, orig_neigh_node, in_coding); + + /* Node found */ + if (nc_node) + return nc_node; + + nc_node = kzalloc(sizeof(*nc_node), GFP_ATOMIC); + if (!nc_node) + return NULL; + + if (!atomic_inc_not_zero(&orig_neigh_node->refcount)) + goto free; + + /* Initialize nc_node */ + INIT_LIST_HEAD(&nc_node->list); + memcpy(nc_node->addr, orig_node->orig, ETH_ALEN); + nc_node->orig_node = orig_neigh_node; + atomic_set(&nc_node->refcount, 2); + + /* Select ingoing or outgoing coding node */ + if (in_coding) { + lock = &orig_neigh_node->in_coding_list_lock; + list = &orig_neigh_node->in_coding_list; + } else { + lock = &orig_neigh_node->out_coding_list_lock; + list = &orig_neigh_node->out_coding_list; + } + + batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_node %pM -> %pM\n", + nc_node->addr, nc_node->orig_node->orig); + + /* Add nc_node to orig_node */ + spin_lock_bh(lock); + list_add_tail_rcu(&nc_node->list, list); + spin_unlock_bh(lock); + + return nc_node; + +free: + kfree(nc_node); + return NULL; +} + +/** + * batadv_nc_update_nc_node - updates stored incoming and outgoing nc node structs + * (best called on incoming OGMs) + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node originating the ogm packet + * @orig_neigh_node: neighboring orig node from which we received the ogm packet + * (can be equal to orig_node) + * @ogm_packet: incoming ogm packet + * @is_single_hop_neigh: orig_node is a single hop neighbor + */ +void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + struct batadv_ogm_packet *ogm_packet, + int is_single_hop_neigh) +{ + struct batadv_nc_node *in_nc_node = NULL, *out_nc_node = NULL; + + /* Check if network coding is enabled */ + if (!atomic_read(&bat_priv->network_coding)) + goto out; + + /* accept ogms from 'good' neighbors and single hop neighbors */ + if (!batadv_can_nc_with_orig(bat_priv, orig_node, ogm_packet) && + !is_single_hop_neigh) + goto out; + + /* Add orig_node as in_nc_node on hop */ + in_nc_node = batadv_nc_get_nc_node(bat_priv, orig_node, + orig_neigh_node, true); + if (!in_nc_node) + goto out; + + in_nc_node->last_seen = jiffies; + + /* Add hop as out_nc_node on orig_node */ + out_nc_node = batadv_nc_get_nc_node(bat_priv, orig_neigh_node, + orig_node, false); + if (!out_nc_node) + goto out; + + out_nc_node->last_seen = jiffies; + +out: + if (in_nc_node) + batadv_nc_node_free_ref(in_nc_node); + if (out_nc_node) + batadv_nc_node_free_ref(out_nc_node); +} + +/** + * batadv_nc_get_path - get existing nc_path or allocate a new one + * @bat_priv: the bat priv with all the soft interface information + * @hash: hash table containing the nc path + * @src: ethernet source address - first half of the nc path search key + * @dst: ethernet destination address - second half of the nc path search key + * + * Returns pointer to nc_path if the path was found or created, returns NULL + * on error. + */ +static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, + struct batadv_hashtable *hash, + uint8_t *src, + uint8_t *dst) +{ + int hash_added; + struct batadv_nc_path *nc_path, nc_path_key; + + batadv_nc_hash_key_gen(&nc_path_key, src, dst); + + /* Search for existing nc_path */ + nc_path = batadv_nc_hash_find(hash, (void *)&nc_path_key); + + if (nc_path) { + /* Set timestamp to delay removal of nc_path */ + nc_path->last_valid = jiffies; + return nc_path; + } + + /* No existing nc_path was found; create a new */ + nc_path = kzalloc(sizeof(*nc_path), GFP_ATOMIC); + + if (!nc_path) + return NULL; + + /* Initialize nc_path */ + INIT_LIST_HEAD(&nc_path->packet_list); + spin_lock_init(&nc_path->packet_list_lock); + atomic_set(&nc_path->refcount, 2); + nc_path->last_valid = jiffies; + memcpy(nc_path->next_hop, dst, ETH_ALEN); + memcpy(nc_path->prev_hop, src, ETH_ALEN); + + batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_path %pM -> %pM\n", + nc_path->prev_hop, + nc_path->next_hop); + + /* Add nc_path to hash table */ + hash_added = batadv_hash_add(hash, batadv_nc_hash_compare, + batadv_nc_hash_choose, &nc_path_key, + &nc_path->hash_entry); + + if (hash_added < 0) { + kfree(nc_path); + return NULL; + } + + return nc_path; +} + +/** + * batadv_nc_random_weight_tq - scale the receivers TQ-value to avoid unfair + * selection of a receiver with slightly lower TQ than the other + * @tq: to be weighted tq value + */ +static uint8_t batadv_nc_random_weight_tq(uint8_t tq) +{ + uint8_t rand_val, rand_tq; + + get_random_bytes(&rand_val, sizeof(rand_val)); + + /* randomize the estimated packet loss (max TQ - estimated TQ) */ + rand_tq = rand_val * (BATADV_TQ_MAX_VALUE - tq); + + /* normalize the randomized packet loss */ + rand_tq /= BATADV_TQ_MAX_VALUE; + + /* convert to (randomized) estimated tq again */ + return BATADV_TQ_MAX_VALUE - rand_tq; +} + +/** + * batadv_nc_memxor - XOR destination with source + * @dst: byte array to XOR into + * @src: byte array to XOR from + * @len: length of destination array + */ +static void batadv_nc_memxor(char *dst, const char *src, unsigned int len) +{ + unsigned int i; + + for (i = 0; i < len; ++i) + dst[i] ^= src[i]; +} + +/** + * batadv_nc_code_packets - code a received unicast_packet with an nc packet + * into a coded_packet and send it + * @bat_priv: the bat priv with all the soft interface information + * @skb: data skb to forward + * @ethhdr: pointer to the ethernet header inside the skb + * @nc_packet: structure containing the packet to the skb can be coded with + * @neigh_node: next hop to forward packet to + * + * Returns true if both packets are consumed, false otherwise. + */ +static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, + struct sk_buff *skb, + struct ethhdr *ethhdr, + struct batadv_nc_packet *nc_packet, + struct batadv_neigh_node *neigh_node) +{ + uint8_t tq_weighted_neigh, tq_weighted_coding; + struct sk_buff *skb_dest, *skb_src; + struct batadv_unicast_packet *packet1; + struct batadv_unicast_packet *packet2; + struct batadv_coded_packet *coded_packet; + struct batadv_neigh_node *neigh_tmp, *router_neigh; + struct batadv_neigh_node *router_coding = NULL; + uint8_t *first_source, *first_dest, *second_source, *second_dest; + __be32 packet_id1, packet_id2; + size_t count; + bool res = false; + int coding_len; + int unicast_size = sizeof(*packet1); + int coded_size = sizeof(*coded_packet); + int header_add = coded_size - unicast_size; + + router_neigh = batadv_orig_node_get_router(neigh_node->orig_node); + if (!router_neigh) + goto out; + + neigh_tmp = nc_packet->neigh_node; + router_coding = batadv_orig_node_get_router(neigh_tmp->orig_node); + if (!router_coding) + goto out; + + tq_weighted_neigh = batadv_nc_random_weight_tq(router_neigh->tq_avg); + tq_weighted_coding = batadv_nc_random_weight_tq(router_coding->tq_avg); + + /* Select one destination for the MAC-header dst-field based on + * weighted TQ-values. + */ + if (tq_weighted_neigh >= tq_weighted_coding) { + /* Destination from nc_packet is selected for MAC-header */ + first_dest = nc_packet->nc_path->next_hop; + first_source = nc_packet->nc_path->prev_hop; + second_dest = neigh_node->addr; + second_source = ethhdr->h_source; + packet1 = (struct batadv_unicast_packet *)nc_packet->skb->data; + packet2 = (struct batadv_unicast_packet *)skb->data; + packet_id1 = nc_packet->packet_id; + packet_id2 = batadv_skb_crc32(skb, + skb->data + sizeof(*packet2)); + } else { + /* Destination for skb is selected for MAC-header */ + first_dest = neigh_node->addr; + first_source = ethhdr->h_source; + second_dest = nc_packet->nc_path->next_hop; + second_source = nc_packet->nc_path->prev_hop; + packet1 = (struct batadv_unicast_packet *)skb->data; + packet2 = (struct batadv_unicast_packet *)nc_packet->skb->data; + packet_id1 = batadv_skb_crc32(skb, + skb->data + sizeof(*packet1)); + packet_id2 = nc_packet->packet_id; + } + + /* Instead of zero padding the smallest data buffer, we + * code into the largest. + */ + if (skb->len <= nc_packet->skb->len) { + skb_dest = nc_packet->skb; + skb_src = skb; + } else { + skb_dest = skb; + skb_src = nc_packet->skb; + } + + /* coding_len is used when decoding the packet shorter packet */ + coding_len = skb_src->len - unicast_size; + + if (skb_linearize(skb_dest) < 0 || skb_linearize(skb_src) < 0) + goto out; + + skb_push(skb_dest, header_add); + + coded_packet = (struct batadv_coded_packet *)skb_dest->data; + skb_reset_mac_header(skb_dest); + + coded_packet->header.packet_type = BATADV_CODED; + coded_packet->header.version = BATADV_COMPAT_VERSION; + coded_packet->header.ttl = packet1->header.ttl; + + /* Info about first unicast packet */ + memcpy(coded_packet->first_source, first_source, ETH_ALEN); + memcpy(coded_packet->first_orig_dest, packet1->dest, ETH_ALEN); + coded_packet->first_crc = packet_id1; + coded_packet->first_ttvn = packet1->ttvn; + + /* Info about second unicast packet */ + memcpy(coded_packet->second_dest, second_dest, ETH_ALEN); + memcpy(coded_packet->second_source, second_source, ETH_ALEN); + memcpy(coded_packet->second_orig_dest, packet2->dest, ETH_ALEN); + coded_packet->second_crc = packet_id2; + coded_packet->second_ttl = packet2->header.ttl; + coded_packet->second_ttvn = packet2->ttvn; + coded_packet->coded_len = htons(coding_len); + + /* This is where the magic happens: Code skb_src into skb_dest */ + batadv_nc_memxor(skb_dest->data + coded_size, + skb_src->data + unicast_size, coding_len); + + /* Update counters accordingly */ + if (BATADV_SKB_CB(skb_src)->decoded && + BATADV_SKB_CB(skb_dest)->decoded) { + /* Both packets are recoded */ + count = skb_src->len + ETH_HLEN; + count += skb_dest->len + ETH_HLEN; + batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE, 2); + batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES, count); + } else if (!BATADV_SKB_CB(skb_src)->decoded && + !BATADV_SKB_CB(skb_dest)->decoded) { + /* Both packets are newly coded */ + count = skb_src->len + ETH_HLEN; + count += skb_dest->len + ETH_HLEN; + batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE, 2); + batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES, count); + } else if (BATADV_SKB_CB(skb_src)->decoded && + !BATADV_SKB_CB(skb_dest)->decoded) { + /* skb_src recoded and skb_dest is newly coded */ + batadv_inc_counter(bat_priv, BATADV_CNT_NC_RECODE); + batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES, + skb_src->len + ETH_HLEN); + batadv_inc_counter(bat_priv, BATADV_CNT_NC_CODE); + batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES, + skb_dest->len + ETH_HLEN); + } else if (!BATADV_SKB_CB(skb_src)->decoded && + BATADV_SKB_CB(skb_dest)->decoded) { + /* skb_src is newly coded and skb_dest is recoded */ + batadv_inc_counter(bat_priv, BATADV_CNT_NC_CODE); + batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES, + skb_src->len + ETH_HLEN); + batadv_inc_counter(bat_priv, BATADV_CNT_NC_RECODE); + batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES, + skb_dest->len + ETH_HLEN); + } + + /* skb_src is now coded into skb_dest, so free it */ + kfree_skb(skb_src); + + /* avoid duplicate free of skb from nc_packet */ + nc_packet->skb = NULL; + batadv_nc_packet_free(nc_packet); + + /* Send the coded packet and return true */ + batadv_send_skb_packet(skb_dest, neigh_node->if_incoming, first_dest); + res = true; +out: + if (router_neigh) + batadv_neigh_node_free_ref(router_neigh); + if (router_coding) + batadv_neigh_node_free_ref(router_coding); + return res; +} + +/** + * batadv_nc_skb_coding_possible - true if a decoded skb is available at dst. + * @skb: data skb to forward + * @dst: destination mac address of the other skb to code with + * @src: source mac address of skb + * + * Whenever we network code a packet we have to check whether we received it in + * a network coded form. If so, we may not be able to use it for coding because + * some neighbors may also have received (overheard) the packet in the network + * coded form without being able to decode it. It is hard to know which of the + * neighboring nodes was able to decode the packet, therefore we can only + * re-code the packet if the source of the previous encoded packet is involved. + * Since the source encoded the packet we can be certain it has all necessary + * decode information. + * + * Returns true if coding of a decoded packet is allowed. + */ +static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, + uint8_t *dst, uint8_t *src) +{ + if (BATADV_SKB_CB(skb)->decoded && !batadv_compare_eth(dst, src)) + return false; + else + return true; +} + +/** + * batadv_nc_path_search - Find the coding path matching in_nc_node and + * out_nc_node to retrieve a buffered packet that can be used for coding. + * @bat_priv: the bat priv with all the soft interface information + * @in_nc_node: pointer to skb next hop's neighbor nc node + * @out_nc_node: pointer to skb source's neighbor nc node + * @skb: data skb to forward + * @eth_dst: next hop mac address of skb + * + * Returns true if coding of a decoded skb is allowed. + */ +static struct batadv_nc_packet * +batadv_nc_path_search(struct batadv_priv *bat_priv, + struct batadv_nc_node *in_nc_node, + struct batadv_nc_node *out_nc_node, + struct sk_buff *skb, + uint8_t *eth_dst) +{ + struct batadv_nc_path *nc_path, nc_path_key; + struct batadv_nc_packet *nc_packet_out = NULL; + struct batadv_nc_packet *nc_packet, *nc_packet_tmp; + struct batadv_hashtable *hash = bat_priv->nc.coding_hash; + int idx; + + if (!hash) + return NULL; + + /* Create almost path key */ + batadv_nc_hash_key_gen(&nc_path_key, in_nc_node->addr, + out_nc_node->addr); + idx = batadv_nc_hash_choose(&nc_path_key, hash->size); + + /* Check for coding opportunities in this nc_path */ + rcu_read_lock(); + hlist_for_each_entry_rcu(nc_path, &hash->table[idx], hash_entry) { + if (!batadv_compare_eth(nc_path->prev_hop, in_nc_node->addr)) + continue; + + if (!batadv_compare_eth(nc_path->next_hop, out_nc_node->addr)) + continue; + + spin_lock_bh(&nc_path->packet_list_lock); + if (list_empty(&nc_path->packet_list)) { + spin_unlock_bh(&nc_path->packet_list_lock); + continue; + } + + list_for_each_entry_safe(nc_packet, nc_packet_tmp, + &nc_path->packet_list, list) { + if (!batadv_nc_skb_coding_possible(nc_packet->skb, + eth_dst, + in_nc_node->addr)) + continue; + + /* Coding opportunity is found! */ + list_del(&nc_packet->list); + nc_packet_out = nc_packet; + break; + } + + spin_unlock_bh(&nc_path->packet_list_lock); + break; + } + rcu_read_unlock(); + + return nc_packet_out; +} + +/** + * batadv_nc_skb_src_search - Loops through the list of neighoring nodes of the + * skb's sender (may be equal to the originator). + * @bat_priv: the bat priv with all the soft interface information + * @skb: data skb to forward + * @eth_dst: next hop mac address of skb + * @eth_src: source mac address of skb + * @in_nc_node: pointer to skb next hop's neighbor nc node + * + * Returns an nc packet if a suitable coding packet was found, NULL otherwise. + */ +static struct batadv_nc_packet * +batadv_nc_skb_src_search(struct batadv_priv *bat_priv, + struct sk_buff *skb, + uint8_t *eth_dst, + uint8_t *eth_src, + struct batadv_nc_node *in_nc_node) +{ + struct batadv_orig_node *orig_node; + struct batadv_nc_node *out_nc_node; + struct batadv_nc_packet *nc_packet = NULL; + + orig_node = batadv_orig_hash_find(bat_priv, eth_src); + if (!orig_node) + return NULL; + + rcu_read_lock(); + list_for_each_entry_rcu(out_nc_node, + &orig_node->out_coding_list, list) { + /* Check if the skb is decoded and if recoding is possible */ + if (!batadv_nc_skb_coding_possible(skb, + out_nc_node->addr, eth_src)) + continue; + + /* Search for an opportunity in this nc_path */ + nc_packet = batadv_nc_path_search(bat_priv, in_nc_node, + out_nc_node, skb, eth_dst); + if (nc_packet) + break; + } + rcu_read_unlock(); + + batadv_orig_node_free_ref(orig_node); + return nc_packet; +} + +/** + * batadv_nc_skb_store_before_coding - set the ethernet src and dst of the + * unicast skb before it is stored for use in later decoding + * @bat_priv: the bat priv with all the soft interface information + * @skb: data skb to store + * @eth_dst_new: new destination mac address of skb + */ +static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, + struct sk_buff *skb, + uint8_t *eth_dst_new) +{ + struct ethhdr *ethhdr; + + /* Copy skb header to change the mac header */ + skb = pskb_copy(skb, GFP_ATOMIC); + if (!skb) + return; + + /* Set the mac header as if we actually sent the packet uncoded */ + ethhdr = (struct ethhdr *)skb_mac_header(skb); + memcpy(ethhdr->h_source, ethhdr->h_dest, ETH_ALEN); + memcpy(ethhdr->h_dest, eth_dst_new, ETH_ALEN); + + /* Set data pointer to MAC header to mimic packets from our tx path */ + skb_push(skb, ETH_HLEN); + + /* Add the packet to the decoding packet pool */ + batadv_nc_skb_store_for_decoding(bat_priv, skb); + + /* batadv_nc_skb_store_for_decoding() clones the skb, so we must free + * our ref + */ + kfree_skb(skb); +} + +/** + * batadv_nc_skb_dst_search - Loops through list of neighboring nodes to dst. + * @skb: data skb to forward + * @neigh_node: next hop to forward packet to + * @ethhdr: pointer to the ethernet header inside the skb + * + * Loops through list of neighboring nodes the next hop has a good connection to + * (receives OGMs with a sufficient quality). We need to find a neighbor of our + * next hop that potentially sent a packet which our next hop also received + * (overheard) and has stored for later decoding. + * + * Returns true if the skb was consumed (encoded packet sent) or false otherwise + */ +static bool batadv_nc_skb_dst_search(struct sk_buff *skb, + struct batadv_neigh_node *neigh_node, + struct ethhdr *ethhdr) +{ + struct net_device *netdev = neigh_node->if_incoming->soft_iface; + struct batadv_priv *bat_priv = netdev_priv(netdev); + struct batadv_orig_node *orig_node = neigh_node->orig_node; + struct batadv_nc_node *nc_node; + struct batadv_nc_packet *nc_packet = NULL; + + rcu_read_lock(); + list_for_each_entry_rcu(nc_node, &orig_node->in_coding_list, list) { + /* Search for coding opportunity with this in_nc_node */ + nc_packet = batadv_nc_skb_src_search(bat_priv, skb, + neigh_node->addr, + ethhdr->h_source, nc_node); + + /* Opportunity was found, so stop searching */ + if (nc_packet) + break; + } + rcu_read_unlock(); + + if (!nc_packet) + return false; + + /* Save packets for later decoding */ + batadv_nc_skb_store_before_coding(bat_priv, skb, + neigh_node->addr); + batadv_nc_skb_store_before_coding(bat_priv, nc_packet->skb, + nc_packet->neigh_node->addr); + + /* Code and send packets */ + if (batadv_nc_code_packets(bat_priv, skb, ethhdr, nc_packet, + neigh_node)) + return true; + + /* out of mem ? Coding failed - we have to free the buffered packet + * to avoid memleaks. The skb passed as argument will be dealt with + * by the calling function. + */ + batadv_nc_send_packet(nc_packet); + return false; +} + +/** + * batadv_nc_skb_add_to_path - buffer skb for later encoding / decoding + * @skb: skb to add to path + * @nc_path: path to add skb to + * @neigh_node: next hop to forward packet to + * @packet_id: checksum to identify packet + * + * Returns true if the packet was buffered or false in case of an error. + */ +static bool batadv_nc_skb_add_to_path(struct sk_buff *skb, + struct batadv_nc_path *nc_path, + struct batadv_neigh_node *neigh_node, + __be32 packet_id) +{ + struct batadv_nc_packet *nc_packet; + + nc_packet = kzalloc(sizeof(*nc_packet), GFP_ATOMIC); + if (!nc_packet) + return false; + + /* Initialize nc_packet */ + nc_packet->timestamp = jiffies; + nc_packet->packet_id = packet_id; + nc_packet->skb = skb; + nc_packet->neigh_node = neigh_node; + nc_packet->nc_path = nc_path; + + /* Add coding packet to list */ + spin_lock_bh(&nc_path->packet_list_lock); + list_add_tail(&nc_packet->list, &nc_path->packet_list); + spin_unlock_bh(&nc_path->packet_list_lock); + + return true; +} + +/** + * batadv_nc_skb_forward - try to code a packet or add it to the coding packet + * buffer + * @skb: data skb to forward + * @neigh_node: next hop to forward packet to + * @ethhdr: pointer to the ethernet header inside the skb + * + * Returns true if the skb was consumed (encoded packet sent) or false otherwise + */ +bool batadv_nc_skb_forward(struct sk_buff *skb, + struct batadv_neigh_node *neigh_node, + struct ethhdr *ethhdr) +{ + const struct net_device *netdev = neigh_node->if_incoming->soft_iface; + struct batadv_priv *bat_priv = netdev_priv(netdev); + struct batadv_unicast_packet *packet; + struct batadv_nc_path *nc_path; + __be32 packet_id; + u8 *payload; + + /* Check if network coding is enabled */ + if (!atomic_read(&bat_priv->network_coding)) + goto out; + + /* We only handle unicast packets */ + payload = skb_network_header(skb); + packet = (struct batadv_unicast_packet *)payload; + if (packet->header.packet_type != BATADV_UNICAST) + goto out; + + /* Try to find a coding opportunity and send the skb if one is found */ + if (batadv_nc_skb_dst_search(skb, neigh_node, ethhdr)) + return true; + + /* Find or create a nc_path for this src-dst pair */ + nc_path = batadv_nc_get_path(bat_priv, + bat_priv->nc.coding_hash, + ethhdr->h_source, + neigh_node->addr); + + if (!nc_path) + goto out; + + /* Add skb to nc_path */ + packet_id = batadv_skb_crc32(skb, payload + sizeof(*packet)); + if (!batadv_nc_skb_add_to_path(skb, nc_path, neigh_node, packet_id)) + goto free_nc_path; + + /* Packet is consumed */ + return true; + +free_nc_path: + batadv_nc_path_free_ref(nc_path); +out: + /* Packet is not consumed */ + return false; +} + +/** + * batadv_nc_skb_store_for_decoding - save a clone of the skb which can be used + * when decoding coded packets + * @bat_priv: the bat priv with all the soft interface information + * @skb: data skb to store + */ +void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + struct batadv_unicast_packet *packet; + struct batadv_nc_path *nc_path; + struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + __be32 packet_id; + u8 *payload; + + /* Check if network coding is enabled */ + if (!atomic_read(&bat_priv->network_coding)) + goto out; + + /* Check for supported packet type */ + payload = skb_network_header(skb); + packet = (struct batadv_unicast_packet *)payload; + if (packet->header.packet_type != BATADV_UNICAST) + goto out; + + /* Find existing nc_path or create a new */ + nc_path = batadv_nc_get_path(bat_priv, + bat_priv->nc.decoding_hash, + ethhdr->h_source, + ethhdr->h_dest); + + if (!nc_path) + goto out; + + /* Clone skb and adjust skb->data to point at batman header */ + skb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!skb)) + goto free_nc_path; + + if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) + goto free_skb; + + if (unlikely(!skb_pull_rcsum(skb, ETH_HLEN))) + goto free_skb; + + /* Add skb to nc_path */ + packet_id = batadv_skb_crc32(skb, payload + sizeof(*packet)); + if (!batadv_nc_skb_add_to_path(skb, nc_path, NULL, packet_id)) + goto free_skb; + + batadv_inc_counter(bat_priv, BATADV_CNT_NC_BUFFER); + return; + +free_skb: + kfree_skb(skb); +free_nc_path: + batadv_nc_path_free_ref(nc_path); +out: + return; +} + +/** + * batadv_nc_skb_store_sniffed_unicast - check if a received unicast packet + * should be saved in the decoding buffer and, if so, store it there + * @bat_priv: the bat priv with all the soft interface information + * @skb: unicast skb to store + */ +void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + - if (batadv_is_my_mac(ethhdr->h_dest)) ++ if (batadv_is_my_mac(bat_priv, ethhdr->h_dest)) + return; + + /* Set data pointer to MAC header to mimic packets from our tx path */ + skb_push(skb, ETH_HLEN); + + batadv_nc_skb_store_for_decoding(bat_priv, skb); +} + +/** + * batadv_nc_skb_decode_packet - decode given skb using the decode data stored + * in nc_packet ++ * @bat_priv: the bat priv with all the soft interface information + * @skb: unicast skb to decode + * @nc_packet: decode data needed to decode the skb + * + * Returns pointer to decoded unicast packet if the packet was decoded or NULL + * in case of an error. + */ +static struct batadv_unicast_packet * - batadv_nc_skb_decode_packet(struct sk_buff *skb, ++batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, + struct batadv_nc_packet *nc_packet) +{ + const int h_size = sizeof(struct batadv_unicast_packet); + const int h_diff = sizeof(struct batadv_coded_packet) - h_size; + struct batadv_unicast_packet *unicast_packet; + struct batadv_coded_packet coded_packet_tmp; + struct ethhdr *ethhdr, ethhdr_tmp; + uint8_t *orig_dest, ttl, ttvn; + unsigned int coding_len; + + /* Save headers temporarily */ + memcpy(&coded_packet_tmp, skb->data, sizeof(coded_packet_tmp)); + memcpy(ðhdr_tmp, skb_mac_header(skb), sizeof(ethhdr_tmp)); + + if (skb_cow(skb, 0) < 0) + return NULL; + + if (unlikely(!skb_pull_rcsum(skb, h_diff))) + return NULL; + + /* Data points to batman header, so set mac header 14 bytes before + * and network to data + */ + skb_set_mac_header(skb, -ETH_HLEN); + skb_reset_network_header(skb); + + /* Reconstruct original mac header */ + ethhdr = (struct ethhdr *)skb_mac_header(skb); + memcpy(ethhdr, ðhdr_tmp, sizeof(*ethhdr)); + + /* Select the correct unicast header information based on the location + * of our mac address in the coded_packet header + */ - if (batadv_is_my_mac(coded_packet_tmp.second_dest)) { ++ if (batadv_is_my_mac(bat_priv, coded_packet_tmp.second_dest)) { + /* If we are the second destination the packet was overheard, + * so the Ethernet address must be copied to h_dest and + * pkt_type changed from PACKET_OTHERHOST to PACKET_HOST + */ + memcpy(ethhdr->h_dest, coded_packet_tmp.second_dest, ETH_ALEN); + skb->pkt_type = PACKET_HOST; + + orig_dest = coded_packet_tmp.second_orig_dest; + ttl = coded_packet_tmp.second_ttl; + ttvn = coded_packet_tmp.second_ttvn; + } else { + orig_dest = coded_packet_tmp.first_orig_dest; + ttl = coded_packet_tmp.header.ttl; + ttvn = coded_packet_tmp.first_ttvn; + } + + coding_len = ntohs(coded_packet_tmp.coded_len); + + if (coding_len > skb->len) + return NULL; + + /* Here the magic is reversed: + * extract the missing packet from the received coded packet + */ + batadv_nc_memxor(skb->data + h_size, + nc_packet->skb->data + h_size, + coding_len); + + /* Resize decoded skb if decoded with larger packet */ + if (nc_packet->skb->len > coding_len + h_size) + pskb_trim_rcsum(skb, coding_len + h_size); + + /* Create decoded unicast packet */ + unicast_packet = (struct batadv_unicast_packet *)skb->data; + unicast_packet->header.packet_type = BATADV_UNICAST; + unicast_packet->header.version = BATADV_COMPAT_VERSION; + unicast_packet->header.ttl = ttl; + memcpy(unicast_packet->dest, orig_dest, ETH_ALEN); + unicast_packet->ttvn = ttvn; + + batadv_nc_packet_free(nc_packet); + return unicast_packet; +} + +/** + * batadv_nc_find_decoding_packet - search through buffered decoding data to + * find the data needed to decode the coded packet + * @bat_priv: the bat priv with all the soft interface information + * @ethhdr: pointer to the ethernet header inside the coded packet + * @coded: coded packet we try to find decode data for + * + * Returns pointer to nc packet if the needed data was found or NULL otherwise. + */ +static struct batadv_nc_packet * +batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv, + struct ethhdr *ethhdr, + struct batadv_coded_packet *coded) +{ + struct batadv_hashtable *hash = bat_priv->nc.decoding_hash; + struct batadv_nc_packet *tmp_nc_packet, *nc_packet = NULL; + struct batadv_nc_path *nc_path, nc_path_key; + uint8_t *dest, *source; + __be32 packet_id; + int index; + + if (!hash) + return NULL; + + /* Select the correct packet id based on the location of our mac-addr */ + dest = ethhdr->h_source; - if (!batadv_is_my_mac(coded->second_dest)) { ++ if (!batadv_is_my_mac(bat_priv, coded->second_dest)) { + source = coded->second_source; + packet_id = coded->second_crc; + } else { + source = coded->first_source; + packet_id = coded->first_crc; + } + + batadv_nc_hash_key_gen(&nc_path_key, source, dest); + index = batadv_nc_hash_choose(&nc_path_key, hash->size); + + /* Search for matching coding path */ + rcu_read_lock(); + hlist_for_each_entry_rcu(nc_path, &hash->table[index], hash_entry) { + /* Find matching nc_packet */ + spin_lock_bh(&nc_path->packet_list_lock); + list_for_each_entry(tmp_nc_packet, + &nc_path->packet_list, list) { + if (packet_id == tmp_nc_packet->packet_id) { + list_del(&tmp_nc_packet->list); + + nc_packet = tmp_nc_packet; + break; + } + } + spin_unlock_bh(&nc_path->packet_list_lock); + + if (nc_packet) + break; + } + rcu_read_unlock(); + + if (!nc_packet) + batadv_dbg(BATADV_DBG_NC, bat_priv, + "No decoding packet found for %u\n", packet_id); + + return nc_packet; +} + +/** + * batadv_nc_recv_coded_packet - try to decode coded packet and enqueue the + * resulting unicast packet + * @skb: incoming coded packet + * @recv_if: pointer to interface this packet was received on + */ +static int batadv_nc_recv_coded_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if) +{ + struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_unicast_packet *unicast_packet; + struct batadv_coded_packet *coded_packet; + struct batadv_nc_packet *nc_packet; + struct ethhdr *ethhdr; + int hdr_size = sizeof(*coded_packet); + + /* Check if network coding is enabled */ + if (!atomic_read(&bat_priv->network_coding)) + return NET_RX_DROP; + + /* Make sure we can access (and remove) header */ + if (unlikely(!pskb_may_pull(skb, hdr_size))) + return NET_RX_DROP; + + coded_packet = (struct batadv_coded_packet *)skb->data; + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + /* Verify frame is destined for us */ - if (!batadv_is_my_mac(ethhdr->h_dest) && - !batadv_is_my_mac(coded_packet->second_dest)) ++ if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) && ++ !batadv_is_my_mac(bat_priv, coded_packet->second_dest)) + return NET_RX_DROP; + + /* Update stat counter */ - if (batadv_is_my_mac(coded_packet->second_dest)) ++ if (batadv_is_my_mac(bat_priv, coded_packet->second_dest)) + batadv_inc_counter(bat_priv, BATADV_CNT_NC_SNIFFED); + + nc_packet = batadv_nc_find_decoding_packet(bat_priv, ethhdr, + coded_packet); + if (!nc_packet) { + batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED); + return NET_RX_DROP; + } + + /* Make skb's linear, because decoding accesses the entire buffer */ + if (skb_linearize(skb) < 0) + goto free_nc_packet; + + if (skb_linearize(nc_packet->skb) < 0) + goto free_nc_packet; + + /* Decode the packet */ - unicast_packet = batadv_nc_skb_decode_packet(skb, nc_packet); ++ unicast_packet = batadv_nc_skb_decode_packet(bat_priv, skb, nc_packet); + if (!unicast_packet) { + batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED); + goto free_nc_packet; + } + + /* Mark packet as decoded to do correct recoding when forwarding */ + BATADV_SKB_CB(skb)->decoded = true; + batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE); + batadv_add_counter(bat_priv, BATADV_CNT_NC_DECODE_BYTES, + skb->len + ETH_HLEN); + return batadv_recv_unicast_packet(skb, recv_if); + +free_nc_packet: + batadv_nc_packet_free(nc_packet); + return NET_RX_DROP; +} + +/** + * batadv_nc_free - clean up network coding memory + * @bat_priv: the bat priv with all the soft interface information + */ +void batadv_nc_free(struct batadv_priv *bat_priv) +{ + batadv_recv_handler_unregister(BATADV_CODED); + cancel_delayed_work_sync(&bat_priv->nc.work); + + batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL); + batadv_hash_destroy(bat_priv->nc.coding_hash); + batadv_nc_purge_paths(bat_priv, bat_priv->nc.decoding_hash, NULL); + batadv_hash_destroy(bat_priv->nc.decoding_hash); +} + +/** + * batadv_nc_nodes_seq_print_text - print the nc node information + * @seq: seq file to print on + * @offset: not used + */ +int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_priv *bat_priv = netdev_priv(net_dev); + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct batadv_hard_iface *primary_if; + struct hlist_head *head; + struct batadv_orig_node *orig_node; + struct batadv_nc_node *nc_node; + int i; + + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) + goto out; + + /* Traverse list of originators */ + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + /* For each orig_node in this bin */ + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + seq_printf(seq, "Node: %pM\n", orig_node->orig); + + seq_puts(seq, " Ingoing: "); + /* For each in_nc_node to this orig_node */ + list_for_each_entry_rcu(nc_node, + &orig_node->in_coding_list, + list) + seq_printf(seq, "%pM ", + nc_node->addr); + seq_puts(seq, "\n"); + + seq_puts(seq, " Outgoing: "); + /* For out_nc_node to this orig_node */ + list_for_each_entry_rcu(nc_node, + &orig_node->out_coding_list, + list) + seq_printf(seq, "%pM ", + nc_node->addr); + seq_puts(seq, "\n\n"); + } + rcu_read_unlock(); + } + +out: + if (primary_if) + batadv_hardif_free_ref(primary_if); + return 0; +} + +/** + * batadv_nc_init_debugfs - create nc folder and related files in debugfs + * @bat_priv: the bat priv with all the soft interface information + */ +int batadv_nc_init_debugfs(struct batadv_priv *bat_priv) +{ + struct dentry *nc_dir, *file; + + nc_dir = debugfs_create_dir("nc", bat_priv->debug_dir); + if (!nc_dir) + goto out; + + file = debugfs_create_u8("min_tq", S_IRUGO | S_IWUSR, nc_dir, + &bat_priv->nc.min_tq); + if (!file) + goto out; + + file = debugfs_create_u32("max_fwd_delay", S_IRUGO | S_IWUSR, nc_dir, + &bat_priv->nc.max_fwd_delay); + if (!file) + goto out; + + file = debugfs_create_u32("max_buffer_time", S_IRUGO | S_IWUSR, nc_dir, + &bat_priv->nc.max_buffer_time); + if (!file) + goto out; + + return 0; + +out: + return -ENOMEM; +} diff --cc net/batman-adv/routing.c index 8f88967ff14b,319f2906c71a..2f1f88923df8 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@@ -549,17 -548,8 +549,19 @@@ batadv_find_ifalter_router(struct batad return router; } +/** + * batadv_check_unicast_packet - Check for malformed unicast packets ++ * @bat_priv: the bat priv with all the soft interface information + * @skb: packet to check + * @hdr_size: size of header to pull + * + * Check for short header and bad addresses in given packet. Returns negative + * value when check fails and 0 otherwise. The negative value depends on the + * reason: -ENODATA for bad header, -EBADR for broadcast destination or source, + * and -EREMOTE for non-local (other host) destination. + */ - static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size) + static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) { struct ethhdr *ethhdr; @@@ -575,11 -565,11 +577,11 @@@ /* packet with broadcast sender address */ if (is_broadcast_ether_addr(ethhdr->h_source)) - return -1; + return -EBADR; /* not for me */ - if (!batadv_is_my_mac(ethhdr->h_dest)) + if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) - return -1; + return -EREMOTE; return 0; } @@@ -1058,16 -1046,7 +1061,16 @@@ int batadv_recv_unicast_packet(struct s if (is4addr) hdr_size = sizeof(*unicast_4addr_packet); - if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) + /* function returns -EREMOTE for promiscuous packets */ - check = batadv_check_unicast_packet(skb, hdr_size); ++ check = batadv_check_unicast_packet(bat_priv, skb, hdr_size); + + /* Even though the packet is not for us, we might save it to use for + * decoding a later received coded packet + */ + if (check == -EREMOTE) + batadv_nc_skb_store_sniffed_unicast(bat_priv, skb); + + if (check < 0) return NET_RX_DROP; if (!batadv_check_unicast_ttvn(bat_priv, skb)) diff --cc net/ipv4/tcp_input.c index 6d9ca35f0c35,13b9c08fc158..aafd052865ba --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@@ -107,10 -108,12 +107,11 @@@ int sysctl_tcp_early_retrans __read_mos #define FLAG_DATA_SACKED 0x20 /* New SACK. */ #define FLAG_ECE 0x40 /* ECE in this ACK */ #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ -#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ +#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ -#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ + #define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@@ -3265,38 -3565,27 +3266,59 @@@ static void tcp_send_challenge_ack(stru } } + static void tcp_store_ts_recent(struct tcp_sock *tp) + { + tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; + tp->rx_opt.ts_recent_stamp = get_seconds(); + } + + static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) + { + if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) { + /* PAWS bug workaround wrt. ACK frames, the PAWS discard + * extra check below makes sure this can only happen + * for pure ACK frames. -DaveM + * + * Not only, also it occurs for expired timestamps. + */ + + if (tcp_paws_check(&tp->rx_opt, 0)) + tcp_store_ts_recent(tp); + } + } + +/* This routine deals with acks during a TLP episode. + * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. + */ +static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + bool is_tlp_dupack = (ack == tp->tlp_high_seq) && + !(flag & (FLAG_SND_UNA_ADVANCED | + FLAG_NOT_DUP | FLAG_DATA_SACKED)); + + /* Mark the end of TLP episode on receiving TLP dupack or when + * ack is after tlp_high_seq. + */ + if (is_tlp_dupack) { + tp->tlp_high_seq = 0; + return; + } + + if (after(ack, tp->tlp_high_seq)) { + tp->tlp_high_seq = 0; + /* Don't reduce cwnd if DSACK arrives for TLP retrans. */ + if (!(flag & FLAG_DSACKING_ACK)) { + tcp_init_cwnd_reduction(sk, true); + tcp_set_ca_state(sk, TCP_CA_CWR); + tcp_end_cwnd_reduction(sk); + tcp_set_ca_state(sk, TCP_CA_Open); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPLOSSPROBERECOVERY); + } + } +} + /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { diff --cc net/openvswitch/datapath.c index 7bb5d4f6bb90,6980c3e6f066..d2f9f2e57298 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@@ -1821,20 -1738,20 +1826,21 @@@ static int ovs_vport_cmd_set(struct sk_ if (a[OVS_VPORT_ATTR_UPCALL_PID]) vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, - OVS_VPORT_CMD_NEW); - if (IS_ERR(reply)) { - netlink_set_err(sock_net(skb->sk)->genl_sock, 0, - ovs_dp_vport_multicast_group.id, PTR_ERR(reply)); - goto exit_unlock; - } + err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, + info->snd_seq, 0, OVS_VPORT_CMD_NEW); + BUG_ON(err < 0); - genl_notify(reply, genl_info_net(info), info->snd_portid, - ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); + ovs_unlock(); + ovs_notify(reply, info, &ovs_dp_vport_multicast_group); + return 0; + rtnl_unlock(); + return 0; + + exit_free: + kfree_skb(reply); exit_unlock: - rtnl_unlock(); + ovs_unlock(); return err; } diff --cc net/unix/af_unix.c index 5ca1631de7ef,2db702d82e7d..9efe01113c5c --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@@ -1414,7 -1415,7 +1414,7 @@@ static void maybe_add_creds(struct sk_b !other->sk_socket || test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); - current_euid_egid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); - UNIXCB(skb).cred = get_current_cred(); ++ current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } }