From 345415138d15a42460edb42423c67644ec8bb4a1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 Feb 2019 20:19:32 -0800 Subject: [PATCH] nfp: nsp: allow the use of DMA buffer Newer versions of NSP can access host memory. Simplest access type requires all data to be in one contiguous area. Since we don't have the guarantee on where callers of the NSP ABI will allocate their buffers we allocate a bounce buffer and copy the data in and out. Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller --- .../ethernet/netronome/nfp/nfpcore/nfp_nsp.c | 196 +++++++++++++++++- 1 file changed, 191 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c index 28262b0fc89a..dd6256841a37 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -37,6 +38,7 @@ #define NSP_COMMAND 0x08 #define NSP_COMMAND_OPTION GENMASK_ULL(63, 32) #define NSP_COMMAND_CODE GENMASK_ULL(31, 16) +#define NSP_COMMAND_DMA_BUF BIT_ULL(1) #define NSP_COMMAND_START BIT_ULL(0) /* CPP address to retrieve the data from */ @@ -49,9 +51,12 @@ #define NSP_DFLT_BUFFER_ADDRESS GENMASK_ULL(39, 0) #define NSP_DFLT_BUFFER_CONFIG 0x20 +#define NSP_DFLT_BUFFER_DMA_CHUNK_ORDER GENMASK_ULL(63, 58) #define NSP_DFLT_BUFFER_SIZE_4KB GENMASK_ULL(15, 8) #define NSP_DFLT_BUFFER_SIZE_MB GENMASK_ULL(7, 0) +#define NFP_CAP_CMD_DMA_SG 0x28 + #define NSP_MAGIC 0xab10 #define NSP_MAJOR 0 #define NSP_MINOR 8 @@ -92,6 +97,16 @@ enum nfp_nsp_cmd { SPCODE_VERSIONS = 21, /* Report FW versions */ }; +struct nfp_nsp_dma_buf { + __le32 chunk_cnt; + __le32 reserved[3]; + struct { + __le32 size; + __le32 reserved; + __le64 addr; + } descs[]; +}; + static const struct { int code; const char *msg; @@ -120,6 +135,7 @@ struct nfp_nsp { /** * struct nfp_nsp_command_arg 
- NFP command argument structure * @code: NFP SP Command Code + * @dma: @buf points to a host buffer, not NSP buffer * @timeout_sec:Timeout value to wait for completion in seconds * @option: NFP SP Command Argument * @buf: NFP SP Buffer Address @@ -127,6 +143,7 @@ struct nfp_nsp { */ struct nfp_nsp_command_arg { u16 code; + bool dma; unsigned int timeout_sec; u32 option; u64 buf; @@ -350,6 +367,7 @@ __nfp_nsp_command(struct nfp_nsp *state, const struct nfp_nsp_command_arg *arg) err = nfp_cpp_writeq(cpp, nsp_cpp, nsp_command, FIELD_PREP(NSP_COMMAND_OPTION, arg->option) | FIELD_PREP(NSP_COMMAND_CODE, arg->code) | + FIELD_PREP(NSP_COMMAND_DMA_BUF, arg->dma) | FIELD_PREP(NSP_COMMAND_START, 1)); if (err < 0) return err; @@ -456,10 +474,174 @@ nfp_nsp_command_buf_def(struct nfp_nsp *nsp, return ret; } +static int +nfp_nsp_command_buf_dma_sg(struct nfp_nsp *nsp, + struct nfp_nsp_command_buf_arg *arg, + unsigned int max_size, unsigned int chunk_order, + unsigned int dma_order) +{ + struct nfp_cpp *cpp = nsp->cpp; + struct nfp_nsp_dma_buf *desc; + struct { + dma_addr_t dma_addr; + unsigned long len; + void *chunk; + } *chunks; + size_t chunk_size, dma_size; + dma_addr_t dma_desc; + struct device *dev; + unsigned long off; + int i, ret, nseg; + size_t desc_sz; + + chunk_size = BIT_ULL(chunk_order); + dma_size = BIT_ULL(dma_order); + nseg = DIV_ROUND_UP(max_size, chunk_size); + + chunks = kzalloc(array_size(sizeof(*chunks), nseg), GFP_KERNEL); + if (!chunks) + return -ENOMEM; + + off = 0; + ret = -ENOMEM; + for (i = 0; i < nseg; i++) { + unsigned long coff; + + chunks[i].chunk = kmalloc(chunk_size, + GFP_KERNEL | __GFP_NOWARN); + if (!chunks[i].chunk) + goto exit_free_prev; + + chunks[i].len = min_t(u64, chunk_size, max_size - off); + + coff = 0; + if (arg->in_size > off) { + coff = min_t(u64, arg->in_size - off, chunk_size); + memcpy(chunks[i].chunk, arg->in_buf + off, coff); + } + memset(chunks[i].chunk + coff, 0, chunk_size - coff); + + off += chunks[i].len; + } + + dev = 
nfp_cpp_device(cpp)->parent; + + for (i = 0; i < nseg; i++) { + dma_addr_t addr; + + addr = dma_map_single(dev, chunks[i].chunk, chunks[i].len, + DMA_BIDIRECTIONAL); + chunks[i].dma_addr = addr; + + ret = dma_mapping_error(dev, addr); + if (ret) + goto exit_unmap_prev; + + if (WARN_ONCE(round_down(addr, dma_size) != + round_down(addr + chunks[i].len - 1, dma_size), + "unaligned DMA address: %pad %lu %zd\n", + &addr, chunks[i].len, dma_size)) { + ret = -EFAULT; + i++; + goto exit_unmap_prev; + } + } + + desc_sz = struct_size(desc, descs, nseg); + desc = kmalloc(desc_sz, GFP_KERNEL); + if (!desc) { + ret = -ENOMEM; + goto exit_unmap_all; + } + + desc->chunk_cnt = cpu_to_le32(nseg); + for (i = 0; i < nseg; i++) { + desc->descs[i].size = cpu_to_le32(chunks[i].len); + desc->descs[i].addr = cpu_to_le64(chunks[i].dma_addr); + } + + dma_desc = dma_map_single(dev, desc, desc_sz, DMA_TO_DEVICE); + ret = dma_mapping_error(dev, dma_desc); + if (ret) + goto exit_free_desc; + + arg->arg.dma = true; + arg->arg.buf = dma_desc; + ret = __nfp_nsp_command(nsp, &arg->arg); + if (ret < 0) + goto exit_unmap_desc; + + i = 0; + off = 0; + while (off < arg->out_size) { + unsigned int len; + + len = min_t(u64, chunks[i].len, arg->out_size - off); + memcpy(arg->out_buf + off, chunks[i].chunk, len); + off += len; + i++; + } + +exit_unmap_desc: + dma_unmap_single(dev, dma_desc, desc_sz, DMA_TO_DEVICE); +exit_free_desc: + kfree(desc); +exit_unmap_all: + i = nseg; +exit_unmap_prev: + while (--i >= 0) + dma_unmap_single(dev, chunks[i].dma_addr, chunks[i].len, + DMA_BIDIRECTIONAL); + i = nseg; +exit_free_prev: + while (--i >= 0) + kfree(chunks[i].chunk); + kfree(chunks); + if (ret < 0) + nfp_err(cpp, "NSP: SG DMA failed for command 0x%04x: %d (sz:%d cord:%d)\n", + arg->arg.code, ret, max_size, chunk_order); + return ret; +} + +static int +nfp_nsp_command_buf_dma(struct nfp_nsp *nsp, + struct nfp_nsp_command_buf_arg *arg, + unsigned int max_size, unsigned int dma_order) +{ + unsigned int 
chunk_order, buf_order; + struct nfp_cpp *cpp = nsp->cpp; + bool sg_ok; + u64 reg; + int err; + + buf_order = order_base_2(roundup_pow_of_two(max_size)); + + err = nfp_cpp_readq(cpp, nfp_resource_cpp_id(nsp->res), + nfp_resource_address(nsp->res) + NFP_CAP_CMD_DMA_SG, + &reg); + if (err < 0) + return err; + sg_ok = reg & BIT_ULL(arg->arg.code - 1); + + if (!sg_ok) { + if (buf_order > dma_order) { + nfp_err(cpp, "NSP: can't service non-SG DMA for command 0x%04x\n", + arg->arg.code); + return -ENOMEM; + } + chunk_order = buf_order; + } else { + chunk_order = min_t(unsigned int, dma_order, PAGE_SHIFT); + } + + return nfp_nsp_command_buf_dma_sg(nsp, arg, max_size, chunk_order, + dma_order); +} + static int nfp_nsp_command_buf(struct nfp_nsp *nsp, struct nfp_nsp_command_buf_arg *arg) { - unsigned int def_size, max_size; + unsigned int dma_order, def_size, max_size; struct nfp_cpp *cpp = nsp->cpp; u64 reg; int err; @@ -484,12 +666,16 @@ nfp_nsp_command_buf(struct nfp_nsp *nsp, struct nfp_nsp_command_buf_arg *arg) max_size = max(arg->in_size, arg->out_size); def_size = FIELD_GET(NSP_DFLT_BUFFER_SIZE_MB, reg) * SZ_1M + FIELD_GET(NSP_DFLT_BUFFER_SIZE_4KB, reg) * SZ_4K; - if (def_size >= max_size) + dma_order = FIELD_GET(NSP_DFLT_BUFFER_DMA_CHUNK_ORDER, reg); + if (def_size >= max_size) { return nfp_nsp_command_buf_def(nsp, arg); + } else if (!dma_order) { + nfp_err(cpp, "NSP: default buffer too small for command 0x%04x (%u < %u)\n", + arg->arg.code, def_size, max_size); + return -EINVAL; + } - nfp_err(cpp, "NSP: default buffer too small for command 0x%04x (%u < %u)\n", - arg->arg.code, def_size, max_size); - return -EINVAL; + return nfp_nsp_command_buf_dma(nsp, arg, max_size, dma_order); } int nfp_nsp_wait(struct nfp_nsp *state) -- 2.30.2